[1/4] mm/page_alloc: fix non cma alloc context

Message ID 1594789529-6206-1-git-send-email-iamjoonsoo.kim@lge.com (mailing list archive)
State New, archived
Series [1/4] mm/page_alloc: fix non cma alloc context

Commit Message

Joonsoo Kim July 15, 2020, 5:05 a.m. UTC
From: Joonsoo Kim <iamjoonsoo.kim@lge.com>

Currently, excluding the CMA area from page allocation is implemented
using current_gfp_context(). However, there are two problems with this
implementation.

First, it doesn't work for the allocation fastpath. The fastpath uses
the original gfp_mask, since current_gfp_context() was introduced to
control reclaim, which only happens on the slowpath.
Second, clearing __GFP_MOVABLE has the side effect of also excluding
ZONE_MOVABLE memory from the allocation targets.
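
For reference, the fastpath/slowpath split in __alloc_pages_nodemask()
looks roughly like this (paraphrased; not part of this patch):

	/* First allocation attempt: the original gfp_mask is used */
	page = get_page_from_freelist(alloc_mask, order, alloc_flags, &ac);
	if (likely(page))
		goto out;

	/* Only the slowpath applies the scoped PF_MEMALLOC_* context */
	alloc_mask = current_gfp_context(gfp_mask);
	page = __alloc_pages_slowpath(alloc_mask, order, &ac);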

To fix these problems, this patch changes how the CMA area is excluded
from page allocation. The main point of this change is to use
alloc_flags: alloc_flags is mainly used to control the allocation, so it
is the natural place to exclude the CMA area.

Fixes: d7fefcc (mm/cma: add PF flag to force non cma alloc)
Cc: <stable@vger.kernel.org>
Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
---
 include/linux/sched/mm.h |  4 ----
 mm/page_alloc.c          | 27 +++++++++++++++------------
 2 files changed, 15 insertions(+), 16 deletions(-)

Comments

Vlastimil Babka July 15, 2020, 8:24 a.m. UTC | #1
On 7/15/20 7:05 AM, js1304@gmail.com wrote:
> From: Joonsoo Kim <iamjoonsoo.kim@lge.com>
> 
> Currently, excluding the CMA area from page allocation is implemented
> using current_gfp_context(). However, there are two problems with this
> implementation.
> 
> First, it doesn't work for the allocation fastpath. The fastpath uses
> the original gfp_mask, since current_gfp_context() was introduced to
> control reclaim, which only happens on the slowpath.
> Second, clearing __GFP_MOVABLE has the side effect of also excluding
> ZONE_MOVABLE memory from the allocation targets.
> 
> To fix these problems, this patch changes how the CMA area is excluded
> from page allocation. The main point of this change is to use
> alloc_flags: alloc_flags is mainly used to control the allocation, so it
> is the natural place to exclude the CMA area.

Agreed, should have been done with ALLOC_CMA since the beginning.

> Fixes: d7fefcc (mm/cma: add PF flag to force non cma alloc)

More digits please.
Fixes: d7fefcc8de91 ("mm/cma: add PF flag to force non cma alloc")

> Cc: <stable@vger.kernel.org>
> Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
> ---
>  include/linux/sched/mm.h |  4 ----
>  mm/page_alloc.c          | 27 +++++++++++++++------------
>  2 files changed, 15 insertions(+), 16 deletions(-)
> 
> diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
> index 44ad5b7..a73847a 100644
> --- a/include/linux/sched/mm.h
> +++ b/include/linux/sched/mm.h
> @@ -191,10 +191,6 @@ static inline gfp_t current_gfp_context(gfp_t flags)
>  			flags &= ~(__GFP_IO | __GFP_FS);
>  		else if (pflags & PF_MEMALLOC_NOFS)
>  			flags &= ~__GFP_FS;

Above this hunk you should also remove PF_MEMALLOC_NOCMA from the test.
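
That is, the guard at the top of current_gfp_context() would become
roughly (paraphrased):

	if (unlikely(pflags & (PF_MEMALLOC_NOIO | PF_MEMALLOC_NOFS))) {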

> -#ifdef CONFIG_CMA
> -		if (pflags & PF_MEMALLOC_NOCMA)
> -			flags &= ~__GFP_MOVABLE;
> -#endif
>  	}
>  	return flags;
>  }
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 6416d08..cd53894 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -2791,7 +2791,7 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype,
>  	 * allocating from CMA when over half of the zone's free memory
>  	 * is in the CMA area.
>  	 */
> -	if (migratetype == MIGRATE_MOVABLE &&
> +	if (alloc_flags & ALLOC_CMA &&
>  	    zone_page_state(zone, NR_FREE_CMA_PAGES) >
>  	    zone_page_state(zone, NR_FREE_PAGES) / 2) {
>  		page = __rmqueue_cma_fallback(zone, order);
> @@ -2802,7 +2802,7 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype,
>  retry:
>  	page = __rmqueue_smallest(zone, order, migratetype);
>  	if (unlikely(!page)) {
> -		if (migratetype == MIGRATE_MOVABLE)
> +		if (alloc_flags & ALLOC_CMA)
>  			page = __rmqueue_cma_fallback(zone, order);
>  
>  		if (!page && __rmqueue_fallback(zone, order, migratetype,
> @@ -3502,11 +3502,9 @@ static inline long __zone_watermark_unusable_free(struct zone *z,
>  	if (likely(!alloc_harder))
>  		unusable_free += z->nr_reserved_highatomic;
>  
> -#ifdef CONFIG_CMA
>  	/* If allocation can't use CMA areas don't use free CMA pages */
> -	if (!(alloc_flags & ALLOC_CMA))
> +	if (IS_ENABLED(CONFIG_CMA) && !(alloc_flags & ALLOC_CMA))
>  		unusable_free += zone_page_state(z, NR_FREE_CMA_PAGES);
> -#endif
>  
>  	return unusable_free;
>  }
> @@ -3693,6 +3691,16 @@ alloc_flags_nofragment(struct zone *zone, gfp_t gfp_mask)
>  	return alloc_flags;
>  }
>  
> +static inline void current_alloc_flags(gfp_t gfp_mask,
> +				unsigned int *alloc_flags)
> +{
> +	unsigned int pflags = READ_ONCE(current->flags);
> +
> +	if (!(pflags & PF_MEMALLOC_NOCMA) &&
> +		gfp_migratetype(gfp_mask) == MIGRATE_MOVABLE)
> +		*alloc_flags |= ALLOC_CMA;
> +}

I don't like the modification through a parameter; I would just do what
current_gfp_context() does and return the modified value.
Also, please make it a no-op (including no READ_ONCE(current->flags)) if
!CONFIG_CMA.
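
A sketch of the suggested shape (hypothetical; not the posted code):

/* Set ALLOC_CMA unless the scoped PF_MEMALLOC_NOCMA forbids it */
static inline unsigned int current_alloc_flags(gfp_t gfp_mask,
					unsigned int alloc_flags)
{
#ifdef CONFIG_CMA
	unsigned int pflags = READ_ONCE(current->flags);

	if (!(pflags & PF_MEMALLOC_NOCMA) &&
	    gfp_migratetype(gfp_mask) == MIGRATE_MOVABLE)
		alloc_flags |= ALLOC_CMA;
#endif
	return alloc_flags;
}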

>  /*
>   * get_page_from_freelist goes through the zonelist trying to allocate
>   * a page.
> @@ -3706,6 +3714,8 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
>  	struct pglist_data *last_pgdat_dirty_limit = NULL;
>  	bool no_fallback;
>  
> +	current_alloc_flags(gfp_mask, &alloc_flags);

I don't see why to move the test here? It will still be executed in the
fastpath, if that's what you wanted to avoid.

> +
>  retry:
>  	/*
>  	 * Scan zonelist, looking for a zone with enough free.
> @@ -4339,10 +4349,6 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
>  	} else if (unlikely(rt_task(current)) && !in_interrupt())
>  		alloc_flags |= ALLOC_HARDER;
>  
> -#ifdef CONFIG_CMA
> -	if (gfp_migratetype(gfp_mask) == MIGRATE_MOVABLE)
> -		alloc_flags |= ALLOC_CMA;
> -#endif

I would just replace this here with:
alloc_flags = current_alloc_flags(gfp_mask, alloc_flags);

>  	return alloc_flags;
>  }
>  
> @@ -4808,9 +4814,6 @@ static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order,
>  	if (should_fail_alloc_page(gfp_mask, order))
>  		return false;
>  
> -	if (IS_ENABLED(CONFIG_CMA) && ac->migratetype == MIGRATE_MOVABLE)
> -		*alloc_flags |= ALLOC_CMA;

And same here... Ah, I see. current_alloc_flags() should probably take a
migratetype parameter instead of gfp_mask then.
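
E.g. a hypothetical call site in prepare_alloc_pages(), which has
ac->migratetype at hand rather than a raw gfp_mask:

	*alloc_flags = current_alloc_flags(ac->migratetype, *alloc_flags);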

> -
>  	return true;
>  }
>  
>
Michal Hocko July 15, 2020, 8:28 a.m. UTC | #2
On Wed 15-07-20 14:05:26, Joonsoo Kim wrote:
> From: Joonsoo Kim <iamjoonsoo.kim@lge.com>
> 
> Currently, excluding the CMA area from page allocation is implemented
> using current_gfp_context(). However, there are two problems with this
> implementation.
> 
> First, it doesn't work for the allocation fastpath. The fastpath uses
> the original gfp_mask, since current_gfp_context() was introduced to
> control reclaim, which only happens on the slowpath.
> Second, clearing __GFP_MOVABLE has the side effect of also excluding
> ZONE_MOVABLE memory from the allocation targets.

This can be a particular problem with movable_node configurations, where
a large portion of the memory is in movable zones.

> To fix these problems, this patch changes how the CMA area is excluded
> from page allocation. The main point of this change is to use
> alloc_flags: alloc_flags is mainly used to control the allocation, so it
> is the natural place to exclude the CMA area.

The approach is sensible and the patch makes sense to me from a quick
glance, but I am not really familiar with all the subtle details of CMA
integration with the allocator, so I do not feel confident enough to
provide my ack.

Thanks!

> Fixes: d7fefcc (mm/cma: add PF flag to force non cma alloc)
> Cc: <stable@vger.kernel.org>
> Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
> ---
>  include/linux/sched/mm.h |  4 ----
>  mm/page_alloc.c          | 27 +++++++++++++++------------
>  2 files changed, 15 insertions(+), 16 deletions(-)
> 
> diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
> index 44ad5b7..a73847a 100644
> --- a/include/linux/sched/mm.h
> +++ b/include/linux/sched/mm.h
> @@ -191,10 +191,6 @@ static inline gfp_t current_gfp_context(gfp_t flags)
>  			flags &= ~(__GFP_IO | __GFP_FS);
>  		else if (pflags & PF_MEMALLOC_NOFS)
>  			flags &= ~__GFP_FS;
> -#ifdef CONFIG_CMA
> -		if (pflags & PF_MEMALLOC_NOCMA)
> -			flags &= ~__GFP_MOVABLE;
> -#endif
>  	}
>  	return flags;
>  }
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 6416d08..cd53894 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -2791,7 +2791,7 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype,
>  	 * allocating from CMA when over half of the zone's free memory
>  	 * is in the CMA area.
>  	 */
> -	if (migratetype == MIGRATE_MOVABLE &&
> +	if (alloc_flags & ALLOC_CMA &&
>  	    zone_page_state(zone, NR_FREE_CMA_PAGES) >
>  	    zone_page_state(zone, NR_FREE_PAGES) / 2) {
>  		page = __rmqueue_cma_fallback(zone, order);
> @@ -2802,7 +2802,7 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype,
>  retry:
>  	page = __rmqueue_smallest(zone, order, migratetype);
>  	if (unlikely(!page)) {
> -		if (migratetype == MIGRATE_MOVABLE)
> +		if (alloc_flags & ALLOC_CMA)
>  			page = __rmqueue_cma_fallback(zone, order);
>  
>  		if (!page && __rmqueue_fallback(zone, order, migratetype,
> @@ -3502,11 +3502,9 @@ static inline long __zone_watermark_unusable_free(struct zone *z,
>  	if (likely(!alloc_harder))
>  		unusable_free += z->nr_reserved_highatomic;
>  
> -#ifdef CONFIG_CMA
>  	/* If allocation can't use CMA areas don't use free CMA pages */
> -	if (!(alloc_flags & ALLOC_CMA))
> +	if (IS_ENABLED(CONFIG_CMA) && !(alloc_flags & ALLOC_CMA))
>  		unusable_free += zone_page_state(z, NR_FREE_CMA_PAGES);
> -#endif
>  
>  	return unusable_free;
>  }
> @@ -3693,6 +3691,16 @@ alloc_flags_nofragment(struct zone *zone, gfp_t gfp_mask)
>  	return alloc_flags;
>  }
>  
> +static inline void current_alloc_flags(gfp_t gfp_mask,
> +				unsigned int *alloc_flags)
> +{
> +	unsigned int pflags = READ_ONCE(current->flags);
> +
> +	if (!(pflags & PF_MEMALLOC_NOCMA) &&
> +		gfp_migratetype(gfp_mask) == MIGRATE_MOVABLE)
> +		*alloc_flags |= ALLOC_CMA;
> +}
> +
>  /*
>   * get_page_from_freelist goes through the zonelist trying to allocate
>   * a page.
> @@ -3706,6 +3714,8 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
>  	struct pglist_data *last_pgdat_dirty_limit = NULL;
>  	bool no_fallback;
>  
> +	current_alloc_flags(gfp_mask, &alloc_flags);
> +
>  retry:
>  	/*
>  	 * Scan zonelist, looking for a zone with enough free.
> @@ -4339,10 +4349,6 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
>  	} else if (unlikely(rt_task(current)) && !in_interrupt())
>  		alloc_flags |= ALLOC_HARDER;
>  
> -#ifdef CONFIG_CMA
> -	if (gfp_migratetype(gfp_mask) == MIGRATE_MOVABLE)
> -		alloc_flags |= ALLOC_CMA;
> -#endif
>  	return alloc_flags;
>  }
>  
> @@ -4808,9 +4814,6 @@ static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order,
>  	if (should_fail_alloc_page(gfp_mask, order))
>  		return false;
>  
> -	if (IS_ENABLED(CONFIG_CMA) && ac->migratetype == MIGRATE_MOVABLE)
> -		*alloc_flags |= ALLOC_CMA;
> -
>  	return true;
>  }
>  
> -- 
> 2.7.4
Joonsoo Kim July 16, 2020, 7:27 a.m. UTC | #3
On Wed, Jul 15, 2020 at 5:24 PM Vlastimil Babka <vbabka@suse.cz> wrote:
>
> On 7/15/20 7:05 AM, js1304@gmail.com wrote:
> > From: Joonsoo Kim <iamjoonsoo.kim@lge.com>
> >
> > Currently, excluding the CMA area from page allocation is implemented
> > using current_gfp_context(). However, there are two problems with this
> > implementation.
> >
> > First, it doesn't work for the allocation fastpath. The fastpath uses
> > the original gfp_mask, since current_gfp_context() was introduced to
> > control reclaim, which only happens on the slowpath.
> > Second, clearing __GFP_MOVABLE has the side effect of also excluding
> > ZONE_MOVABLE memory from the allocation targets.
> >
> > To fix these problems, this patch changes how the CMA area is excluded
> > from page allocation. The main point of this change is to use
> > alloc_flags: alloc_flags is mainly used to control the allocation, so it
> > is the natural place to exclude the CMA area.
>
> Agreed, should have been done with ALLOC_CMA since the beginning.
>
> > Fixes: d7fefcc (mm/cma: add PF flag to force non cma alloc)
>
> More digits please.
> Fixes: d7fefcc8de91 ("mm/cma: add PF flag to force non cma alloc")

Will do.

> > Cc: <stable@vger.kernel.org>
> > Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
> > ---
> >  include/linux/sched/mm.h |  4 ----
> >  mm/page_alloc.c          | 27 +++++++++++++++------------
> >  2 files changed, 15 insertions(+), 16 deletions(-)
> >
> > diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
> > index 44ad5b7..a73847a 100644
> > --- a/include/linux/sched/mm.h
> > +++ b/include/linux/sched/mm.h
> > @@ -191,10 +191,6 @@ static inline gfp_t current_gfp_context(gfp_t flags)
> >                       flags &= ~(__GFP_IO | __GFP_FS);
> >               else if (pflags & PF_MEMALLOC_NOFS)
> >                       flags &= ~__GFP_FS;
>
> Above this hunk you should also remove PF_MEMALLOC_NOCMA from the test.

Will do.

> > -#ifdef CONFIG_CMA
> > -             if (pflags & PF_MEMALLOC_NOCMA)
> > -                     flags &= ~__GFP_MOVABLE;
> > -#endif
> >       }
> >       return flags;
> >  }
> > diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> > index 6416d08..cd53894 100644
> > --- a/mm/page_alloc.c
> > +++ b/mm/page_alloc.c
> > @@ -2791,7 +2791,7 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype,
> >        * allocating from CMA when over half of the zone's free memory
> >        * is in the CMA area.
> >        */
> > -     if (migratetype == MIGRATE_MOVABLE &&
> > +     if (alloc_flags & ALLOC_CMA &&
> >           zone_page_state(zone, NR_FREE_CMA_PAGES) >
> >           zone_page_state(zone, NR_FREE_PAGES) / 2) {
> >               page = __rmqueue_cma_fallback(zone, order);
> > @@ -2802,7 +2802,7 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype,
> >  retry:
> >       page = __rmqueue_smallest(zone, order, migratetype);
> >       if (unlikely(!page)) {
> > -             if (migratetype == MIGRATE_MOVABLE)
> > +             if (alloc_flags & ALLOC_CMA)
> >                       page = __rmqueue_cma_fallback(zone, order);
> >
> >               if (!page && __rmqueue_fallback(zone, order, migratetype,
> > @@ -3502,11 +3502,9 @@ static inline long __zone_watermark_unusable_free(struct zone *z,
> >       if (likely(!alloc_harder))
> >               unusable_free += z->nr_reserved_highatomic;
> >
> > -#ifdef CONFIG_CMA
> >       /* If allocation can't use CMA areas don't use free CMA pages */
> > -     if (!(alloc_flags & ALLOC_CMA))
> > +     if (IS_ENABLED(CONFIG_CMA) && !(alloc_flags & ALLOC_CMA))
> >               unusable_free += zone_page_state(z, NR_FREE_CMA_PAGES);
> > -#endif
> >
> >       return unusable_free;
> >  }
> > @@ -3693,6 +3691,16 @@ alloc_flags_nofragment(struct zone *zone, gfp_t gfp_mask)
> >       return alloc_flags;
> >  }
> >
> > +static inline void current_alloc_flags(gfp_t gfp_mask,
> > +                             unsigned int *alloc_flags)
> > +{
> > +     unsigned int pflags = READ_ONCE(current->flags);
> > +
> > +     if (!(pflags & PF_MEMALLOC_NOCMA) &&
> > +             gfp_migratetype(gfp_mask) == MIGRATE_MOVABLE)
> > +             *alloc_flags |= ALLOC_CMA;
> > +}
>
> I don't like the modification through a parameter; I would just do what
> current_gfp_context() does and return the modified value.
> Also, please make it a no-op (including no READ_ONCE(current->flags)) if
> !CONFIG_CMA.

Okay.

> >  /*
> >   * get_page_from_freelist goes through the zonelist trying to allocate
> >   * a page.
> > @@ -3706,6 +3714,8 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
> >       struct pglist_data *last_pgdat_dirty_limit = NULL;
> >       bool no_fallback;
> >
> > +     current_alloc_flags(gfp_mask, &alloc_flags);
>
> I don't see why to move the test here? It will still be executed in the
> fastpath, if that's what you wanted to avoid.

I want it to be executed on the fastpath, too. The reason I moved it here
is that alloc_flags can be reset on the slowpath; see the code around
where __gfp_pfmemalloc_flags() is called. This is the only place where I
can apply this option to all the allocation paths at once.
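
For reference, the reset in __alloc_pages_slowpath() looks roughly like
this (paraphrased):

	reserve_flags = __gfp_pfmemalloc_flags(gfp_mask);
	if (reserve_flags)
		alloc_flags = reserve_flags;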

Thanks.

> > +
> >  retry:
> >       /*
> >        * Scan zonelist, looking for a zone with enough free.
> > @@ -4339,10 +4349,6 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
> >       } else if (unlikely(rt_task(current)) && !in_interrupt())
> >               alloc_flags |= ALLOC_HARDER;
> >
> > -#ifdef CONFIG_CMA
> > -     if (gfp_migratetype(gfp_mask) == MIGRATE_MOVABLE)
> > -             alloc_flags |= ALLOC_CMA;
> > -#endif
>
> I would just replace this here with:
> alloc_flags = current_alloc_flags(gfp_mask, alloc_flags);
>
> >       return alloc_flags;
> >  }
> >
> > @@ -4808,9 +4814,6 @@ static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order,
> >       if (should_fail_alloc_page(gfp_mask, order))
> >               return false;
> >
> > -     if (IS_ENABLED(CONFIG_CMA) && ac->migratetype == MIGRATE_MOVABLE)
> > -             *alloc_flags |= ALLOC_CMA;
>
> And same here... Ah, I see. current_alloc_flags() should probably take a
> migratetype parameter instead of gfp_mask then.
>
> > -
> >       return true;
> >  }
> >
> >
>
Vlastimil Babka July 16, 2020, 7:45 a.m. UTC | #4
On 7/16/20 9:27 AM, Joonsoo Kim wrote:
> On Wed, Jul 15, 2020 at 5:24 PM Vlastimil Babka <vbabka@suse.cz> wrote:
>> >  /*
>> >   * get_page_from_freelist goes through the zonelist trying to allocate
>> >   * a page.
>> > @@ -3706,6 +3714,8 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
>> >       struct pglist_data *last_pgdat_dirty_limit = NULL;
>> >       bool no_fallback;
>> >
>> > +     current_alloc_flags(gfp_mask, &alloc_flags);
>>
>> I don't see why to move the test here? It will still be executed in the
>> fastpath, if that's what you wanted to avoid.
> 
> I want it to be executed on the fastpath, too. The reason I moved it here
> is that alloc_flags can be reset on the slowpath; see the code around
> where __gfp_pfmemalloc_flags() is called. This is the only place where I
> can apply this option to all the allocation paths at once.

But get_page_from_freelist() might be called multiple times in the slowpath, and
also anyone looking for gfp and alloc flags setup will likely not examine this
function. I don't see a problem in having it in two places that already deal
with alloc_flags setup, as it is now.

> Thanks.
> 
>> > +
>> >  retry:
>> >       /*
>> >        * Scan zonelist, looking for a zone with enough free.
>> > @@ -4339,10 +4349,6 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
>> >       } else if (unlikely(rt_task(current)) && !in_interrupt())
>> >               alloc_flags |= ALLOC_HARDER;
>> >
>> > -#ifdef CONFIG_CMA
>> > -     if (gfp_migratetype(gfp_mask) == MIGRATE_MOVABLE)
>> > -             alloc_flags |= ALLOC_CMA;
>> > -#endif
>>
>> I would just replace this here with:
>> alloc_flags = current_alloc_flags(gfp_mask, alloc_flags);
>>
>> >       return alloc_flags;
>> >  }
>> >
>> > @@ -4808,9 +4814,6 @@ static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order,
>> >       if (should_fail_alloc_page(gfp_mask, order))
>> >               return false;
>> >
>> > -     if (IS_ENABLED(CONFIG_CMA) && ac->migratetype == MIGRATE_MOVABLE)
>> > -             *alloc_flags |= ALLOC_CMA;
>>
>> And same here... Ah, I see. current_alloc_flags() should probably take a
>> migratetype parameter instead of gfp_mask then.
>>
>> > -
>> >       return true;
>> >  }
>> >
>> >
>>
>
Joonsoo Kim July 17, 2020, 7:29 a.m. UTC | #5
On Thu, Jul 16, 2020 at 4:45 PM Vlastimil Babka <vbabka@suse.cz> wrote:
>
> On 7/16/20 9:27 AM, Joonsoo Kim wrote:
> > On Wed, Jul 15, 2020 at 5:24 PM Vlastimil Babka <vbabka@suse.cz> wrote:
> >> >  /*
> >> >   * get_page_from_freelist goes through the zonelist trying to allocate
> >> >   * a page.
> >> > @@ -3706,6 +3714,8 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
> >> >       struct pglist_data *last_pgdat_dirty_limit = NULL;
> >> >       bool no_fallback;
> >> >
> >> > +     current_alloc_flags(gfp_mask, &alloc_flags);
> >>
> >> I don't see why to move the test here? It will still be executed in the
> >> fastpath, if that's what you wanted to avoid.
> >
> > I want it to be executed on the fastpath, too. The reason I moved it here
> > is that alloc_flags can be reset on the slowpath; see the code around
> > where __gfp_pfmemalloc_flags() is called. This is the only place where I
> > can apply this option to all the allocation paths at once.
>
> But get_page_from_freelist() might be called multiple times in the slowpath, and
> also anyone looking for gfp and alloc flags setup will likely not examine this
> function. I don't see a problem in having it in two places that already deal
> with alloc_flags setup, as it is now.

I agree that anyone looking at the alloc flags will easily miss that
function. Okay, I will put it back in its original place, although we now
need to add one more spot.
The *three places* are gfp_to_alloc_flags(), prepare_alloc_pages() and
__gfp_pfmemalloc_flags().
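
Roughly (a hypothetical placement, assuming the return-value form of
current_alloc_flags()):

	/* in gfp_to_alloc_flags() and prepare_alloc_pages() */
	alloc_flags = current_alloc_flags(gfp_mask, alloc_flags);

	/* in __alloc_pages_slowpath(), where alloc_flags gets reset */
	reserve_flags = __gfp_pfmemalloc_flags(gfp_mask);
	if (reserve_flags)
		alloc_flags = current_alloc_flags(gfp_mask, reserve_flags);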

Thanks.
Vlastimil Babka July 17, 2020, 8:10 a.m. UTC | #6
On 7/17/20 9:29 AM, Joonsoo Kim wrote:
> On Thu, Jul 16, 2020 at 4:45 PM Vlastimil Babka <vbabka@suse.cz> wrote:
>>
>> On 7/16/20 9:27 AM, Joonsoo Kim wrote:
>> > On Wed, Jul 15, 2020 at 5:24 PM Vlastimil Babka <vbabka@suse.cz> wrote:
>> >> >  /*
>> >> >   * get_page_from_freelist goes through the zonelist trying to allocate
>> >> >   * a page.
>> >> > @@ -3706,6 +3714,8 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
>> >> >       struct pglist_data *last_pgdat_dirty_limit = NULL;
>> >> >       bool no_fallback;
>> >> >
>> >> > +     current_alloc_flags(gfp_mask, &alloc_flags);
>> >>
>> >> I don't see why to move the test here? It will still be executed in the
>> >> fastpath, if that's what you wanted to avoid.
>> >
>> > I want it to be executed on the fastpath, too. The reason I moved it here
>> > is that alloc_flags can be reset on the slowpath; see the code around
>> > where __gfp_pfmemalloc_flags() is called. This is the only place where I
>> > can apply this option to all the allocation paths at once.
>>
>> But get_page_from_freelist() might be called multiple times in the slowpath, and
>> also anyone looking for gfp and alloc flags setup will likely not examine this
>> function. I don't see a problem in having it in two places that already deal
>> with alloc_flags setup, as it is now.
> 
> I agree that anyone looking at the alloc flags will easily miss that
> function. Okay, I will put it back in its original place, although we now
> need to add one more spot.
> The *three places* are gfp_to_alloc_flags(), prepare_alloc_pages() and
> __gfp_pfmemalloc_flags().

Hm the check below should also work for ALLOC_OOM|ALLOC_NOCMA then.

	/* Avoid allocations with no watermarks from looping endlessly */
	if (tsk_is_oom_victim(current) &&
	    (alloc_flags == ALLOC_OOM ||
	     (gfp_mask & __GFP_NOMEMALLOC)))
		goto nopage;

Maybe it's simpler to change get_page_from_freelist() then. But document well.

> Thanks.
>
Vlastimil Babka July 17, 2020, 8:15 a.m. UTC | #7
On 7/17/20 10:10 AM, Vlastimil Babka wrote:
> On 7/17/20 9:29 AM, Joonsoo Kim wrote:
>> On Thu, Jul 16, 2020 at 4:45 PM Vlastimil Babka <vbabka@suse.cz> wrote:
>>>
>>> On 7/16/20 9:27 AM, Joonsoo Kim wrote:
>>> > On Wed, Jul 15, 2020 at 5:24 PM Vlastimil Babka <vbabka@suse.cz> wrote:
>>> >> >  /*
>>> >> >   * get_page_from_freelist goes through the zonelist trying to allocate
>>> >> >   * a page.
>>> >> > @@ -3706,6 +3714,8 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
>>> >> >       struct pglist_data *last_pgdat_dirty_limit = NULL;
>>> >> >       bool no_fallback;
>>> >> >
>>> >> > +     current_alloc_flags(gfp_mask, &alloc_flags);
>>> >>
>>> >> I don't see why to move the test here? It will still be executed in the
>>> >> fastpath, if that's what you wanted to avoid.
>>> >
>>> > I want it to be executed on the fastpath, too. The reason I moved it here
>>> > is that alloc_flags can be reset on the slowpath; see the code around
>>> > where __gfp_pfmemalloc_flags() is called. This is the only place where I
>>> > can apply this option to all the allocation paths at once.
>>>
>>> But get_page_from_freelist() might be called multiple times in the slowpath, and
>>> also anyone looking for gfp and alloc flags setup will likely not examine this
>>> function. I don't see a problem in having it in two places that already deal
>>> with alloc_flags setup, as it is now.
>> 
>> I agree that anyone looking at the alloc flags will easily miss that
>> function. Okay, I will put it back in its original place, although we now
>> need to add one more spot.
>> The *three places* are gfp_to_alloc_flags(), prepare_alloc_pages() and
>> __gfp_pfmemalloc_flags().
> 
> Hm the check below should also work for ALLOC_OOM|ALLOC_NOCMA then.
> 
> 	/* Avoid allocations with no watermarks from looping endlessly */
> 	if (tsk_is_oom_victim(current) &&
> 	    (alloc_flags == ALLOC_OOM ||
> 	     (gfp_mask & __GFP_NOMEMALLOC)))
> 		goto nopage;
> 
> Maybe it's simpler to change get_page_from_freelist() then. But document well.

But then we have e.g. should_reclaim_retry() which calls __zone_watermark_ok()
where ALLOC_CMA plays a role too, so that means we should have alloc_mask set up
correctly wrt ALLOC_CMA at the __alloc_pages_slowpath() level...
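
For reference, should_reclaim_retry() ends up doing roughly (paraphrased):

	if (__zone_watermark_ok(zone, order, min_wmark_pages(zone),
				ac->highest_zoneidx, alloc_flags,
				available))

so alloc_flags has to carry the right ALLOC_CMA value by that point.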

>> Thanks.
>> 
>
David Laight July 17, 2020, 8:32 a.m. UTC | #8
From: js1304@gmail.com
> Sent: 15 July 2020 06:05
> From: Joonsoo Kim <iamjoonsoo.kim@lge.com>
> 
> Currently, excluding the CMA area from page allocation is implemented
> using current_gfp_context(). However, there are two problems with this
> implementation.
...
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 6416d08..cd53894 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
...
> @@ -3693,6 +3691,16 @@ alloc_flags_nofragment(struct zone *zone, gfp_t gfp_mask)
>  	return alloc_flags;
>  }
> 
> +static inline void current_alloc_flags(gfp_t gfp_mask,
> +				unsigned int *alloc_flags)
> +{
> +	unsigned int pflags = READ_ONCE(current->flags);
> +
> +	if (!(pflags & PF_MEMALLOC_NOCMA) &&
> +		gfp_migratetype(gfp_mask) == MIGRATE_MOVABLE)
> +		*alloc_flags |= ALLOC_CMA;
> +}
> +

I'd guess this would be easier to understand and probably generate
better code if renamed and used as:
	alloc_flags |= can_alloc_cma(gfp_mask);

Given it is a 'static inline' the compiler might end up
generating the same code.
If you needed to clear a flag, doing:
	alloc_flags = current_alloc_flags(gfp_mask, alloc_flags);
is much better for a non-inlined function.
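
A sketch of that helper (hypothetical; same logic as the patch):

/* Return ALLOC_CMA if this context may allocate from CMA pageblocks */
static inline unsigned int can_alloc_cma(gfp_t gfp_mask)
{
#ifdef CONFIG_CMA
	if (!(READ_ONCE(current->flags) & PF_MEMALLOC_NOCMA) &&
	    gfp_migratetype(gfp_mask) == MIGRATE_MOVABLE)
		return ALLOC_CMA;
#endif
	return 0;
}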

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
Joonsoo Kim July 17, 2020, 9:12 a.m. UTC | #9
On Fri, Jul 17, 2020 at 5:15 PM Vlastimil Babka <vbabka@suse.cz> wrote:
>
> On 7/17/20 10:10 AM, Vlastimil Babka wrote:
> > On 7/17/20 9:29 AM, Joonsoo Kim wrote:
> >> On Thu, Jul 16, 2020 at 4:45 PM Vlastimil Babka <vbabka@suse.cz> wrote:
> >>>
> >>> On 7/16/20 9:27 AM, Joonsoo Kim wrote:
> >>> > On Wed, Jul 15, 2020 at 5:24 PM Vlastimil Babka <vbabka@suse.cz> wrote:
> >>> >> >  /*
> >>> >> >   * get_page_from_freelist goes through the zonelist trying to allocate
> >>> >> >   * a page.
> >>> >> > @@ -3706,6 +3714,8 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
> >>> >> >       struct pglist_data *last_pgdat_dirty_limit = NULL;
> >>> >> >       bool no_fallback;
> >>> >> >
> >>> >> > +     current_alloc_flags(gfp_mask, &alloc_flags);
> >>> >>
> >>> >> I don't see why to move the test here? It will still be executed in the
> >>> >> fastpath, if that's what you wanted to avoid.
> >>> >
> >>> > I want it to be executed on the fastpath, too. The reason I moved it here
> >>> > is that alloc_flags can be reset on the slowpath; see the code around
> >>> > where __gfp_pfmemalloc_flags() is called. This is the only place where I
> >>> > can apply this option to all the allocation paths at once.
> >>>
> >>> But get_page_from_freelist() might be called multiple times in the slowpath, and
> >>> also anyone looking for gfp and alloc flags setup will likely not examine this
> >>> function. I don't see a problem in having it in two places that already deal
> >>> with alloc_flags setup, as it is now.
> >>
> >> I agree that anyone looking at the alloc flags will easily miss that
> >> function. Okay, I will put it back in its original place, although we now
> >> need to add one more spot.
> >> The *three places* are gfp_to_alloc_flags(), prepare_alloc_pages() and
> >> __gfp_pfmemalloc_flags().
> >
> > Hm the check below should also work for ALLOC_OOM|ALLOC_NOCMA then.
> >
> > 	/* Avoid allocations with no watermarks from looping endlessly */
> > 	if (tsk_is_oom_victim(current) &&
> > 	    (alloc_flags == ALLOC_OOM ||
> > 	     (gfp_mask & __GFP_NOMEMALLOC)))
> > 		goto nopage;
> >
> > Maybe it's simpler to change get_page_from_freelist() then. But document well.
>
> But then we have e.g. should_reclaim_retry() which calls __zone_watermark_ok()
> where ALLOC_CMA plays a role too, so that means we should have alloc_mask set up
> correctly wrt ALLOC_CMA at the __alloc_pages_slowpath() level...

Good catch! Hmm... okay, it will necessarily be handled in three places.
I will fix it in the next version. Anyway, we need some clean-up of the
alloc_flags handling, since the current state is not good for
maintainability.

Thanks.
Joonsoo Kim July 17, 2020, 9:14 a.m. UTC | #10
On Fri, Jul 17, 2020 at 5:32 PM David Laight <David.Laight@aculab.com> wrote:
>
> From: js1304@gmail.com
> > Sent: 15 July 2020 06:05
> > From: Joonsoo Kim <iamjoonsoo.kim@lge.com>
> >
> > Currently, excluding the CMA area from page allocation is implemented
> > using current_gfp_context(). However, there are two problems with this
> > implementation.
> ...
> > diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> > index 6416d08..cd53894 100644
> > --- a/mm/page_alloc.c
> > +++ b/mm/page_alloc.c
> ...
> > @@ -3693,6 +3691,16 @@ alloc_flags_nofragment(struct zone *zone, gfp_t gfp_mask)
> >       return alloc_flags;
> >  }
> >
> > +static inline void current_alloc_flags(gfp_t gfp_mask,
> > +                             unsigned int *alloc_flags)
> > +{
> > +     unsigned int pflags = READ_ONCE(current->flags);
> > +
> > +     if (!(pflags & PF_MEMALLOC_NOCMA) &&
> > +             gfp_migratetype(gfp_mask) == MIGRATE_MOVABLE)
> > +             *alloc_flags |= ALLOC_CMA;
> > +}
> > +
>
> I'd guess this would be easier to understand and probably generate
> better code if renamed and used as:
>         alloc_flags |= can_alloc_cma(gfp_mask);
>
> Given it is a 'static inline' the compiler might end up
> generating the same code.
> If you needed to clear a flag, doing:
>         alloc_flags = current_alloc_flags(gfp_mask, alloc_flags);
> is much better for a non-inlined function.

Vlastimil suggested this approach and I have agreed with it. Anyway,
thanks for the suggestion.

Thanks.

Patch

diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index 44ad5b7..a73847a 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -191,10 +191,6 @@  static inline gfp_t current_gfp_context(gfp_t flags)
 			flags &= ~(__GFP_IO | __GFP_FS);
 		else if (pflags & PF_MEMALLOC_NOFS)
 			flags &= ~__GFP_FS;
-#ifdef CONFIG_CMA
-		if (pflags & PF_MEMALLOC_NOCMA)
-			flags &= ~__GFP_MOVABLE;
-#endif
 	}
 	return flags;
 }
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6416d08..cd53894 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2791,7 +2791,7 @@  __rmqueue(struct zone *zone, unsigned int order, int migratetype,
 	 * allocating from CMA when over half of the zone's free memory
 	 * is in the CMA area.
 	 */
-	if (migratetype == MIGRATE_MOVABLE &&
+	if (alloc_flags & ALLOC_CMA &&
 	    zone_page_state(zone, NR_FREE_CMA_PAGES) >
 	    zone_page_state(zone, NR_FREE_PAGES) / 2) {
 		page = __rmqueue_cma_fallback(zone, order);
@@ -2802,7 +2802,7 @@  __rmqueue(struct zone *zone, unsigned int order, int migratetype,
 retry:
 	page = __rmqueue_smallest(zone, order, migratetype);
 	if (unlikely(!page)) {
-		if (migratetype == MIGRATE_MOVABLE)
+		if (alloc_flags & ALLOC_CMA)
 			page = __rmqueue_cma_fallback(zone, order);
 
 		if (!page && __rmqueue_fallback(zone, order, migratetype,
@@ -3502,11 +3502,9 @@  static inline long __zone_watermark_unusable_free(struct zone *z,
 	if (likely(!alloc_harder))
 		unusable_free += z->nr_reserved_highatomic;
 
-#ifdef CONFIG_CMA
 	/* If allocation can't use CMA areas don't use free CMA pages */
-	if (!(alloc_flags & ALLOC_CMA))
+	if (IS_ENABLED(CONFIG_CMA) && !(alloc_flags & ALLOC_CMA))
 		unusable_free += zone_page_state(z, NR_FREE_CMA_PAGES);
-#endif
 
 	return unusable_free;
 }
@@ -3693,6 +3691,16 @@  alloc_flags_nofragment(struct zone *zone, gfp_t gfp_mask)
 	return alloc_flags;
 }
 
+static inline void current_alloc_flags(gfp_t gfp_mask,
+				unsigned int *alloc_flags)
+{
+	unsigned int pflags = READ_ONCE(current->flags);
+
+	if (!(pflags & PF_MEMALLOC_NOCMA) &&
+		gfp_migratetype(gfp_mask) == MIGRATE_MOVABLE)
+		*alloc_flags |= ALLOC_CMA;
+}
+
 /*
  * get_page_from_freelist goes through the zonelist trying to allocate
  * a page.
@@ -3706,6 +3714,8 @@  get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
 	struct pglist_data *last_pgdat_dirty_limit = NULL;
 	bool no_fallback;
 
+	current_alloc_flags(gfp_mask, &alloc_flags);
+
 retry:
 	/*
 	 * Scan zonelist, looking for a zone with enough free.
@@ -4339,10 +4349,6 @@  gfp_to_alloc_flags(gfp_t gfp_mask)
 	} else if (unlikely(rt_task(current)) && !in_interrupt())
 		alloc_flags |= ALLOC_HARDER;
 
-#ifdef CONFIG_CMA
-	if (gfp_migratetype(gfp_mask) == MIGRATE_MOVABLE)
-		alloc_flags |= ALLOC_CMA;
-#endif
 	return alloc_flags;
 }
 
@@ -4808,9 +4814,6 @@  static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order,
 	if (should_fail_alloc_page(gfp_mask, order))
 		return false;
 
-	if (IS_ENABLED(CONFIG_CMA) && ac->migratetype == MIGRATE_MOVABLE)
-		*alloc_flags |= ALLOC_CMA;
-
 	return true;
 }