[2/4] mm/compaction: enable mobile-page migration
diff mbox

Message ID 1436776519-17337-3-git-send-email-gioh.kim@lge.com
State New
Headers show

Commit Message

Gioh Kim July 13, 2015, 8:35 a.m. UTC
From: Gioh Kim <gurugio@hanmail.net>

Add framework to register callback functions and check page mobility.
There are some modes for page isolation so that isolate interface
has arguments of page address and isolation mode while putback
interface has only page address as argument.

Signed-off-by: Gioh Kim <gioh.kim@lge.com>
Acked-by: Rafael Aquini <aquini@redhat.com>
---
 fs/proc/page.c                         |  3 ++
 include/linux/compaction.h             | 80 ++++++++++++++++++++++++++++++++++
 include/linux/fs.h                     |  2 +
 include/linux/page-flags.h             | 19 ++++++++
 include/uapi/linux/kernel-page-flags.h |  1 +
 5 files changed, 105 insertions(+)

Comments

Vlastimil Babka July 27, 2015, 1:55 p.m. UTC | #1
On 07/13/2015 10:35 AM, Gioh Kim wrote:
> From: Gioh Kim <gurugio@hanmail.net>
>
> Add framework to register callback functions and check page mobility.
> There are some modes for page isolation so that isolate interface
> has arguments of page address and isolation mode while putback
> interface has only page address as argument.

Note that unlike what the subject suggests, this doesn't really enable 
mobile-page migration inside compaction, since that only happens with 
patch 3. This might theoretically affect some cherry-pick backports that 
don't care about balloon pages. I can imagine that can easily happen in 
the world of mobile devices?
It would thus be somewhat cleaner if this patch was complete in that sense.

> Signed-off-by: Gioh Kim <gioh.kim@lge.com>
> Acked-by: Rafael Aquini <aquini@redhat.com>
> ---
>   fs/proc/page.c                         |  3 ++
>   include/linux/compaction.h             | 80 ++++++++++++++++++++++++++++++++++
>   include/linux/fs.h                     |  2 +
>   include/linux/page-flags.h             | 19 ++++++++
>   include/uapi/linux/kernel-page-flags.h |  1 +
>   5 files changed, 105 insertions(+)
>
> diff --git a/fs/proc/page.c b/fs/proc/page.c
> index 7eee2d8..a4f5a00 100644
> --- a/fs/proc/page.c
> +++ b/fs/proc/page.c
> @@ -146,6 +146,9 @@ u64 stable_page_flags(struct page *page)
>   	if (PageBalloon(page))
>   		u |= 1 << KPF_BALLOON;
>
> +	if (PageMobile(page))
> +		u |= 1 << KPF_MOBILE;

PageMovable() would probably be as good a name and correspond to 
MIGRATE_MOVABLE somewhat, unlike a completely new term. Whatever driver 
starts using this should probably change allocation flags to allocate 
MIGRATE_MOVABLE, so that it works fine with what fragmentation avoidance 
expects. Guess I should have said that earlier, but can you still 
reconsider?

> +
>   	u |= kpf_copy_bit(k, KPF_LOCKED,	PG_locked);
>
>   	u |= kpf_copy_bit(k, KPF_SLAB,		PG_slab);
> diff --git a/include/linux/compaction.h b/include/linux/compaction.h
> index aa8f61c..f693072 100644
> --- a/include/linux/compaction.h
> +++ b/include/linux/compaction.h
> @@ -1,6 +1,9 @@
>   #ifndef _LINUX_COMPACTION_H
>   #define _LINUX_COMPACTION_H
>
> +#include <linux/page-flags.h>
> +#include <linux/pagemap.h>
> +
>   /* Return values for compact_zone() and try_to_compact_pages() */
>   /* compaction didn't start as it was deferred due to past failures */
>   #define COMPACT_DEFERRED	0
> @@ -51,6 +54,70 @@ extern void compaction_defer_reset(struct zone *zone, int order,
>   				bool alloc_success);
>   extern bool compaction_restarting(struct zone *zone, int order);
>
> +static inline bool mobile_page(struct page *page)
> +{
> +	return page->mapping &&	(PageMobile(page) || PageBalloon(page));
> +}

I would put this definition to linux/page-flags.h and rename it to 
page_mobile (or better page_movable()), which is more common ordering.

> +
> +static inline bool isolate_mobilepage(struct page *page, isolate_mode_t mode)

Does this have to be in compaction.h? The only user is compaction.c so 
probably move it there, and if there ever is another module using this 
in the future, we can move it to a more appropriate place and declare it 
in e.g. mm/internal.h.

> +{
> +	bool ret = false;
> +
> +	/*
> +	 * Avoid burning cycles with pages that are yet under __free_pages(),
> +	 * or just got freed under us.
> +	 *
> +	 * In case we 'win' a race for a mobile page being freed under us and
> +	 * raise its refcount preventing __free_pages() from doing its job
> +	 * the put_page() at the end of this block will take care of
> +	 * release this page, thus avoiding a nasty leakage.
> +	 */
> +	if (unlikely(!get_page_unless_zero(page)))
> +		goto out;
> +
> +	/*
> +	 * As mobile pages are not isolated from LRU lists, concurrent
> +	 * compaction threads can race against page migration functions
> +	 * as well as race against the releasing a page.
> +	 *
> +	 * In order to avoid having an already isolated mobile page
> +	 * being (wrongly) re-isolated while it is under migration,
> +	 * or to avoid attempting to isolate pages being released,
> +	 * lets be sure we have the page lock
> +	 * before proceeding with the mobile page isolation steps.
> +	 */
> +	if (unlikely(!trylock_page(page)))
> +		goto out_putpage;
> +
> +	if (!(mobile_page(page) && page->mapping->a_ops->isolatepage))
> +		goto out_not_isolated;
> +	ret = page->mapping->a_ops->isolatepage(page, mode);
> +	if (!ret)
> +		goto out_not_isolated;
> +	unlock_page(page);
> +	return ret;
> +
> +out_not_isolated:
> +	unlock_page(page);
> +out_putpage:
> +	put_page(page);
> +out:
> +	return ret;
> +}
> +
> +static inline void putback_mobilepage(struct page *page)

Likewise, this could go to migrate.c. Or maybe together with 
isolate_mobilepage() if you don't want to split them.

> +{
> +	/*
> +	 * 'lock_page()' stabilizes the page and prevents races against
> +	 * concurrent isolation threads attempting to re-isolate it.
> +	 */
> +	lock_page(page);
> +	if (page->mapping && page->mapping->a_ops->putbackpage)
> +		page->mapping->a_ops->putbackpage(page);
> +	unlock_page(page);
> +	/* drop the extra ref count taken for mobile page isolation */
> +	put_page(page);
> +}
>   #else
>   static inline unsigned long try_to_compact_pages(gfp_t gfp_mask,
>   			unsigned int order, int alloc_flags,
> @@ -83,6 +150,19 @@ static inline bool compaction_deferred(struct zone *zone, int order)
>   	return true;
>   }
>
> +static inline bool mobile_page(struct page *page)
> +{
> +	return false;
> +}
> +
> +static inline bool isolate_mobilepage(struct page *page, isolate_mode_t mode)
> +{
> +	return false;
> +}
> +
> +static inline void putback_mobilepage(struct page *page)
> +{
> +}
>   #endif /* CONFIG_COMPACTION */
>
>   #if defined(CONFIG_COMPACTION) && defined(CONFIG_SYSFS) && defined(CONFIG_NUMA)
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index a0653e5..2cc4b24 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -396,6 +396,8 @@ struct address_space_operations {
>   	 */
>   	int (*migratepage) (struct address_space *,
>   			struct page *, struct page *, enum migrate_mode);
> +	bool (*isolatepage) (struct page *, isolate_mode_t);
> +	void (*putbackpage) (struct page *);
>   	int (*launder_page) (struct page *);
>   	int (*is_partially_uptodate) (struct page *, unsigned long,
>   					unsigned long);
> diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
> index f34e040..abef145 100644
> --- a/include/linux/page-flags.h
> +++ b/include/linux/page-flags.h
> @@ -582,6 +582,25 @@ static inline void __ClearPageBalloon(struct page *page)
>   	atomic_set(&page->_mapcount, -1);
>   }
>
> +#define PAGE_MOBILE_MAPCOUNT_VALUE (-255)
> +
> +static inline int PageMobile(struct page *page)
> +{
> +	return atomic_read(&page->_mapcount) == PAGE_MOBILE_MAPCOUNT_VALUE;
> +}
> +
> +static inline void __SetPageMobile(struct page *page)
> +{
> +	VM_BUG_ON_PAGE(atomic_read(&page->_mapcount) != -1, page);
> +	atomic_set(&page->_mapcount, PAGE_MOBILE_MAPCOUNT_VALUE);
> +}
> +
> +static inline void __ClearPageMobile(struct page *page)
> +{
> +	VM_BUG_ON_PAGE(!PageMobile(page), page);
> +	atomic_set(&page->_mapcount, -1);
> +}
> +
>   /*
>    * If network-based swap is enabled, sl*b must keep track of whether pages
>    * were allocated from pfmemalloc reserves.
> diff --git a/include/uapi/linux/kernel-page-flags.h b/include/uapi/linux/kernel-page-flags.h
> index a6c4962..d50d9e8 100644
> --- a/include/uapi/linux/kernel-page-flags.h
> +++ b/include/uapi/linux/kernel-page-flags.h
> @@ -33,6 +33,7 @@
>   #define KPF_THP			22
>   #define KPF_BALLOON		23
>   #define KPF_ZERO_PAGE		24
> +#define KPF_MOBILE		25
>
>
>   #endif /* _UAPILINUX_KERNEL_PAGE_FLAGS_H */
>
Konstantin Khlebnikov July 27, 2015, 6:56 p.m. UTC | #2
On Mon, Jul 27, 2015 at 4:55 PM, Vlastimil Babka <vbabka@suse.cz> wrote:
> On 07/13/2015 10:35 AM, Gioh Kim wrote:
>>
>> From: Gioh Kim <gurugio@hanmail.net>
>>
>> Add framework to register callback functions and check page mobility.
>> There are some modes for page isolation so that isolate interface
>> has arguments of page address and isolation mode while putback
>> interface has only page address as argument.
>
>
> Note that unlike what subject suggest, this doesn't really enable
> mobile-page migration inside compaction, since that only happens with patch
> 3. This might theoretically affect some cherry-pick backports that don't
> care about balloon pages. I can imagine that can easily happen in the world
> of mobile devices?
> It would thus be somewhat cleaner if this patch was complete in that sense.
>
>> Signed-off-by: Gioh Kim <gioh.kim@lge.com>
>> Acked-by: Rafael Aquini <aquini@redhat.com>
>> ---
>>   fs/proc/page.c                         |  3 ++
>>   include/linux/compaction.h             | 80
>> ++++++++++++++++++++++++++++++++++
>>   include/linux/fs.h                     |  2 +
>>   include/linux/page-flags.h             | 19 ++++++++
>>   include/uapi/linux/kernel-page-flags.h |  1 +
>>   5 files changed, 105 insertions(+)
>>
>> diff --git a/fs/proc/page.c b/fs/proc/page.c
>> index 7eee2d8..a4f5a00 100644
>> --- a/fs/proc/page.c
>> +++ b/fs/proc/page.c
>> @@ -146,6 +146,9 @@ u64 stable_page_flags(struct page *page)
>>         if (PageBalloon(page))
>>                 u |= 1 << KPF_BALLOON;
>>
>> +       if (PageMobile(page))
>> +               u |= 1 << KPF_MOBILE;
>
>
> PageMovable() would probably be as good a name and correspond to
> MIGRATE_MOVABLE somewhat, unlike a completely new term. Whatever driver
> starts to using this should probably change allocation flags to allocate
> MIGRATE_MOVABLE, so that it works fine with what fragmentation avoidance
> expects. Guess I should have said that earlier, but can you still
> reconsider?

Well, I've suggested naming it "mobile" because there are already a lot of things
called "movable". Mobile pages are special subset of movable pages: they
are non-lru pages and define their own rules of moving in address
space operations.

Also there's a little pun: I guess main user will zram which is used
mostly in embedded/mobile devices.

>
>> +
>>         u |= kpf_copy_bit(k, KPF_LOCKED,        PG_locked);
>>
>>         u |= kpf_copy_bit(k, KPF_SLAB,          PG_slab);
>> diff --git a/include/linux/compaction.h b/include/linux/compaction.h
>> index aa8f61c..f693072 100644
>> --- a/include/linux/compaction.h
>> +++ b/include/linux/compaction.h
>> @@ -1,6 +1,9 @@
>>   #ifndef _LINUX_COMPACTION_H
>>   #define _LINUX_COMPACTION_H
>>
>> +#include <linux/page-flags.h>
>> +#include <linux/pagemap.h>
>> +
>>   /* Return values for compact_zone() and try_to_compact_pages() */
>>   /* compaction didn't start as it was deferred due to past failures */
>>   #define COMPACT_DEFERRED      0
>> @@ -51,6 +54,70 @@ extern void compaction_defer_reset(struct zone *zone,
>> int order,
>>                                 bool alloc_success);
>>   extern bool compaction_restarting(struct zone *zone, int order);
>>
>> +static inline bool mobile_page(struct page *page)
>> +{
>> +       return page->mapping && (PageMobile(page) || PageBalloon(page));
>> +}
>
>
> I would put this definition to linux/page-flags.h and rename it to
> page_mobile (or better page_movable()), which is more common ordering.
>
>> +
>> +static inline bool isolate_mobilepage(struct page *page, isolate_mode_t
>> mode)
>
>
> Does this have to be in compaction.h? The only user is compaction.c so
> probably move it there, and if there ever is another module using this in
> the future, we can move it to a more appropriate place and declare it in
> e.g. mm/internal.h.
>
>
>> +{
>> +       bool ret = false;
>> +
>> +       /*
>> +        * Avoid burning cycles with pages that are yet under
>> __free_pages(),
>> +        * or just got freed under us.
>> +        *
>> +        * In case we 'win' a race for a mobile page being freed under us
>> and
>> +        * raise its refcount preventing __free_pages() from doing its job
>> +        * the put_page() at the end of this block will take care of
>> +        * release this page, thus avoiding a nasty leakage.
>> +        */
>> +       if (unlikely(!get_page_unless_zero(page)))
>> +               goto out;
>> +
>> +       /*
>> +        * As mobile pages are not isolated from LRU lists, concurrent
>> +        * compaction threads can race against page migration functions
>> +        * as well as race against the releasing a page.
>> +        *
>> +        * In order to avoid having an already isolated mobile page
>> +        * being (wrongly) re-isolated while it is under migration,
>> +        * or to avoid attempting to isolate pages being released,
>> +        * lets be sure we have the page lock
>> +        * before proceeding with the mobile page isolation steps.
>> +        */
>> +       if (unlikely(!trylock_page(page)))
>> +               goto out_putpage;
>> +
>> +       if (!(mobile_page(page) && page->mapping->a_ops->isolatepage))
>> +               goto out_not_isolated;
>> +       ret = page->mapping->a_ops->isolatepage(page, mode);
>> +       if (!ret)
>> +               goto out_not_isolated;
>> +       unlock_page(page);
>> +       return ret;
>> +
>> +out_not_isolated:
>> +       unlock_page(page);
>> +out_putpage:
>> +       put_page(page);
>> +out:
>> +       return ret;
>> +}
>> +
>> +static inline void putback_mobilepage(struct page *page)
>
>
> Likewise, this could go to migrate.c. Or maybe together with
> isolate_mobilepage() if you don't want to split them.
>
>
>> +{
>> +       /*
>> +        * 'lock_page()' stabilizes the page and prevents races against
>> +        * concurrent isolation threads attempting to re-isolate it.
>> +        */
>> +       lock_page(page);
>> +       if (page->mapping && page->mapping->a_ops->putbackpage)
>> +               page->mapping->a_ops->putbackpage(page);
>> +       unlock_page(page);
>> +       /* drop the extra ref count taken for mobile page isolation */
>> +       put_page(page);
>> +}
>>   #else
>>   static inline unsigned long try_to_compact_pages(gfp_t gfp_mask,
>>                         unsigned int order, int alloc_flags,
>> @@ -83,6 +150,19 @@ static inline bool compaction_deferred(struct zone
>> *zone, int order)
>>         return true;
>>   }
>>
>> +static inline bool mobile_page(struct page *page)
>> +{
>> +       return false;
>> +}
>> +
>> +static inline bool isolate_mobilepage(struct page *page, isolate_mode_t
>> mode)
>> +{
>> +       return false;
>> +}
>> +
>> +static inline void putback_mobilepage(struct page *page)
>> +{
>> +}
>>   #endif /* CONFIG_COMPACTION */
>>
>>   #if defined(CONFIG_COMPACTION) && defined(CONFIG_SYSFS) &&
>> defined(CONFIG_NUMA)
>> diff --git a/include/linux/fs.h b/include/linux/fs.h
>> index a0653e5..2cc4b24 100644
>> --- a/include/linux/fs.h
>> +++ b/include/linux/fs.h
>> @@ -396,6 +396,8 @@ struct address_space_operations {
>>          */
>>         int (*migratepage) (struct address_space *,
>>                         struct page *, struct page *, enum migrate_mode);
>> +       bool (*isolatepage) (struct page *, isolate_mode_t);
>> +       void (*putbackpage) (struct page *);
>>         int (*launder_page) (struct page *);
>>         int (*is_partially_uptodate) (struct page *, unsigned long,
>>                                         unsigned long);
>> diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
>> index f34e040..abef145 100644
>> --- a/include/linux/page-flags.h
>> +++ b/include/linux/page-flags.h
>> @@ -582,6 +582,25 @@ static inline void __ClearPageBalloon(struct page
>> *page)
>>         atomic_set(&page->_mapcount, -1);
>>   }
>>
>> +#define PAGE_MOBILE_MAPCOUNT_VALUE (-255)
>> +
>> +static inline int PageMobile(struct page *page)
>> +{
>> +       return atomic_read(&page->_mapcount) ==
>> PAGE_MOBILE_MAPCOUNT_VALUE;
>> +}
>> +
>> +static inline void __SetPageMobile(struct page *page)
>> +{
>> +       VM_BUG_ON_PAGE(atomic_read(&page->_mapcount) != -1, page);
>> +       atomic_set(&page->_mapcount, PAGE_MOBILE_MAPCOUNT_VALUE);
>> +}
>> +
>> +static inline void __ClearPageMobile(struct page *page)
>> +{
>> +       VM_BUG_ON_PAGE(!PageMobile(page), page);
>> +       atomic_set(&page->_mapcount, -1);
>> +}
>> +
>>   /*
>>    * If network-based swap is enabled, sl*b must keep track of whether
>> pages
>>    * were allocated from pfmemalloc reserves.
>> diff --git a/include/uapi/linux/kernel-page-flags.h
>> b/include/uapi/linux/kernel-page-flags.h
>> index a6c4962..d50d9e8 100644
>> --- a/include/uapi/linux/kernel-page-flags.h
>> +++ b/include/uapi/linux/kernel-page-flags.h
>> @@ -33,6 +33,7 @@
>>   #define KPF_THP                       22
>>   #define KPF_BALLOON           23
>>   #define KPF_ZERO_PAGE         24
>> +#define KPF_MOBILE             25
>>
>>
>>   #endif /* _UAPILINUX_KERNEL_PAGE_FLAGS_H */
>>
>
Gioh Kim July 28, 2015, 12:21 a.m. UTC | #3
> On Mon, Jul 27, 2015 at 4:55 PM, Vlastimil Babka <vbabka@suse.cz> wrote:
>> On 07/13/2015 10:35 AM, Gioh Kim wrote:
>>>
>>> From: Gioh Kim <gurugio@hanmail.net>
>>>
>>> Add framework to register callback functions and check page mobility.
>>> There are some modes for page isolation so that isolate interface
>>> has arguments of page address and isolation mode while putback
>>> interface has only page address as argument.
>>
>>
>> Note that unlike what subject suggest, this doesn't really enable
>> mobile-page migration inside compaction, since that only happens with patch
>> 3. This might theoretically affect some cherry-pick backports that don't
>> care about balloon pages. I can imagine that can easily happen in the world
>> of mobile devices?
>> It would thus be somewhat cleaner if this patch was complete in that sense.

You have a point.
Current 2/4 patch is lack of calling migrate/isolate.
It is not complete without 3/4.

I'll add calling migrate/isolate() at next spin.


>>
>>> Signed-off-by: Gioh Kim <gioh.kim@lge.com>
>>> Acked-by: Rafael Aquini <aquini@redhat.com>
>>> ---
>>>    fs/proc/page.c                         |  3 ++
>>>    include/linux/compaction.h             | 80
>>> ++++++++++++++++++++++++++++++++++
>>>    include/linux/fs.h                     |  2 +
>>>    include/linux/page-flags.h             | 19 ++++++++
>>>    include/uapi/linux/kernel-page-flags.h |  1 +
>>>    5 files changed, 105 insertions(+)
>>>
>>> diff --git a/fs/proc/page.c b/fs/proc/page.c
>>> index 7eee2d8..a4f5a00 100644
>>> --- a/fs/proc/page.c
>>> +++ b/fs/proc/page.c
>>> @@ -146,6 +146,9 @@ u64 stable_page_flags(struct page *page)
>>>          if (PageBalloon(page))
>>>                  u |= 1 << KPF_BALLOON;
>>>
>>> +       if (PageMobile(page))
>>> +               u |= 1 << KPF_MOBILE;
>>
>>
>> PageMovable() would probably be as good a name and correspond to
>> MIGRATE_MOVABLE somewhat, unlike a completely new term. Whatever driver
>> starts to using this should probably change allocation flags to allocate
>> MIGRATE_MOVABLE, so that it works fine with what fragmentation avoidance
>> expects. Guess I should have said that earlier, but can you still
>> reconsider?
>
> Well, I've suggested to name it "mobile" because there's already a lot of things
> called "movable". Mobile pages are special subset of movable pages: they
> are non-lru pages and define their own rules of moving in address
> space operations.
>
> Also there's a little pun: I guess main user will zram which is used
> mostly in embedded/mobile devices.

I like "mobile".
"movable" is for pages allocated with GFP_MOVABLE.
I think "movable" is a little ambiguous in this situation.

>
>>
>>> +
>>>          u |= kpf_copy_bit(k, KPF_LOCKED,        PG_locked);
>>>
>>>          u |= kpf_copy_bit(k, KPF_SLAB,          PG_slab);
>>> diff --git a/include/linux/compaction.h b/include/linux/compaction.h
>>> index aa8f61c..f693072 100644
>>> --- a/include/linux/compaction.h
>>> +++ b/include/linux/compaction.h
>>> @@ -1,6 +1,9 @@
>>>    #ifndef _LINUX_COMPACTION_H
>>>    #define _LINUX_COMPACTION_H
>>>
>>> +#include <linux/page-flags.h>
>>> +#include <linux/pagemap.h>
>>> +
>>>    /* Return values for compact_zone() and try_to_compact_pages() */
>>>    /* compaction didn't start as it was deferred due to past failures */
>>>    #define COMPACT_DEFERRED      0
>>> @@ -51,6 +54,70 @@ extern void compaction_defer_reset(struct zone *zone,
>>> int order,
>>>                                  bool alloc_success);
>>>    extern bool compaction_restarting(struct zone *zone, int order);
>>>
>>> +static inline bool mobile_page(struct page *page)
>>> +{
>>> +       return page->mapping && (PageMobile(page) || PageBalloon(page));
>>> +}
>>
>>
>> I would put this definition to linux/page-flags.h and rename it to
>> page_mobile (or better page_movable()), which is more common ordering.
>>
>>> +
>>> +static inline bool isolate_mobilepage(struct page *page, isolate_mode_t
>>> mode)
>>
>>
>> Does this have to be in compaction.h? The only user is compaction.c so
>> probably move it there, and if there ever is another module using this in
>> the future, we can move it to a more appropriate place and declare it in
>> e.g. mm/internal.h.

I think compaction.c is suitable.

>>
>>
>>> +{
>>> +       bool ret = false;
>>> +
>>> +       /*
>>> +        * Avoid burning cycles with pages that are yet under
>>> __free_pages(),
>>> +        * or just got freed under us.
>>> +        *
>>> +        * In case we 'win' a race for a mobile page being freed under us
>>> and
>>> +        * raise its refcount preventing __free_pages() from doing its job
>>> +        * the put_page() at the end of this block will take care of
>>> +        * release this page, thus avoiding a nasty leakage.
>>> +        */
>>> +       if (unlikely(!get_page_unless_zero(page)))
>>> +               goto out;
>>> +
>>> +       /*
>>> +        * As mobile pages are not isolated from LRU lists, concurrent
>>> +        * compaction threads can race against page migration functions
>>> +        * as well as race against the releasing a page.
>>> +        *
>>> +        * In order to avoid having an already isolated mobile page
>>> +        * being (wrongly) re-isolated while it is under migration,
>>> +        * or to avoid attempting to isolate pages being released,
>>> +        * lets be sure we have the page lock
>>> +        * before proceeding with the mobile page isolation steps.
>>> +        */
>>> +       if (unlikely(!trylock_page(page)))
>>> +               goto out_putpage;
>>> +
>>> +       if (!(mobile_page(page) && page->mapping->a_ops->isolatepage))
>>> +               goto out_not_isolated;
>>> +       ret = page->mapping->a_ops->isolatepage(page, mode);
>>> +       if (!ret)
>>> +               goto out_not_isolated;
>>> +       unlock_page(page);
>>> +       return ret;
>>> +
>>> +out_not_isolated:
>>> +       unlock_page(page);
>>> +out_putpage:
>>> +       put_page(page);
>>> +out:
>>> +       return ret;
>>> +}
>>> +
>>> +static inline void putback_mobilepage(struct page *page)
>>
>>
>> Likewise, this could go to migrate.c. Or maybe together with
>> isolate_mobilepage() if you don't want to split them.
I got it.

>>
>>
>>> +{
>>> +       /*
>>> +        * 'lock_page()' stabilizes the page and prevents races against
>>> +        * concurrent isolation threads attempting to re-isolate it.
>>> +        */
>>> +       lock_page(page);
>>> +       if (page->mapping && page->mapping->a_ops->putbackpage)
>>> +               page->mapping->a_ops->putbackpage(page);
>>> +       unlock_page(page);
>>> +       /* drop the extra ref count taken for mobile page isolation */
>>> +       put_page(page);
>>> +}
>>>    #else
>>>    static inline unsigned long try_to_compact_pages(gfp_t gfp_mask,
>>>                          unsigned int order, int alloc_flags,
>>> @@ -83,6 +150,19 @@ static inline bool compaction_deferred(struct zone
>>> *zone, int order)
>>>          return true;
>>>    }
>>>
>>> +static inline bool mobile_page(struct page *page)
>>> +{
>>> +       return false;
>>> +}
>>> +
>>> +static inline bool isolate_mobilepage(struct page *page, isolate_mode_t
>>> mode)
>>> +{
>>> +       return false;
>>> +}
>>> +
>>> +static inline void putback_mobilepage(struct page *page)
>>> +{
>>> +}
>>>    #endif /* CONFIG_COMPACTION */
>>>
>>>    #if defined(CONFIG_COMPACTION) && defined(CONFIG_SYSFS) &&
>>> defined(CONFIG_NUMA)
>>> diff --git a/include/linux/fs.h b/include/linux/fs.h
>>> index a0653e5..2cc4b24 100644
>>> --- a/include/linux/fs.h
>>> +++ b/include/linux/fs.h
>>> @@ -396,6 +396,8 @@ struct address_space_operations {
>>>           */
>>>          int (*migratepage) (struct address_space *,
>>>                          struct page *, struct page *, enum migrate_mode);
>>> +       bool (*isolatepage) (struct page *, isolate_mode_t);
>>> +       void (*putbackpage) (struct page *);
>>>          int (*launder_page) (struct page *);
>>>          int (*is_partially_uptodate) (struct page *, unsigned long,
>>>                                          unsigned long);
>>> diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
>>> index f34e040..abef145 100644
>>> --- a/include/linux/page-flags.h
>>> +++ b/include/linux/page-flags.h
>>> @@ -582,6 +582,25 @@ static inline void __ClearPageBalloon(struct page
>>> *page)
>>>          atomic_set(&page->_mapcount, -1);
>>>    }
>>>
>>> +#define PAGE_MOBILE_MAPCOUNT_VALUE (-255)
>>> +
>>> +static inline int PageMobile(struct page *page)
>>> +{
>>> +       return atomic_read(&page->_mapcount) ==
>>> PAGE_MOBILE_MAPCOUNT_VALUE;
>>> +}
>>> +
>>> +static inline void __SetPageMobile(struct page *page)
>>> +{
>>> +       VM_BUG_ON_PAGE(atomic_read(&page->_mapcount) != -1, page);
>>> +       atomic_set(&page->_mapcount, PAGE_MOBILE_MAPCOUNT_VALUE);
>>> +}
>>> +
>>> +static inline void __ClearPageMobile(struct page *page)
>>> +{
>>> +       VM_BUG_ON_PAGE(!PageMobile(page), page);
>>> +       atomic_set(&page->_mapcount, -1);
>>> +}
>>> +
>>>    /*
>>>     * If network-based swap is enabled, sl*b must keep track of whether
>>> pages
>>>     * were allocated from pfmemalloc reserves.
>>> diff --git a/include/uapi/linux/kernel-page-flags.h
>>> b/include/uapi/linux/kernel-page-flags.h
>>> index a6c4962..d50d9e8 100644
>>> --- a/include/uapi/linux/kernel-page-flags.h
>>> +++ b/include/uapi/linux/kernel-page-flags.h
>>> @@ -33,6 +33,7 @@
>>>    #define KPF_THP                       22
>>>    #define KPF_BALLOON           23
>>>    #define KPF_ZERO_PAGE         24
>>> +#define KPF_MOBILE             25
>>>
>>>
>>>    #endif /* _UAPILINUX_KERNEL_PAGE_FLAGS_H */
>>>
>>
>
Mel Gorman July 29, 2015, 10:52 a.m. UTC | #4
On Mon, Jul 13, 2015 at 05:35:17PM +0900, Gioh Kim wrote:
> From: Gioh Kim <gurugio@hanmail.net>
> 
> Add framework to register callback functions and check page mobility.
> There are some modes for page isolation so that isolate interface
> has arguments of page address and isolation mode while putback
> interface has only page address as argument.
> 
> Signed-off-by: Gioh Kim <gioh.kim@lge.com>
> Acked-by: Rafael Aquini <aquini@redhat.com>
> ---
>  fs/proc/page.c                         |  3 ++
>  include/linux/compaction.h             | 80 ++++++++++++++++++++++++++++++++++
>  include/linux/fs.h                     |  2 +
>  include/linux/page-flags.h             | 19 ++++++++
>  include/uapi/linux/kernel-page-flags.h |  1 +
>  5 files changed, 105 insertions(+)
> 

An update to the address_space operations in
Documentation/filesystems/Locking and Documentation/filesystems/vfs.txt
is required. I was going to say "recommended" but it really is required.
The responsibilities and locking rules of these interfaces must be extremely
clear as you may be asking multiple driver authors to use this interface.

For example, it must be clear to users of these interfaces that the isolate
must prevent any parallel updates to the data, prevent parallel frees and
halt attempted accesses until migration is complete. It will not always
be obvious how to do this and may not be obvious that it is required if
someone has not experienced the joy that is mm/migrate.c. For example,
mapped LRU pages get unmapped with migration entries so faults that access
the data wait until the migration completes. Balloons, zram, graphics will
need to provide similar guarantees.

As data accesses may now sleep due to migration, drivers will need to
be careful that it is safe to sleep and suggest that they do not attempt
to spin.

Depending on how it is implemented, the putback may be responsible for
waking up any tasks waiting to access the page.

There are going to be more hazards here which is why documentation to spell
it out is ideal and that zram gets converted to find all the locking and
access pitfalls.

> diff --git a/fs/proc/page.c b/fs/proc/page.c
> index 7eee2d8..a4f5a00 100644
> --- a/fs/proc/page.c
> +++ b/fs/proc/page.c
> @@ -146,6 +146,9 @@ u64 stable_page_flags(struct page *page)
>  	if (PageBalloon(page))
>  		u |= 1 << KPF_BALLOON;
>  
> +	if (PageMobile(page))
> +		u |= 1 << KPF_MOBILE;
> +
>  	u |= kpf_copy_bit(k, KPF_LOCKED,	PG_locked);
>  
>  	u |= kpf_copy_bit(k, KPF_SLAB,		PG_slab);
> diff --git a/include/linux/compaction.h b/include/linux/compaction.h
> index aa8f61c..f693072 100644
> --- a/include/linux/compaction.h
> +++ b/include/linux/compaction.h
> @@ -1,6 +1,9 @@
>  #ifndef _LINUX_COMPACTION_H
>  #define _LINUX_COMPACTION_H
>  
> +#include <linux/page-flags.h>
> +#include <linux/pagemap.h>
> +
>  /* Return values for compact_zone() and try_to_compact_pages() */
>  /* compaction didn't start as it was deferred due to past failures */
>  #define COMPACT_DEFERRED	0
> @@ -51,6 +54,70 @@ extern void compaction_defer_reset(struct zone *zone, int order,
>  				bool alloc_success);
>  extern bool compaction_restarting(struct zone *zone, int order);
>  
> +static inline bool mobile_page(struct page *page)
> +{
> +	return page->mapping &&	(PageMobile(page) || PageBalloon(page));
> +}
> +

This creates an oddity because now there is a disconnect between movable
and mobile pages. They are similar but different.

o A Mobile page is a driver-owned page that has the address space
  operations that enable migration.

o A Movable page is generally a page mapped by page tables that can be
  migrated using the existing mechanisms.

The concepts should be unified.

A Mobile page is a driver-owned page that has the address space
operations that enable migration. Pages that are mapped by userspace are
considered to be mobile with the following properties

	a_ops->isolatepage isolates the page from the LRU to prevent
	parallel reclaim. It is unmapped from page tables using rmap
	with PTEs replaced by migration entries. Any attempt to access
	the page will wait in page fault until the migration completes.

	a_ops->putbackpage removes the migration entries and wakes up
	all waiters in page fault.

	A further property is that allocation of this type specified
	__GFP_MOVABLE to group them all together. They are the most mobile
	page category that are cheapest to move. In theory, all mobile
	pages could be allocated __GFP_MOVABLE if it's known in advance
	the page->mapping will have the necessary operations in the
	future.

?

A complicating factor is that a Movable page as it's currently defined
may not have a page->mapping. You'd have to continue relying on PageLRU to
identify them as a special page that has access to the necessary isolatepage
and putbackpage helpers. However, at least we would have a single view
on what a movable page is.

Additional note: After I wrote the above, I read the other reviews. I
	did not read them in advance so I'd have a fresh view. I see
	Konstantin Khlebnikov has been active and he suggested the mobility
	naming to distinguish between the LRU pages. I simply disagree
	even though I see his reasoning. I do not think we should have a
	special case of LRU pages and everything else. Instead we should
	have a single concept of movability (or mobility) with the special
	case being that LRU pages without an aops can directly call the
	necessary helpers.

> +static inline bool isolate_mobilepage(struct page *page, isolate_mode_t mode)
> +{
> +	bool ret = false;
> +
> +	/*
> +	 * Avoid burning cycles with pages that are yet under __free_pages(),
> +	 * or just got freed under us.
> +	 *
> +	 * In case we 'win' a race for a mobile page being freed under us and
> +	 * raise its refcount preventing __free_pages() from doing its job
> +	 * the put_page() at the end of this block will take care of
> +	 * release this page, thus avoiding a nasty leakage.
> +	 */
> +	if (unlikely(!get_page_unless_zero(page)))
> +		goto out;
> +

Ok.

> +	/*
> +	 * As mobile pages are not isolated from LRU lists, concurrent
> +	 * compaction threads can race against page migration functions
> +	 * as well as race against the releasing a page.
> +	 *
> +	 * In order to avoid having an already isolated mobile page
> +	 * being (wrongly) re-isolated while it is under migration,
> +	 * or to avoid attempting to isolate pages being released,
> +	 * lets be sure we have the page lock
> +	 * before proceeding with the mobile page isolation steps.
> +	 */
> +	if (unlikely(!trylock_page(page)))
> +		goto out_putpage;
> +

There are some big assumptions here. It assumes that any users of this
interface can prevent parallel compaction attempts via the page lock. It
also assumes that the caller does not recursively hold the page lock already.
It would be incompatible with how LRU pages are isolated as they co-ordinate
via the zone->lru_lock.

I suspect you went with the page lock because it happens to be what the
balloon driver needed, which is fine, but potentially paints us into a
corner later.

I don't see a way this could be generically handled for arbitrary subsystems
unless you put responsibility for the locking inside a_ops->isolatepage. That
still works for existing movable pages if you give it a pseudo a_ops for
pages without page->mapping.

Because of this, I really think it would benefit if there was a patch
3 that converted the existing migration of LRU pages to use the aops
interface. This could be done via a fake address_space that only populates
the migration interfaces and is used for LRU pages. Then remove the LRU
special casing in compaction and migration before converting the balloon
driver and zram. This will rattle out any conceivable locking hazard and
unify migration in general. I recognise that it's a lot of heavy lifting
unfortunately but it leaves you with a partial solution to your problem
(zram in the way) and paves the way for drivers to reliably convert.

> +	if (!(mobile_page(page) && page->mapping->a_ops->isolatepage))
> +		goto out_not_isolated;
> +	ret = page->mapping->a_ops->isolatepage(page, mode);
> +	if (!ret)
> +		goto out_not_isolated;
> +	unlock_page(page);
> +	return ret;
> +
> +out_not_isolated:
> +	unlock_page(page);
> +out_putpage:
> +	put_page(page);
> +out:
> +	return ret;
> +}
> +
> +static inline void putback_mobilepage(struct page *page)
> +{
> +	/*
> +	 * 'lock_page()' stabilizes the page and prevents races against
> +	 * concurrent isolation threads attempting to re-isolate it.
> +	 */
> +	lock_page(page);
> +	if (page->mapping && page->mapping->a_ops->putbackpage)
> +		page->mapping->a_ops->putbackpage(page);
> +	unlock_page(page);
> +	/* drop the extra ref count taken for mobile page isolation */
> +	put_page(page);
> +}

Similar comments about the locking, I think the a_ops handler needs to be
responsible. We should not expand the role of the page lock in the
general case.

>  #else
>  static inline unsigned long try_to_compact_pages(gfp_t gfp_mask,
>  			unsigned int order, int alloc_flags,
> @@ -83,6 +150,19 @@ static inline bool compaction_deferred(struct zone *zone, int order)
>  	return true;
>  }
>  
> +static inline bool mobile_page(struct page *page)
> +{
> +	return false;
> +}
> +
> +static inline bool isolate_mobilepage(struct page *page, isolate_mode_t mode)
> +{
> +	return false;
> +}
> +
> +static inline void putback_mobilepage(struct page *page)
> +{
> +}
>  #endif /* CONFIG_COMPACTION */
>  
>  #if defined(CONFIG_COMPACTION) && defined(CONFIG_SYSFS) && defined(CONFIG_NUMA)
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index a0653e5..2cc4b24 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -396,6 +396,8 @@ struct address_space_operations {
>  	 */
>  	int (*migratepage) (struct address_space *,
>  			struct page *, struct page *, enum migrate_mode);
> +	bool (*isolatepage) (struct page *, isolate_mode_t);
> +	void (*putbackpage) (struct page *);
>  	int (*launder_page) (struct page *);
>  	int (*is_partially_uptodate) (struct page *, unsigned long,
>  					unsigned long);
> diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
> index f34e040..abef145 100644
> --- a/include/linux/page-flags.h
> +++ b/include/linux/page-flags.h
> @@ -582,6 +582,25 @@ static inline void __ClearPageBalloon(struct page *page)
>  	atomic_set(&page->_mapcount, -1);
>  }
>  
> +#define PAGE_MOBILE_MAPCOUNT_VALUE (-255)
> +
> +static inline int PageMobile(struct page *page)
> +{
> +	return atomic_read(&page->_mapcount) == PAGE_MOBILE_MAPCOUNT_VALUE;
> +}
> +
> +static inline void __SetPageMobile(struct page *page)
> +{
> +	VM_BUG_ON_PAGE(atomic_read(&page->_mapcount) != -1, page);
> +	atomic_set(&page->_mapcount, PAGE_MOBILE_MAPCOUNT_VALUE);
> +}
> +
> +static inline void __ClearPageMobile(struct page *page)
> +{
> +	VM_BUG_ON_PAGE(!PageMobile(page), page);
> +	atomic_set(&page->_mapcount, -1);
> +}
> +

This definition of Mobility would prevent LRU pages ever being considered
"mobile" in the same why. Why do we not either check it's an LRU page (in
which case it's inherently mobile) or has an aops with the correct handlers?

>  /*
>   * If network-based swap is enabled, sl*b must keep track of whether pages
>   * were allocated from pfmemalloc reserves.
> diff --git a/include/uapi/linux/kernel-page-flags.h b/include/uapi/linux/kernel-page-flags.h
> index a6c4962..d50d9e8 100644
> --- a/include/uapi/linux/kernel-page-flags.h
> +++ b/include/uapi/linux/kernel-page-flags.h
> @@ -33,6 +33,7 @@
>  #define KPF_THP			22
>  #define KPF_BALLOON		23
>  #define KPF_ZERO_PAGE		24
> +#define KPF_MOBILE		25
>  
>  
>  #endif /* _UAPILINUX_KERNEL_PAGE_FLAGS_H */
Minchan Kim July 31, 2015, 10:43 a.m. UTC | #5
Hi Gioh,

On Mon, Jul 13, 2015 at 05:35:17PM +0900, Gioh Kim wrote:
> From: Gioh Kim <gurugio@hanmail.net>
> 
> Add framework to register callback functions and check page mobility.
> There are some modes for page isolation so that isolate interface
> has arguments of page address and isolation mode while putback
> interface has only page address as argument.
> 
> Signed-off-by: Gioh Kim <gioh.kim@lge.com>
> Acked-by: Rafael Aquini <aquini@redhat.com>
> ---
>  fs/proc/page.c                         |  3 ++
>  include/linux/compaction.h             | 80 ++++++++++++++++++++++++++++++++++
>  include/linux/fs.h                     |  2 +
>  include/linux/page-flags.h             | 19 ++++++++
>  include/uapi/linux/kernel-page-flags.h |  1 +
>  5 files changed, 105 insertions(+)
> 
> diff --git a/fs/proc/page.c b/fs/proc/page.c
> index 7eee2d8..a4f5a00 100644
> --- a/fs/proc/page.c
> +++ b/fs/proc/page.c
> @@ -146,6 +146,9 @@ u64 stable_page_flags(struct page *page)
>  	if (PageBalloon(page))
>  		u |= 1 << KPF_BALLOON;
>  
> +	if (PageMobile(page))
> +		u |= 1 << KPF_MOBILE;
> +

Need a new page flag for non-LRU page migration?
I am worried that we don't have an empty slot for a page flag on 32-bit.

Aha, see SetPageMobile below. You use _mapcount.
It seems to work for non-LRU pages but unfortunately, zsmalloc
has already used that field for its own purpose so there is no room
for it.


>  	u |= kpf_copy_bit(k, KPF_LOCKED,	PG_locked);
>  
>  	u |= kpf_copy_bit(k, KPF_SLAB,		PG_slab);
> diff --git a/include/linux/compaction.h b/include/linux/compaction.h
> index aa8f61c..f693072 100644
> --- a/include/linux/compaction.h
> +++ b/include/linux/compaction.h
> @@ -1,6 +1,9 @@
>  #ifndef _LINUX_COMPACTION_H
>  #define _LINUX_COMPACTION_H
>  
> +#include <linux/page-flags.h>
> +#include <linux/pagemap.h>
> +
>  /* Return values for compact_zone() and try_to_compact_pages() */
>  /* compaction didn't start as it was deferred due to past failures */
>  #define COMPACT_DEFERRED	0
> @@ -51,6 +54,70 @@ extern void compaction_defer_reset(struct zone *zone, int order,
>  				bool alloc_success);
>  extern bool compaction_restarting(struct zone *zone, int order);
>  
> +static inline bool mobile_page(struct page *page)
> +{
> +	return page->mapping &&	(PageMobile(page) || PageBalloon(page));


What's the locking rule to test mobile_page?
Why do we need so many checks?

Why do we need the PageBalloon check?
You are trying to make a generic abstraction for non-LRU pages to migrate
but the PageBalloon check in here breaks your goal.

> +}
> +
> +static inline bool isolate_mobilepage(struct page *page, isolate_mode_t mode)
> +{
> +	bool ret = false;
> +
> +	/*
> +	 * Avoid burning cycles with pages that are yet under __free_pages(),
> +	 * or just got freed under us.
> +	 *
> +	 * In case we 'win' a race for a mobile page being freed under us and
> +	 * raise its refcount preventing __free_pages() from doing its job
> +	 * the put_page() at the end of this block will take care of
> +	 * release this page, thus avoiding a nasty leakage.
> +	 */

Good.

> +	if (unlikely(!get_page_unless_zero(page)))
> +		goto out;
> +
> +	/*
> +	 * As mobile pages are not isolated from LRU lists, concurrent
> +	 * compaction threads can race against page migration functions
> +	 * as well as race against the releasing a page.

How can it race with releasing a page?
We already get refcount above.

Do you mean page->mapping tearing race?
If so, zsmalloc should be changed to hold a lock.


> +	 *
> +	 * In order to avoid having an already isolated mobile page
> +	 * being (wrongly) re-isolated while it is under migration,
> +	 * or to avoid attempting to isolate pages being released,
> +	 * lets be sure we have the page lock
> +	 * before proceeding with the mobile page isolation steps.
> +	 */
> +	if (unlikely(!trylock_page(page)))
> +		goto out_putpage;
> +
> +	if (!(mobile_page(page) && page->mapping->a_ops->isolatepage))
> +		goto out_not_isolated;

I cannot know how isolate_mobilepage is called by the upper layer
because this patch doesn't include it.
Anyway, why do we need so many checks to prove it's a mobile page?

mobile_page() {
	page->mapping && (PageMobile(page) || PageBalloon(page));
}

Additionally, added page->mapping->a_ops->isolatepage check

Is it possible that a non-LRU page's address space provides
only page->mapping->migratepage without isolate/putback?

Couldn't we make it simple like this?

        page->mapping && page->mapping->migratepage

So, couldn't we make mobile_page like this

PageMobile(struct page *page)
{
        VM_BUG_ON_PAGE(!PageLocked(page), page);
        VM_BUG_ON_PAGE(PageLRU(page), page);

        return page->mapping && page->mapping->migratepage;
}

It will save _mapcount of struct page.

> +	ret = page->mapping->a_ops->isolatepage(page, mode);
> +	if (!ret)
> +		goto out_not_isolated;
> +	unlock_page(page);
> +	return ret;
> +
> +out_not_isolated:
> +	unlock_page(page);
> +out_putpage:
> +	put_page(page);
> +out:
> +	return ret;
> +}
> +
> +static inline void putback_mobilepage(struct page *page)
> +{
> +	/*
> +	 * 'lock_page()' stabilizes the page and prevents races against

What does 'stabilize' mean?
A clear comment is always welcome rather than a vague word.

> +	 * concurrent isolation threads attempting to re-isolate it.
> +	 */
> +	lock_page(page);
> +	if (page->mapping && page->mapping->a_ops->putbackpage)
> +		page->mapping->a_ops->putbackpage(page);
> +	unlock_page(page);
> +	/* drop the extra ref count taken for mobile page isolation */
> +	put_page(page);
> +}
>  #else
>  static inline unsigned long try_to_compact_pages(gfp_t gfp_mask,
>  			unsigned int order, int alloc_flags,
> @@ -83,6 +150,19 @@ static inline bool compaction_deferred(struct zone *zone, int order)
>  	return true;
>  }
>  
> +static inline bool mobile_page(struct page *page)
> +{
> +	return false;
> +}
> +
> +static inline bool isolate_mobilepage(struct page *page, isolate_mode_t mode)
> +{
> +	return false;
> +}
> +
> +static inline void putback_mobilepage(struct page *page)
> +{
> +}
>  #endif /* CONFIG_COMPACTION */
>  
>  #if defined(CONFIG_COMPACTION) && defined(CONFIG_SYSFS) && defined(CONFIG_NUMA)
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index a0653e5..2cc4b24 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -396,6 +396,8 @@ struct address_space_operations {
>  	 */
>  	int (*migratepage) (struct address_space *,
>  			struct page *, struct page *, enum migrate_mode);
> +	bool (*isolatepage) (struct page *, isolate_mode_t);
> +	void (*putbackpage) (struct page *);
>  	int (*launder_page) (struct page *);
>  	int (*is_partially_uptodate) (struct page *, unsigned long,
>  					unsigned long);
> diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
> index f34e040..abef145 100644
> --- a/include/linux/page-flags.h
> +++ b/include/linux/page-flags.h
> @@ -582,6 +582,25 @@ static inline void __ClearPageBalloon(struct page *page)
>  	atomic_set(&page->_mapcount, -1);
>  }
>  
> +#define PAGE_MOBILE_MAPCOUNT_VALUE (-255)
> +
> +static inline int PageMobile(struct page *page)
> +{
> +	return atomic_read(&page->_mapcount) == PAGE_MOBILE_MAPCOUNT_VALUE;
> +}
> +
> +static inline void __SetPageMobile(struct page *page)
> +{
> +	VM_BUG_ON_PAGE(atomic_read(&page->_mapcount) != -1, page);
> +	atomic_set(&page->_mapcount, PAGE_MOBILE_MAPCOUNT_VALUE);
> +}
> +
> +static inline void __ClearPageMobile(struct page *page)
> +{
> +	VM_BUG_ON_PAGE(!PageMobile(page), page);
> +	atomic_set(&page->_mapcount, -1);
> +}
> +
>  /*
>   * If network-based swap is enabled, sl*b must keep track of whether pages
>   * were allocated from pfmemalloc reserves.
> diff --git a/include/uapi/linux/kernel-page-flags.h b/include/uapi/linux/kernel-page-flags.h
> index a6c4962..d50d9e8 100644
> --- a/include/uapi/linux/kernel-page-flags.h
> +++ b/include/uapi/linux/kernel-page-flags.h
> @@ -33,6 +33,7 @@
>  #define KPF_THP			22
>  #define KPF_BALLOON		23
>  #define KPF_ZERO_PAGE		24
> +#define KPF_MOBILE		25
>  
>  
>  #endif /* _UAPILINUX_KERNEL_PAGE_FLAGS_H */
> -- 
> 2.1.4
>
Minchan Kim Aug. 10, 2015, 7:19 a.m. UTC | #6
On Fri, Jul 31, 2015 at 07:43:09PM +0900, Minchan Kim wrote:
> Hi Gioh,
> 
> On Mon, Jul 13, 2015 at 05:35:17PM +0900, Gioh Kim wrote:
> > From: Gioh Kim <gurugio@hanmail.net>
> > 
> > Add framework to register callback functions and check page mobility.
> > There are some modes for page isolation so that isolate interface
> > has arguments of page address and isolation mode while putback
> > interface has only page address as argument.
> > 
> > Signed-off-by: Gioh Kim <gioh.kim@lge.com>
> > Acked-by: Rafael Aquini <aquini@redhat.com>
> > ---
> >  fs/proc/page.c                         |  3 ++
> >  include/linux/compaction.h             | 80 ++++++++++++++++++++++++++++++++++
> >  include/linux/fs.h                     |  2 +
> >  include/linux/page-flags.h             | 19 ++++++++
> >  include/uapi/linux/kernel-page-flags.h |  1 +
> >  5 files changed, 105 insertions(+)
> > 
> > diff --git a/fs/proc/page.c b/fs/proc/page.c
> > index 7eee2d8..a4f5a00 100644
> > --- a/fs/proc/page.c
> > +++ b/fs/proc/page.c
> > @@ -146,6 +146,9 @@ u64 stable_page_flags(struct page *page)
> >  	if (PageBalloon(page))
> >  		u |= 1 << KPF_BALLOON;
> >  
> > +	if (PageMobile(page))
> > +		u |= 1 << KPF_MOBILE;
> > +
> 
> Need a new page flag for non-LRU page migration?
> I am worry that we don't have empty slot for page flag on 32-bit.
> 
> Aha, see SetPageMobile below. You use _mapcount.
> It seems to be work for non-LRU pages but unfortunately, zsmalloc
> already have used that field as own purpose so there is no room
> for it.

Gioh, I just sent a patch so that zsmalloc no longer uses page->mapping
and _mapcount, so I think zsmalloc could support compaction with your
patchset. Although zsmalloc can support compaction with it, I still
don't think it's good that a driver has to mark pages mobile via
_mapcount.

I hope you can find another way.

Thanks.

> 
> 
> >  	u |= kpf_copy_bit(k, KPF_LOCKED,	PG_locked);
> >  
> >  	u |= kpf_copy_bit(k, KPF_SLAB,		PG_slab);
> > diff --git a/include/linux/compaction.h b/include/linux/compaction.h
> > index aa8f61c..f693072 100644
> > --- a/include/linux/compaction.h
> > +++ b/include/linux/compaction.h
> > @@ -1,6 +1,9 @@
> >  #ifndef _LINUX_COMPACTION_H
> >  #define _LINUX_COMPACTION_H
> >  
> > +#include <linux/page-flags.h>
> > +#include <linux/pagemap.h>
> > +
> >  /* Return values for compact_zone() and try_to_compact_pages() */
> >  /* compaction didn't start as it was deferred due to past failures */
> >  #define COMPACT_DEFERRED	0
> > @@ -51,6 +54,70 @@ extern void compaction_defer_reset(struct zone *zone, int order,
> >  				bool alloc_success);
> >  extern bool compaction_restarting(struct zone *zone, int order);
> >  
> > +static inline bool mobile_page(struct page *page)
> > +{
> > +	return page->mapping &&	(PageMobile(page) || PageBalloon(page));
> 
> 
> What's the locking rule to test mobile_page?
> Why we need such many checking?
> 
> Why we need PageBalloon check?
> You are trying to make generic abstraction for non-LRU page to migrate
> but PageBalloon check in here breaks your goal.
> 
> > +}
> > +
> > +static inline bool isolate_mobilepage(struct page *page, isolate_mode_t mode)
> > +{
> > +	bool ret = false;
> > +
> > +	/*
> > +	 * Avoid burning cycles with pages that are yet under __free_pages(),
> > +	 * or just got freed under us.
> > +	 *
> > +	 * In case we 'win' a race for a mobile page being freed under us and
> > +	 * raise its refcount preventing __free_pages() from doing its job
> > +	 * the put_page() at the end of this block will take care of
> > +	 * release this page, thus avoiding a nasty leakage.
> > +	 */
> 
> Good.
> 
> > +	if (unlikely(!get_page_unless_zero(page)))
> > +		goto out;
> > +
> > +	/*
> > +	 * As mobile pages are not isolated from LRU lists, concurrent
> > +	 * compaction threads can race against page migration functions
> > +	 * as well as race against the releasing a page.
> 
> How can it race with releasing a page?
> We already get refcount above.
> 
> Do you mean page->mapping tearing race?
> If so, zsmalloc should be chaned to hold a lock.
> 
> 
> > +	 *
> > +	 * In order to avoid having an already isolated mobile page
> > +	 * being (wrongly) re-isolated while it is under migration,
> > +	 * or to avoid attempting to isolate pages being released,
> > +	 * lets be sure we have the page lock
> > +	 * before proceeding with the mobile page isolation steps.
> > +	 */
> > +	if (unlikely(!trylock_page(page)))
> > +		goto out_putpage;
> > +
> > +	if (!(mobile_page(page) && page->mapping->a_ops->isolatepage))
> > +		goto out_not_isolated;
> 
> I cannot know how isolate_mobilepage is called by upper layer
> because this patch doesn't include it.
> Anyway, why we need such many checks to prove it's mobile page?
> 
> mobile_page() {
> 	page->mapping && (PageMobile(page) || PageBalloon(page));
> }
> 
> Additionally, added page->mapping->a_ops->isolatepage check
> 
> Is it possible that a non-LRU page's address space provides
> only page->maping->migratepage without isolate/putback?
> 
> Couldn't we make it simple like this?
> 
>         page->mapping && page->mapping->migratepage
> 
> So, couldn't we make mobile_page like this
> 
> PageMobile(struct page *page)
> {
>         VM_BUG_ON_PAGE(!PageLocked(page), page);
>         VM_BUG_ON_PAGE(PageLRU(page), page);
> 
>         return page->mapping && page->mapping->migratepage;
> }
> 
> It will save _mapcount of struct page.
> 
> > +	ret = page->mapping->a_ops->isolatepage(page, mode);
> > +	if (!ret)
> > +		goto out_not_isolated;
> > +	unlock_page(page);
> > +	return ret;
> > +
> > +out_not_isolated:
> > +	unlock_page(page);
> > +out_putpage:
> > +	put_page(page);
> > +out:
> > +	return ret;
> > +}
> > +
> > +static inline void putback_mobilepage(struct page *page)
> > +{
> > +	/*
> > +	 * 'lock_page()' stabilizes the page and prevents races against
> 
> What does it mean 'stabilize'?
> Clear comment is always welcome rather than vague word.
> 
> > +	 * concurrent isolation threads attempting to re-isolate it.
> > +	 */
> > +	lock_page(page);
> > +	if (page->mapping && page->mapping->a_ops->putbackpage)
> > +		page->mapping->a_ops->putbackpage(page);
> > +	unlock_page(page);
> > +	/* drop the extra ref count taken for mobile page isolation */
> > +	put_page(page);
> > +}
> >  #else
> >  static inline unsigned long try_to_compact_pages(gfp_t gfp_mask,
> >  			unsigned int order, int alloc_flags,
> > @@ -83,6 +150,19 @@ static inline bool compaction_deferred(struct zone *zone, int order)
> >  	return true;
> >  }
> >  
> > +static inline bool mobile_page(struct page *page)
> > +{
> > +	return false;
> > +}
> > +
> > +static inline bool isolate_mobilepage(struct page *page, isolate_mode_t mode)
> > +{
> > +	return false;
> > +}
> > +
> > +static inline void putback_mobilepage(struct page *page)
> > +{
> > +}
> >  #endif /* CONFIG_COMPACTION */
> >  
> >  #if defined(CONFIG_COMPACTION) && defined(CONFIG_SYSFS) && defined(CONFIG_NUMA)
> > diff --git a/include/linux/fs.h b/include/linux/fs.h
> > index a0653e5..2cc4b24 100644
> > --- a/include/linux/fs.h
> > +++ b/include/linux/fs.h
> > @@ -396,6 +396,8 @@ struct address_space_operations {
> >  	 */
> >  	int (*migratepage) (struct address_space *,
> >  			struct page *, struct page *, enum migrate_mode);
> > +	bool (*isolatepage) (struct page *, isolate_mode_t);
> > +	void (*putbackpage) (struct page *);
> >  	int (*launder_page) (struct page *);
> >  	int (*is_partially_uptodate) (struct page *, unsigned long,
> >  					unsigned long);
> > diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
> > index f34e040..abef145 100644
> > --- a/include/linux/page-flags.h
> > +++ b/include/linux/page-flags.h
> > @@ -582,6 +582,25 @@ static inline void __ClearPageBalloon(struct page *page)
> >  	atomic_set(&page->_mapcount, -1);
> >  }
> >  
> > +#define PAGE_MOBILE_MAPCOUNT_VALUE (-255)
> > +
> > +static inline int PageMobile(struct page *page)
> > +{
> > +	return atomic_read(&page->_mapcount) == PAGE_MOBILE_MAPCOUNT_VALUE;
> > +}
> > +
> > +static inline void __SetPageMobile(struct page *page)
> > +{
> > +	VM_BUG_ON_PAGE(atomic_read(&page->_mapcount) != -1, page);
> > +	atomic_set(&page->_mapcount, PAGE_MOBILE_MAPCOUNT_VALUE);
> > +}
> > +
> > +static inline void __ClearPageMobile(struct page *page)
> > +{
> > +	VM_BUG_ON_PAGE(!PageMobile(page), page);
> > +	atomic_set(&page->_mapcount, -1);
> > +}
> > +
> >  /*
> >   * If network-based swap is enabled, sl*b must keep track of whether pages
> >   * were allocated from pfmemalloc reserves.
> > diff --git a/include/uapi/linux/kernel-page-flags.h b/include/uapi/linux/kernel-page-flags.h
> > index a6c4962..d50d9e8 100644
> > --- a/include/uapi/linux/kernel-page-flags.h
> > +++ b/include/uapi/linux/kernel-page-flags.h
> > @@ -33,6 +33,7 @@
> >  #define KPF_THP			22
> >  #define KPF_BALLOON		23
> >  #define KPF_ZERO_PAGE		24
> > +#define KPF_MOBILE		25
> >  
> >  
> >  #endif /* _UAPILINUX_KERNEL_PAGE_FLAGS_H */
> > -- 
> > 2.1.4
> >

Patch
diff mbox

diff --git a/fs/proc/page.c b/fs/proc/page.c
index 7eee2d8..a4f5a00 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -146,6 +146,9 @@  u64 stable_page_flags(struct page *page)
 	if (PageBalloon(page))
 		u |= 1 << KPF_BALLOON;
 
+	if (PageMobile(page))
+		u |= 1 << KPF_MOBILE;
+
 	u |= kpf_copy_bit(k, KPF_LOCKED,	PG_locked);
 
 	u |= kpf_copy_bit(k, KPF_SLAB,		PG_slab);
diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index aa8f61c..f693072 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -1,6 +1,9 @@ 
 #ifndef _LINUX_COMPACTION_H
 #define _LINUX_COMPACTION_H
 
+#include <linux/page-flags.h>
+#include <linux/pagemap.h>
+
 /* Return values for compact_zone() and try_to_compact_pages() */
 /* compaction didn't start as it was deferred due to past failures */
 #define COMPACT_DEFERRED	0
@@ -51,6 +54,70 @@  extern void compaction_defer_reset(struct zone *zone, int order,
 				bool alloc_success);
 extern bool compaction_restarting(struct zone *zone, int order);
 
+static inline bool mobile_page(struct page *page)
+{
+	return page->mapping &&	(PageMobile(page) || PageBalloon(page));
+}
+
+static inline bool isolate_mobilepage(struct page *page, isolate_mode_t mode)
+{
+	bool ret = false;
+
+	/*
+	 * Avoid burning cycles with pages that are yet under __free_pages(),
+	 * or just got freed under us.
+	 *
+	 * In case we 'win' a race for a mobile page being freed under us and
+	 * raise its refcount preventing __free_pages() from doing its job
+	 * the put_page() at the end of this block will take care of
+	 * release this page, thus avoiding a nasty leakage.
+	 */
+	if (unlikely(!get_page_unless_zero(page)))
+		goto out;
+
+	/*
+	 * As mobile pages are not isolated from LRU lists, concurrent
+	 * compaction threads can race against page migration functions
+	 * as well as race against the releasing a page.
+	 *
+	 * In order to avoid having an already isolated mobile page
+	 * being (wrongly) re-isolated while it is under migration,
+	 * or to avoid attempting to isolate pages being released,
+	 * lets be sure we have the page lock
+	 * before proceeding with the mobile page isolation steps.
+	 */
+	if (unlikely(!trylock_page(page)))
+		goto out_putpage;
+
+	if (!(mobile_page(page) && page->mapping->a_ops->isolatepage))
+		goto out_not_isolated;
+	ret = page->mapping->a_ops->isolatepage(page, mode);
+	if (!ret)
+		goto out_not_isolated;
+	unlock_page(page);
+	return ret;
+
+out_not_isolated:
+	unlock_page(page);
+out_putpage:
+	put_page(page);
+out:
+	return ret;
+}
+
+static inline void putback_mobilepage(struct page *page)
+{
+	/*
+	 * 'lock_page()' stabilizes the page and prevents races against
+	 * concurrent isolation threads attempting to re-isolate it.
+	 */
+	lock_page(page);
+	if (page->mapping && page->mapping->a_ops->putbackpage)
+		page->mapping->a_ops->putbackpage(page);
+	unlock_page(page);
+	/* drop the extra ref count taken for mobile page isolation */
+	put_page(page);
+}
 #else
 static inline unsigned long try_to_compact_pages(gfp_t gfp_mask,
 			unsigned int order, int alloc_flags,
@@ -83,6 +150,19 @@  static inline bool compaction_deferred(struct zone *zone, int order)
 	return true;
 }
 
+static inline bool mobile_page(struct page *page)
+{
+	return false;
+}
+
+static inline bool isolate_mobilepage(struct page *page, isolate_mode_t mode)
+{
+	return false;
+}
+
+static inline void putback_mobilepage(struct page *page)
+{
+}
 #endif /* CONFIG_COMPACTION */
 
 #if defined(CONFIG_COMPACTION) && defined(CONFIG_SYSFS) && defined(CONFIG_NUMA)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index a0653e5..2cc4b24 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -396,6 +396,8 @@  struct address_space_operations {
 	 */
 	int (*migratepage) (struct address_space *,
 			struct page *, struct page *, enum migrate_mode);
+	bool (*isolatepage) (struct page *, isolate_mode_t);
+	void (*putbackpage) (struct page *);
 	int (*launder_page) (struct page *);
 	int (*is_partially_uptodate) (struct page *, unsigned long,
 					unsigned long);
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index f34e040..abef145 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -582,6 +582,25 @@  static inline void __ClearPageBalloon(struct page *page)
 	atomic_set(&page->_mapcount, -1);
 }
 
+#define PAGE_MOBILE_MAPCOUNT_VALUE (-255)
+
+static inline int PageMobile(struct page *page)
+{
+	return atomic_read(&page->_mapcount) == PAGE_MOBILE_MAPCOUNT_VALUE;
+}
+
+static inline void __SetPageMobile(struct page *page)
+{
+	VM_BUG_ON_PAGE(atomic_read(&page->_mapcount) != -1, page);
+	atomic_set(&page->_mapcount, PAGE_MOBILE_MAPCOUNT_VALUE);
+}
+
+static inline void __ClearPageMobile(struct page *page)
+{
+	VM_BUG_ON_PAGE(!PageMobile(page), page);
+	atomic_set(&page->_mapcount, -1);
+}
+
 /*
  * If network-based swap is enabled, sl*b must keep track of whether pages
  * were allocated from pfmemalloc reserves.
diff --git a/include/uapi/linux/kernel-page-flags.h b/include/uapi/linux/kernel-page-flags.h
index a6c4962..d50d9e8 100644
--- a/include/uapi/linux/kernel-page-flags.h
+++ b/include/uapi/linux/kernel-page-flags.h
@@ -33,6 +33,7 @@ 
 #define KPF_THP			22
 #define KPF_BALLOON		23
 #define KPF_ZERO_PAGE		24
+#define KPF_MOBILE		25
 
 
 #endif /* _UAPILINUX_KERNEL_PAGE_FLAGS_H */