[RFC] mm, drm/i915: Mark pinned shmemfs pages as unevictable

Message ID 20170606120436.8683-1-chris@chris-wilson.co.uk (mailing list archive)
State New, archived

Commit Message

Chris Wilson June 6, 2017, 12:04 p.m. UTC
Similar in principle to the treatment of get_user_pages, pages that
i915.ko acquires from shmemfs are not immediately reclaimable and so
should be excluded from the mm accounting and vmscan until they have
been returned to the system via shrink_slab/i915_gem_shrink. By moving
the unreclaimable pages off the inactive anon lru, not only should
vmscan be improved by avoiding walking unreclaimable pages, but the
system should also have a better idea of how much memory it can reclaim
at that moment in time.

Note, however, the interaction with shrink_slab which will move some
mlocked pages back to the inactive anon lru.

Suggested-by: Dave Hansen <dave.hansen@intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Matthew Auld <matthew.auld@intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: "Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Michal Hocko <mhocko@suse.com>
---
 drivers/gpu/drm/i915/i915_gem.c | 17 ++++++++++++++++-
 mm/mlock.c                      |  2 ++
 2 files changed, 18 insertions(+), 1 deletion(-)
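
For context, both mm helpers the patch exports assert that the page lock is
held, which is why every call site in the diff brackets them with
lock_page()/unlock_page(). Abridged from mm/mlock.c of this vintage:

void mlock_vma_page(struct page *page)
{
	/* Serialize with page migration */
	BUG_ON(!PageLocked(page));

	if (!TestSetPageMlocked(page)) {
		mod_zone_page_state(page_zone(page), NR_MLOCK,
				    hpage_nr_pages(page));
		count_vm_event(UNEVICTABLE_PGMLOCKED);
		if (!isolate_lru_page(page))
			putback_lru_page(page);
	}
}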

Comments

Michal Hocko June 6, 2017, 12:14 p.m. UTC | #1
On Tue 06-06-17 13:04:36, Chris Wilson wrote:
> Similar in principle to the treatment of get_user_pages, pages that
> i915.ko acquires from shmemfs are not immediately reclaimable and so
> should be excluded from the mm accounting and vmscan until they have
> been returned to the system via shrink_slab/i915_gem_shrink. By moving
> the unreclaimable pages off the inactive anon lru, not only should
> vmscan be improved by avoiding walking unreclaimable pages, but the
> system should also have a better idea of how much memory it can reclaim
> at that moment in time.

That is certainly desirable. Peter has proposed a generic pin_page (or
similar) API. What happened with it? I think it would be a better
approach than (ab)using mlock API. I am also not familiar with the i915
code to be sure that using lock_page is really safe here. I think that
all we need is to simply move those pages in/out to/from unevictable LRU
list on pin/unpinning.
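
A minimal sketch of that direction, modelled on what SHM_LOCK already does
for shmem (shmem_lock() in mm/shmem.c); the i915_gem_* names here are
illustrative, not existing kernel API:

#include <linux/pagemap.h>
#include <linux/shmem_fs.h>

/* Pin: flag the whole mapping so that page_evictable() returns false
 * for every page in it; vmscan then culls the pages onto the
 * unevictable LRU lazily as it encounters them. */
static void i915_gem_mapping_set_pinned(struct address_space *mapping)
{
	mapping_set_unevictable(mapping);
}

/* Unpin: clear the flag, then walk the mapping and move any pages
 * stranded on the unevictable LRU back to an evictable one. */
static void i915_gem_mapping_clear_pinned(struct address_space *mapping)
{
	mapping_clear_unevictable(mapping);
	shmem_unlock_mapping(mapping);	/* rescues pages via check_move_unevictable_pages() */
}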

> Note, however, the interaction with shrink_slab which will move some
> mlocked pages back to the inactive anon lru.
> 
> Suggested-by: Dave Hansen <dave.hansen@intel.com>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Matthew Auld <matthew.auld@intel.com>
> Cc: Dave Hansen <dave.hansen@intel.com>
> Cc: "Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>
> Cc: Andrew Morton <akpm@linux-foundation.org>
> Cc: Michal Hocko <mhocko@suse.com>
> ---
>  drivers/gpu/drm/i915/i915_gem.c | 17 ++++++++++++++++-
>  mm/mlock.c                      |  2 ++
>  2 files changed, 18 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 8cb811519db1..37a98fbc6a12 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2193,6 +2193,9 @@ void __i915_gem_object_truncate(struct drm_i915_gem_object *obj)
>  	obj->mm.pages = ERR_PTR(-EFAULT);
>  }
>  
> +extern void mlock_vma_page(struct page *page);
> +extern unsigned int munlock_vma_page(struct page *page);
> +
>  static void
>  i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj,
>  			      struct sg_table *pages)
> @@ -2214,6 +2217,10 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj,
>  		if (obj->mm.madv == I915_MADV_WILLNEED)
>  			mark_page_accessed(page);
>  
> +		lock_page(page);
> +		munlock_vma_page(page);
> +		unlock_page(page);
> +
>  		put_page(page);
>  	}
>  	obj->mm.dirty = false;
> @@ -2412,6 +2419,10 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
>  		}
>  		last_pfn = page_to_pfn(page);
>  
> +		lock_page(page);
> +		mlock_vma_page(page);
> +		unlock_page(page);
> +
>  		/* Check that the i965g/gm workaround works. */
>  		WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL));
>  	}
> @@ -2450,8 +2461,12 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
>  err_sg:
>  	sg_mark_end(sg);
>  err_pages:
> -	for_each_sgt_page(page, sgt_iter, st)
> +	for_each_sgt_page(page, sgt_iter, st) {
> +		lock_page(page);
> +		munlock_vma_page(page);
> +		unlock_page(page);
>  		put_page(page);
> +	}
>  	sg_free_table(st);
>  	kfree(st);
>  
> diff --git a/mm/mlock.c b/mm/mlock.c
> index b562b5523a65..531d9f8fd033 100644
> --- a/mm/mlock.c
> +++ b/mm/mlock.c
> @@ -94,6 +94,7 @@ void mlock_vma_page(struct page *page)
>  			putback_lru_page(page);
>  	}
>  }
> +EXPORT_SYMBOL_GPL(mlock_vma_page);
>  
>  /*
>   * Isolate a page from LRU with optional get_page() pin.
> @@ -211,6 +212,7 @@ unsigned int munlock_vma_page(struct page *page)
>  out:
>  	return nr_pages - 1;
>  }
> +EXPORT_SYMBOL_GPL(munlock_vma_page);
>  
>  /*
>   * convert get_user_pages() return value to posix mlock() error
> -- 
> 2.11.0
>
Vlastimil Babka June 6, 2017, 12:30 p.m. UTC | #2
On 06/06/2017 02:14 PM, Michal Hocko wrote:
> On Tue 06-06-17 13:04:36, Chris Wilson wrote:
>> Similar in principle to the treatment of get_user_pages, pages that
>> i915.ko acquires from shmemfs are not immediately reclaimable and so
>> should be excluded from the mm accounting and vmscan until they have
>> been returned to the system via shrink_slab/i915_gem_shrink. By moving
>> the unreclaimable pages off the inactive anon lru, not only should
>> vmscan be improved by avoiding walking unreclaimable pages, but the
>> system should also have a better idea of how much memory it can reclaim
>> at that moment in time.
> 
> That is certainly desirable. Peter has proposed a generic pin_page (or
> similar) API. What happened with it? I think it would be a better
> approach than (ab)using mlock API. I am also not familiar with the i915
> code to be sure that using lock_page is really safe here. I think that
> all we need is to simply move those pages in/out to/from unevictable LRU
> list on pin/unpinning.

Hmm, even when on the unevictable list, the pages were still allocated as
MOVABLE, while pinning prevents them from being migrated, so it doesn't
play well with compaction/grouping by mobility/CMA etc. Addressing that
would be more useful IMHO, and e.g. one of the features envisioned for
the pinning API was to first migrate the pinned pages out of movable
zones and CMA/MOVABLE pageblocks.
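
Short of migrate-on-pin, a cruder stopgap (a sketch only, assuming the
mapping's mask still carries __GFP_MOVABLE at this point) would be to clear
__GFP_MOVABLE on the object's mapping at creation time, much as i915 already
retunes the mask for the 965g/gm DMA32 workaround:

/* Sketch: keep pages destined for long-lived GPU pins out of MOVABLE
 * pageblocks by adjusting the allocation mask up front. */
struct address_space *mapping = obj->base.filp->f_mapping;

mapping_set_gfp_mask(mapping,
		     mapping_gfp_mask(mapping) & ~__GFP_MOVABLE);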

>> Note, however, the interaction with shrink_slab which will move some
>> mlocked pages back to the inactive anon lru.
>>
>> Suggested-by: Dave Hansen <dave.hansen@intel.com>
>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
>> Cc: Matthew Auld <matthew.auld@intel.com>
>> Cc: Dave Hansen <dave.hansen@intel.com>
>> Cc: "Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>
>> Cc: Andrew Morton <akpm@linux-foundation.org>
>> Cc: Michal Hocko <mhocko@suse.com>
>> ---
>>  drivers/gpu/drm/i915/i915_gem.c | 17 ++++++++++++++++-
>>  mm/mlock.c                      |  2 ++
>>  2 files changed, 18 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
>> index 8cb811519db1..37a98fbc6a12 100644
>> --- a/drivers/gpu/drm/i915/i915_gem.c
>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>> @@ -2193,6 +2193,9 @@ void __i915_gem_object_truncate(struct drm_i915_gem_object *obj)
>>  	obj->mm.pages = ERR_PTR(-EFAULT);
>>  }
>>  
>> +extern void mlock_vma_page(struct page *page);
>> +extern unsigned int munlock_vma_page(struct page *page);
>> +
>>  static void
>>  i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj,
>>  			      struct sg_table *pages)
>> @@ -2214,6 +2217,10 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj,
>>  		if (obj->mm.madv == I915_MADV_WILLNEED)
>>  			mark_page_accessed(page);
>>  
>> +		lock_page(page);
>> +		munlock_vma_page(page);
>> +		unlock_page(page);
>> +
>>  		put_page(page);
>>  	}
>>  	obj->mm.dirty = false;
>> @@ -2412,6 +2419,10 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
>>  		}
>>  		last_pfn = page_to_pfn(page);
>>  
>> +		lock_page(page);
>> +		mlock_vma_page(page);
>> +		unlock_page(page);
>> +
>>  		/* Check that the i965g/gm workaround works. */
>>  		WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL));
>>  	}
>> @@ -2450,8 +2461,12 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
>>  err_sg:
>>  	sg_mark_end(sg);
>>  err_pages:
>> -	for_each_sgt_page(page, sgt_iter, st)
>> +	for_each_sgt_page(page, sgt_iter, st) {
>> +		lock_page(page);
>> +		munlock_vma_page(page);
>> +		unlock_page(page);
>>  		put_page(page);
>> +	}
>>  	sg_free_table(st);
>>  	kfree(st);
>>  
>> diff --git a/mm/mlock.c b/mm/mlock.c
>> index b562b5523a65..531d9f8fd033 100644
>> --- a/mm/mlock.c
>> +++ b/mm/mlock.c
>> @@ -94,6 +94,7 @@ void mlock_vma_page(struct page *page)
>>  			putback_lru_page(page);
>>  	}
>>  }
>> +EXPORT_SYMBOL_GPL(mlock_vma_page);
>>  
>>  /*
>>   * Isolate a page from LRU with optional get_page() pin.
>> @@ -211,6 +212,7 @@ unsigned int munlock_vma_page(struct page *page)
>>  out:
>>  	return nr_pages - 1;
>>  }
>> +EXPORT_SYMBOL_GPL(munlock_vma_page);
>>  
>>  /*
>>   * convert get_user_pages() return value to posix mlock() error
>> -- 
>> 2.11.0
>>
>
Chris Wilson June 6, 2017, 12:34 p.m. UTC | #3
Quoting Michal Hocko (2017-06-06 13:14:18)
> On Tue 06-06-17 13:04:36, Chris Wilson wrote:
> > Similar in principle to the treatment of get_user_pages, pages that
> > i915.ko acquires from shmemfs are not immediately reclaimable and so
> > should be excluded from the mm accounting and vmscan until they have
> > been returned to the system via shrink_slab/i915_gem_shrink. By moving
> > the unreclaimable pages off the inactive anon lru, not only should
> > vmscan be improved by avoiding walking unreclaimable pages, but the
> > system should also have a better idea of how much memory it can reclaim
> > at that moment in time.
> 
> That is certainly desirable. Peter has proposed a generic pin_page (or
> similar) API. What happened with it? I think it would be a better
> approach than (ab)using mlock API. I am also not familiar with the i915
> code to be sure that using lock_page is really safe here. I think that
> all we need is to simply move those pages in/out to/from unevictable LRU
> list on pin/unpinning.

With respect to i915, we may not be the sole owner of the page at the
point where we call shmem_read_mapping_page_gfp() as it can be mmapped or
accessed directly via the mapping internally. It is just at this point
we know that the page will not be returned to the system until we have
finished using it with the GPU.

An API that didn't assume the page was locked or require exclusive
ownership would be needed for random driver usage like i915.ko
-Chris
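
The shape such an interface might take (hypothetical declarations, not an
existing kernel API): work against any page reference the caller holds, take
the page lock internally only for the LRU move, and count pins so that a
page shared between users behaves:

/* Hypothetical interface shape, not existing kernel API. */
int page_pin_unevictable(struct page *page);	/* caller holds a ref; may sleep */
void page_unpin_unevictable(struct page *page);	/* evictable again at pin count zero */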
Chris Wilson June 6, 2017, 2:05 p.m. UTC | #4
Quoting Vlastimil Babka (2017-06-06 13:30:15)
> On 06/06/2017 02:14 PM, Michal Hocko wrote:
> > On Tue 06-06-17 13:04:36, Chris Wilson wrote:
> >> Similar in principle to the treatment of get_user_pages, pages that
> >> i915.ko acquires from shmemfs are not immediately reclaimable and so
> >> should be excluded from the mm accounting and vmscan until they have
> >> been returned to the system via shrink_slab/i915_gem_shrink. By moving
> >> the unreclaimable pages off the inactive anon lru, not only should
> >> vmscan be improved by avoiding walking unreclaimable pages, but the
> >> system should also have a better idea of how much memory it can reclaim
> >> at that moment in time.
> > 
> > That is certainly desirable. Peter has proposed a generic pin_page (or
> > similar) API. What happened with it? I think it would be a better
> > approach than (ab)using mlock API. I am also not familiar with the i915
> > code to be sure that using lock_page is really safe here. I think that
> > all we need is to simply move those pages in/out to/from unevictable LRU
> > list on pin/unpinning.
> 
> Hmm, even when on the unevictable list, the pages were still allocated as
> MOVABLE, while pinning prevents them from being migrated, so it doesn't
> play well with compaction/grouping by mobility/CMA etc. Addressing that
> would be more useful IMHO, and e.g. one of the features envisioned for
> the pinning API was to first migrate the pinned pages out of movable
> zones and CMA/MOVABLE pageblocks.

Whilst today i915 doesn't take part in compaction, we do have
plans/patches for enabling migratepage. It would be nice not to nip that
in the bud.
-Chris
Dave Hansen June 6, 2017, 4:17 p.m. UTC | #5
On 06/06/2017 05:14 AM, Michal Hocko wrote:
> On Tue 06-06-17 13:04:36, Chris Wilson wrote:
>> Similar in principle to the treatment of get_user_pages, pages that
>> i915.ko acquires from shmemfs are not immediately reclaimable and so
>> should be excluded from the mm accounting and vmscan until they have
>> been returned to the system via shrink_slab/i915_gem_shrink. By moving
>> the unreclaimable pages off the inactive anon lru, not only should
>> vmscan be improved by avoiding walking unreclaimable pages, but the
>> system should also have a better idea of how much memory it can reclaim
>> at that moment in time.
> That is certainly desirable. Peter has proposed a generic pin_page (or
> similar) API. What happened with it? I think it would be a better
> approach than (ab)using mlock API. I am also not familiar with the i915
> code to be sure that using lock_page is really safe here. I think that
> all we need is to simply move those pages in/out to/from unevictable LRU
> list on pin/unpinning.

Yes, very true.  I just suggested mlock'ing them because it was the
simplest way to get page_evictable() to return false.
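
For reference, page_evictable() (mm/vmscan.c of this vintage) reduces to a
mapping flag plus the mlock bit, which is why either mlock'ing individual
pages or flagging the whole mapping unevictable gets the same treatment
from vmscan:

int page_evictable(struct page *page)
{
	int ret;

	/* Prevent address_space of inode and swap cache from being freed */
	rcu_read_lock();
	ret = !mapping_unevictable(page_mapping(page)) && !PageMlocked(page);
	rcu_read_unlock();
	return ret;
}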
Dave Hansen June 6, 2017, 4:17 p.m. UTC | #6
On 06/06/2017 05:34 AM, Chris Wilson wrote:
> With respect to i915, we may not be the sole owner of the page at the
> point where we call shmem_read_mapping_page_gfp() as it can be mmapped or
> accessed directly via the mapping internally. It is just at this point
> we know that the page will not be returned to the system until we have
> finished using it with the GPU.
> 
> An API that didn't assume the page was locked or require exclusive
> ownership would be needed for random driver usage like i915.ko

Why do you think exclusive ownership is required, btw?  What does
exclusive ownership mean, anyway?  page_count()==1 and you hold the only
reference?
Peter Zijlstra June 6, 2017, 4:23 p.m. UTC | #7
On Tue, Jun 06, 2017 at 02:14:18PM +0200, Michal Hocko wrote:
> That is certainly desirable. Peter has proposed a generic pin_page (or
> similar) API. What happened with it?

I got stuck on converting IB ... and I think someone thereafter made an
even bigger mess of the pinning stuff. I don't know, I'd have to revisit
all that :/
Jerome Glisse June 6, 2017, 5:47 p.m. UTC | #8
On Tue, Jun 06, 2017 at 02:30:15PM +0200, Vlastimil Babka wrote:
> On 06/06/2017 02:14 PM, Michal Hocko wrote:
> > On Tue 06-06-17 13:04:36, Chris Wilson wrote:
> >> Similar in principle to the treatment of get_user_pages, pages that
> >> i915.ko acquires from shmemfs are not immediately reclaimable and so
> >> should be excluded from the mm accounting and vmscan until they have
> >> been returned to the system via shrink_slab/i915_gem_shrink. By moving
> >> the unreclaimable pages off the inactive anon lru, not only should
> >> vmscan be improved by avoiding walking unreclaimable pages, but the
> >> system should also have a better idea of how much memory it can reclaim
> >> at that moment in time.
> > 
> > That is certainly desirable. Peter has proposed a generic pin_page (or
> > similar) API. What happened with it? I think it would be a better
> > approach than (ab)using mlock API. I am also not familiar with the i915
> > code to be sure that using lock_page is really safe here. I think that
> > all we need is to simply move those pages in/out to/from unevictable LRU
> > list on pin/unpinning.
> 
> Hmm, even when on the unevictable list, the pages were still allocated as
> MOVABLE, while pinning prevents them from being migrated, so it doesn't
> play well with compaction/grouping by mobility/CMA etc. Addressing that
> would be more useful IMHO, and e.g. one of the features envisioned for
> the pinning API was to first migrate the pinned pages out of movable
> zones and CMA/MOVABLE pageblocks.

The cost would be high; GPU datasets can be big (gigabyte range), so I don't
see copying out of MOVABLE as something sane to do here. Maybe we can
reuse the lru pointer to store a pointer to a function plus metadata, so
that whoever pins a page provides a way to unpin it through that function.

The issue then is how to handle a double pin (i.e. when two different
drivers want to pin the same page).

Cheers,
Jérôme
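
A rough sketch of that lru-pointer idea (all names hypothetical): while a
page is pinned and isolated from the LRU, its struct page lru field is
otherwise unused and can carry an unpin callback, though as noted it only
accommodates a single owner:

struct page_pin_owner {
	void (*unpin)(struct page *page, void *data);
	void *data;
};

/* Caller must have isolated the page from its LRU list first,
 * e.g. via isolate_lru_page(); only one owner fits in page->lru. */
static void page_set_pin_owner(struct page *page,
			       struct page_pin_owner *owner)
{
	page->lru.next = (void *)owner;
}

static struct page_pin_owner *page_get_pin_owner(struct page *page)
{
	return (void *)page->lru.next;
}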
Chris Wilson Aug. 19, 2017, 1:15 p.m. UTC | #9
Quoting Michal Hocko (2017-06-06 13:14:18)
> On Tue 06-06-17 13:04:36, Chris Wilson wrote:
> > Similar in principle to the treatment of get_user_pages, pages that
> > i915.ko acquires from shmemfs are not immediately reclaimable and so
> > should be excluded from the mm accounting and vmscan until they have
> > been returned to the system via shrink_slab/i915_gem_shrink. By moving
> > the unreclaimable pages off the inactive anon lru, not only should
> > vmscan be improved by avoiding walking unreclaimable pages, but the
> > system should also have a better idea of how much memory it can reclaim
> > at that moment in time.
> 
> That is certainly desirable. Peter has proposed a generic pin_page (or
> similar) API. What happened with it? I think it would be a better
> approach than (ab)using mlock API. I am also not familiar with the i915
> code to be sure that using lock_page is really safe here. I think that
> all we need is to simply move those pages in/out to/from unevictable LRU
> list on pin/unpinning.

I just had the opportunity to try this mlock_vma_page() hack on a
borderline swapping system (i.e. lots of vmpressure between i915 buffers
and the buffercache), and marking the i915 pages as unevictable makes a
huge difference in avoiding stalls in direct reclaim across the system.

Reading back over the thread, it seems that the simplest approach going
forward is a small api for managing the pages on the unevictable LRU?

> > Note, however, the interaction with shrink_slab which will move some
> > mlocked pages back to the inactive anon lru.
> > 
> > Suggested-by: Dave Hansen <dave.hansen@intel.com>
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> > Cc: Matthew Auld <matthew.auld@intel.com>
> > Cc: Dave Hansen <dave.hansen@intel.com>
> > Cc: "Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>
> > Cc: Andrew Morton <akpm@linux-foundation.org>
> > Cc: Michal Hocko <mhocko@suse.com>
> > ---
> >  drivers/gpu/drm/i915/i915_gem.c | 17 ++++++++++++++++-
> >  mm/mlock.c                      |  2 ++
> >  2 files changed, 18 insertions(+), 1 deletion(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > index 8cb811519db1..37a98fbc6a12 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -2193,6 +2193,9 @@ void __i915_gem_object_truncate(struct drm_i915_gem_object *obj)
> >       obj->mm.pages = ERR_PTR(-EFAULT);
> >  }
> >  
> > +extern void mlock_vma_page(struct page *page);
> > +extern unsigned int munlock_vma_page(struct page *page);
> > +
> >  static void
> >  i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj,
> >                             struct sg_table *pages)
> > @@ -2214,6 +2217,10 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj,
> >               if (obj->mm.madv == I915_MADV_WILLNEED)
> >                       mark_page_accessed(page);
> >  
> > +             lock_page(page);
> > +             munlock_vma_page(page);
> > +             unlock_page(page);
> > +
> >               put_page(page);
> >       }
> >       obj->mm.dirty = false;
> > @@ -2412,6 +2419,10 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
> >               }
> >               last_pfn = page_to_pfn(page);
> >  
> > +             lock_page(page);
> > +             mlock_vma_page(page);
> > +             unlock_page(page);
> > +
> >               /* Check that the i965g/gm workaround works. */
> >               WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL));
> >       }
> > @@ -2450,8 +2461,12 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
> >  err_sg:
> >       sg_mark_end(sg);
> >  err_pages:
> > -     for_each_sgt_page(page, sgt_iter, st)
> > +     for_each_sgt_page(page, sgt_iter, st) {
> > +             lock_page(page);
> > +             munlock_vma_page(page);
> > +             unlock_page(page);
> >               put_page(page);
> > +     }
> >       sg_free_table(st);
> >       kfree(st);
> >  
> > diff --git a/mm/mlock.c b/mm/mlock.c
> > index b562b5523a65..531d9f8fd033 100644
> > --- a/mm/mlock.c
> > +++ b/mm/mlock.c
> > @@ -94,6 +94,7 @@ void mlock_vma_page(struct page *page)
> >                       putback_lru_page(page);
> >       }
> >  }
> > +EXPORT_SYMBOL_GPL(mlock_vma_page);
> >  
> >  /*
> >   * Isolate a page from LRU with optional get_page() pin.
> > @@ -211,6 +212,7 @@ unsigned int munlock_vma_page(struct page *page)
> >  out:
> >       return nr_pages - 1;
> >  }
> > +EXPORT_SYMBOL_GPL(munlock_vma_page);
> >  
> >  /*
> >   * convert get_user_pages() return value to posix mlock() error
> > -- 
> > 2.11.0
> > 
> 
> -- 
> Michal Hocko
> SUSE Labs
>
Michal Hocko Aug. 21, 2017, 2:06 p.m. UTC | #10
On Sat 19-08-17 14:15:35, Chris Wilson wrote:
> Quoting Michal Hocko (2017-06-06 13:14:18)
> > On Tue 06-06-17 13:04:36, Chris Wilson wrote:
> > > Similar in principle to the treatment of get_user_pages, pages that
> > > i915.ko acquires from shmemfs are not immediately reclaimable and so
> > > should be excluded from the mm accounting and vmscan until they have
> > > been returned to the system via shrink_slab/i915_gem_shrink. By moving
> > > the unreclaimable pages off the inactive anon lru, not only should
> > > vmscan be improved by avoiding walking unreclaimable pages, but the
> > > system should also have a better idea of how much memory it can reclaim
> > > at that moment in time.
> > 
> > That is certainly desirable. Peter has proposed a generic pin_page (or
> > similar) API. What happened with it? I think it would be a better
> > approach than (ab)using mlock API. I am also not familiar with the i915
> > code to be sure that using lock_page is really safe here. I think that
> > all we need is to simply move those pages in/out to/from unevictable LRU
> > list on pin/unpinning.
> 
> I just had the opportunity to try this mlock_vma_page() hack on a
> borderline swapping system (i.e. lots of vmpressure between i915 buffers
> and the buffercache), and marking the i915 pages as unevictable makes a
> huge difference in avoiding stalls in direct reclaim across the system.
> 
> Reading back over the thread, it seems that the simplest approach going
> forward is a small api for managing the pages on the unevictable LRU?

Yes, and I thought that the pin_page API would do exactly that.
Chris Wilson Aug. 21, 2017, 3:03 p.m. UTC | #11
Quoting Michal Hocko (2017-08-21 15:06:42)
> On Sat 19-08-17 14:15:35, Chris Wilson wrote:
> > Quoting Michal Hocko (2017-06-06 13:14:18)
> > > On Tue 06-06-17 13:04:36, Chris Wilson wrote:
> > > > Similar in principle to the treatment of get_user_pages, pages that
> > > > i915.ko acquires from shmemfs are not immediately reclaimable and so
> > > > should be excluded from the mm accounting and vmscan until they have
> > > > been returned to the system via shrink_slab/i915_gem_shrink. By moving
> > > > the unreclaimable pages off the inactive anon lru, not only should
> > > > vmscan be improved by avoiding walking unreclaimable pages, but the
> > > > system should also have a better idea of how much memory it can reclaim
> > > > at that moment in time.
> > > 
> > > That is certainly desirable. Peter has proposed a generic pin_page (or
> > > similar) API. What happened with it? I think it would be a better
> > > approach than (ab)using mlock API. I am also not familiar with the i915
> > > code to be sure that using lock_page is really safe here. I think that
> > > all we need is to simply move those pages in/out to/from unevictable LRU
> > > list on pin/unpinning.
> > 
> > I just had the opportunity to try this mlock_vma_page() hack on a
> > borderline swapping system (i.e. lots of vmpressure between i915 buffers
> > and the buffercache), and marking the i915 pages as unevictable makes a
> > huge difference in avoiding stalls in direct reclaim across the system.
> > 
> > Reading back over the thread, it seems that the simplest approach going
> > forward is a small api for managing the pages on the unevictable LRU?
> 
> Yes and I thought that pin_page API would do exactly that.

My googlefu says "[RFC][PATCH 1/5] mm: Introduce VM_PINNED and
interfaces" is the series, and it certainly targets the very same
problem.

Peter, is that the latest version?
-Chris
Peter Zijlstra Aug. 21, 2017, 3:57 p.m. UTC | #12
On Mon, Aug 21, 2017 at 04:03:31PM +0100, Chris Wilson wrote:
> My googlefu says "[RFC][PATCH 1/5] mm: Introduce VM_PINNED and
> interfaces" is the series, and it certainly targets the very same
> problem.
> 
> Peter, is that the latest version?

Probably, I ran into the Infiniband code and couldn't convince anybody
to help me out :/ It's been stale for a few years now, I'm afraid.
Christoph Lameter (Ampere) Aug. 21, 2017, 4:29 p.m. UTC | #13
On Mon, 21 Aug 2017, Peter Zijlstra wrote:

> > Peter, is that the latest version?
>
> Probably, I ran into the Infiniband code and couldn't convince anybody
> to help me out :/ It's been stale for a few years now, I'm afraid.

What help do you need? CCing linux-rdma....

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 8cb811519db1..37a98fbc6a12 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2193,6 +2193,9 @@  void __i915_gem_object_truncate(struct drm_i915_gem_object *obj)
 	obj->mm.pages = ERR_PTR(-EFAULT);
 }
 
+extern void mlock_vma_page(struct page *page);
+extern unsigned int munlock_vma_page(struct page *page);
+
 static void
 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj,
 			      struct sg_table *pages)
@@ -2214,6 +2217,10 @@  i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj,
 		if (obj->mm.madv == I915_MADV_WILLNEED)
 			mark_page_accessed(page);
 
+		lock_page(page);
+		munlock_vma_page(page);
+		unlock_page(page);
+
 		put_page(page);
 	}
 	obj->mm.dirty = false;
@@ -2412,6 +2419,10 @@  i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
 		}
 		last_pfn = page_to_pfn(page);
 
+		lock_page(page);
+		mlock_vma_page(page);
+		unlock_page(page);
+
 		/* Check that the i965g/gm workaround works. */
 		WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL));
 	}
@@ -2450,8 +2461,12 @@  i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
 err_sg:
 	sg_mark_end(sg);
 err_pages:
-	for_each_sgt_page(page, sgt_iter, st)
+	for_each_sgt_page(page, sgt_iter, st) {
+		lock_page(page);
+		munlock_vma_page(page);
+		unlock_page(page);
 		put_page(page);
+	}
 	sg_free_table(st);
 	kfree(st);
 
diff --git a/mm/mlock.c b/mm/mlock.c
index b562b5523a65..531d9f8fd033 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -94,6 +94,7 @@  void mlock_vma_page(struct page *page)
 			putback_lru_page(page);
 	}
 }
+EXPORT_SYMBOL_GPL(mlock_vma_page);
 
 /*
  * Isolate a page from LRU with optional get_page() pin.
@@ -211,6 +212,7 @@  unsigned int munlock_vma_page(struct page *page)
 out:
 	return nr_pages - 1;
 }
+EXPORT_SYMBOL_GPL(munlock_vma_page);
 
 /*
  * convert get_user_pages() return value to posix mlock() error