diff mbox

drm/i915: Disable shrinker for non-swapped backed objects

Message ID 1448270424-16612-1-git-send-email-chris@chris-wilson.co.uk
State New, archived
Headers show

Commit Message

Chris Wilson Nov. 23, 2015, 9:20 a.m. UTC
If the system has no available swap pages, we cannot make forward
progress in the shrinker by releasing active pages, only by releasing
purgeable pages which are immediately reaped. Take total_swap_pages into
account when counting up available objects to be shrunk and subsequently
shrinking them. By doing so, we avoid unbinding objects that cannot be
shrunk and so wasting CPU cycles flushing those objects from the GPU to
the system and then immediately back again (as they will more than
likely be reused shortly after).

Based on a patch by Akash Goel.

Reported-by: Akash Goel <akash.goel@intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Akash Goel <akash.goel@intel.com>
Cc: sourab.gupta@intel.com
---
 drivers/gpu/drm/i915/i915_gem_shrinker.c | 55 ++++++++++++++++++++++----------
 1 file changed, 39 insertions(+), 16 deletions(-)

Comments

Daniel Vetter Nov. 24, 2015, 5:15 p.m. UTC | #1
On Mon, Nov 23, 2015 at 09:20:24AM +0000, Chris Wilson wrote:
> If the system has no available swap pages, we cannot make forward
> progress in the shrinker by releasing active pages, only by releasing
> purgeable pages which are immediately reaped. Take total_swap_pages into
> account when counting up available objects to be shrunk and subsequently
> shrinking them. By doing so, we avoid unbinding objects that cannot be
> shrunk and so wasting CPU cycles flushing those objects from the GPU to
> the system and then immediately back again (as they will more than
> likely be reused shortly after).
> 
> Based on a patch by Akash Goel.
> 
> Reported-by: Akash Goel <akash.goel@intel.com>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Akash Goel <akash.goel@intel.com>
> Cc: sourab.gupta@intel.com

Cc: linux-mm@kvack.org should be done on this one, just in case they have
ideas for proper interfaces for this. Which they might, given that Jerome
Glisse is working on swapout-to-vram and other fun stuff like that.

Also, how does stuff like zswap (or whatever "compress my swap in memory"
is called again) factor in here? Iirc Android very much does use that.

i915 side looks fine, but I'd like an ack from a core mm hacker for this.
-Daniel

> ---
>  drivers/gpu/drm/i915/i915_gem_shrinker.c | 55 ++++++++++++++++++++++----------
>  1 file changed, 39 insertions(+), 16 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> index f7df54a8ee2b..0823f321b7de 100644
> --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
> +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> @@ -47,6 +47,41 @@ static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task)
>  #endif
>  }
>  
> +static int num_vma_bound(struct drm_i915_gem_object *obj)
> +{
> +	struct i915_vma *vma;
> +	int count = 0;
> +
> +	list_for_each_entry(vma, &obj->vma_list, vma_link) {
> +		if (drm_mm_node_allocated(&vma->node))
> +			count++;
> +		if (vma->pin_count)
> +			count++;
> +	}
> +
> +	return count;
> +}
> +
> +static bool can_release_pages(struct drm_i915_gem_object *obj)
> +{
> +	/* Only report true if by unbinding the object and putting its pages
> +	 * we can actually make forward progress towards freeing physical
> +	 * pages.
> +	 *
> +	 * If the pages are pinned for any other reason than being bound
> +	 * to the GPU, simply unbinding from the GPU is not going to succeed
> +	 * in release our pin count on the pages themselves.
> +	 */
> +	if (obj->pages_pin_count != num_vma_bound(obj))
> +		return false;
> +
> +	/* We can only return physical pages if we either discard them
> +	 * (because the user has marked them as being purgeable) or if
> +	 * we can move their contents out to swap.
> +	 */
> +	return total_swap_pages || obj->madv == I915_MADV_DONTNEED;
> +}
> +
>  /**
>   * i915_gem_shrink - Shrink buffer object caches
>   * @dev_priv: i915 device
> @@ -129,6 +164,9 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
>  			if ((flags & I915_SHRINK_ACTIVE) == 0 && obj->active)
>  				continue;
>  
> +			if (!can_release_pages(obj))
> +				continue;
> +
>  			drm_gem_object_reference(&obj->base);
>  
>  			/* For the unbound phase, this should be a no-op! */
> @@ -188,21 +226,6 @@ static bool i915_gem_shrinker_lock(struct drm_device *dev, bool *unlock)
>  	return true;
>  }
>  
> -static int num_vma_bound(struct drm_i915_gem_object *obj)
> -{
> -	struct i915_vma *vma;
> -	int count = 0;
> -
> -	list_for_each_entry(vma, &obj->vma_list, vma_link) {
> -		if (drm_mm_node_allocated(&vma->node))
> -			count++;
> -		if (vma->pin_count)
> -			count++;
> -	}
> -
> -	return count;
> -}
> -
>  static unsigned long
>  i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc)
>  {
> @@ -222,7 +245,7 @@ i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc)
>  			count += obj->base.size >> PAGE_SHIFT;
>  
>  	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
> -		if (!obj->active && obj->pages_pin_count == num_vma_bound(obj))
> +		if (!obj->active && can_release_pages(obj))
>  			count += obj->base.size >> PAGE_SHIFT;
>  	}
>  
> -- 
> 2.6.2
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Chris Wilson Nov. 24, 2015, 11:17 p.m. UTC | #2
On Tue, Nov 24, 2015 at 06:15:47PM +0100, Daniel Vetter wrote:
> On Mon, Nov 23, 2015 at 09:20:24AM +0000, Chris Wilson wrote:
> > If the system has no available swap pages, we cannot make forward
> > progress in the shrinker by releasing active pages, only by releasing
> > purgeable pages which are immediately reaped. Take total_swap_pages into
> > account when counting up available objects to be shrunk and subsequently
> > shrinking them. By doing so, we avoid unbinding objects that cannot be
> > shrunk and so wasting CPU cycles flushing those objects from the GPU to
> > the system and then immediately back again (as they will more than
> > likely be reused shortly after).
> > 
> > Based on a patch by Akash Goel.
> > 
> > Reported-by: Akash Goel <akash.goel@intel.com>
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Akash Goel <akash.goel@intel.com>
> > Cc: sourab.gupta@intel.com
> 
> Cc: linux-mm@kvack.org should be done on this one, just in case they have
> ideas for proper interfaces for this. Which might be, given that Jerome
> Glisse is working on swaput-to-vram and other fun stuff like that.
> 
> Also, how does stuff like zswap (or whatever "compress my swap in memory"
> is called again) factor in here? Iirc Android very much does use that.

It doesn't. We would need

#include <linux/frontswap.h>

static bool swap_available(void)
{
	return total_swap_pages || frontswap_enabled;
}

But if that then returns true for Android it seems the primary usecase
is invalidated.
-Chris
Daniel Vetter Nov. 25, 2015, 9:17 a.m. UTC | #3
On Tue, Nov 24, 2015 at 11:17:38PM +0000, Chris Wilson wrote:
> On Tue, Nov 24, 2015 at 06:15:47PM +0100, Daniel Vetter wrote:
> > On Mon, Nov 23, 2015 at 09:20:24AM +0000, Chris Wilson wrote:
> > > If the system has no available swap pages, we cannot make forward
> > > progress in the shrinker by releasing active pages, only by releasing
> > > purgeable pages which are immediately reaped. Take total_swap_pages into
> > > account when counting up available objects to be shrunk and subsequently
> > > shrinking them. By doing so, we avoid unbinding objects that cannot be
> > > shrunk and so wasting CPU cycles flushing those objects from the GPU to
> > > the system and then immediately back again (as they will more than
> > > likely be reused shortly after).
> > > 
> > > Based on a patch by Akash Goel.
> > > 
> > > Reported-by: Akash Goel <akash.goel@intel.com>
> > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > > Cc: Akash Goel <akash.goel@intel.com>
> > > Cc: sourab.gupta@intel.com
> > 
> > Cc: linux-mm@kvack.org should be done on this one, just in case they have
> > ideas for proper interfaces for this. Which might be, given that Jerome
> > Glisse is working on swaput-to-vram and other fun stuff like that.
> > 
> > Also, how does stuff like zswap (or whatever "compress my swap in memory"
> > is called again) factor in here? Iirc Android very much does use that.
> 
> It doesn't. We would need
> 
> #include <linux/frontswap.h>
> 
> static bool swap_available(void)
> {
> 	return total_swap_pages || frontswap_enabled;
> }
> 
> But if that then returns true for Android it seems the primary usecase
> is invalidated.

Well swapping to frontswap should be ok. Thrashing not so much, and if we
do that I suspect there's something really lopsided with memory usage
balancing going on ... Does the android workload have your "only shrink
inactive" patch already?
-Daniel
Chris Wilson Nov. 25, 2015, 9:58 a.m. UTC | #4
On Wed, Nov 25, 2015 at 10:17:49AM +0100, Daniel Vetter wrote:
> On Tue, Nov 24, 2015 at 11:17:38PM +0000, Chris Wilson wrote:
> > On Tue, Nov 24, 2015 at 06:15:47PM +0100, Daniel Vetter wrote:
> > > On Mon, Nov 23, 2015 at 09:20:24AM +0000, Chris Wilson wrote:
> > > > If the system has no available swap pages, we cannot make forward
> > > > progress in the shrinker by releasing active pages, only by releasing
> > > > purgeable pages which are immediately reaped. Take total_swap_pages into
> > > > account when counting up available objects to be shrunk and subsequently
> > > > shrinking them. By doing so, we avoid unbinding objects that cannot be
> > > > shrunk and so wasting CPU cycles flushing those objects from the GPU to
> > > > the system and then immediately back again (as they will more than
> > > > likely be reused shortly after).
> > > > 
> > > > Based on a patch by Akash Goel.
> > > > 
> > > > Reported-by: Akash Goel <akash.goel@intel.com>
> > > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > > > Cc: Akash Goel <akash.goel@intel.com>
> > > > Cc: sourab.gupta@intel.com
> > > 
> > > Cc: linux-mm@kvack.org should be done on this one, just in case they have
> > > ideas for proper interfaces for this. Which might be, given that Jerome
> > > Glisse is working on swaput-to-vram and other fun stuff like that.
> > > 
> > > Also, how does stuff like zswap (or whatever "compress my swap in memory"
> > > is called again) factor in here? Iirc Android very much does use that.
> > 
> > It doesn't. We would need
> > 
> > #include <linux/frontswap.h>
> > 
> > static bool swap_available(void)
> > {
> > 	return total_swap_pages || frontswap_enabled;
> > }
> > 
> > But if that then returns true for Android it seems the primary usecase
> > is invalidated.
> 
> Well swapping to frontswap should be ok. Trashing not so much, and if we
> do that I suspect there's something really loopsided with memory usage
> balancing going on ... Does the android workload have your "only shrink
> inactive" patch already?

I'll let Akash or Sourab comment, but the background to the patch was
that they observed that under memory pressure a framebuffer was being
unbound (obviously not pinned as a current scanout) and then rebound
(clflushing both ways ofc). My gut says that the priority lists in the
kernel and userspace are out of kilter if we either fail to purge the LRU
object in the kernel or if userspace then doesn't try to reuse the MRU
backbuffer. One thing I did notice when also dealing with memory
pressure flushing backbuffers was (a) they were unaligned and so needed
rebinding before pinning
http://cgit.freedesktop.org/~ickle/linux-2.6/commit/?h=nightly&id=df636036d120c6227d1918cfd6d70232d8d37b4c
and (b) we didn't bump the scanout on the inactive list
http://cgit.freedesktop.org/~ickle/linux-2.6/commit/?h=nightly&id=3a23ff3e5e201a52068d6e9d65f4ffb95077c21e
-Chris
akash.goel@intel.com Nov. 25, 2015, 1:36 p.m. UTC | #5
On 11/25/2015 3:28 PM, Chris Wilson wrote:
> On Wed, Nov 25, 2015 at 10:17:49AM +0100, Daniel Vetter wrote:
>> On Tue, Nov 24, 2015 at 11:17:38PM +0000, Chris Wilson wrote:
>>> On Tue, Nov 24, 2015 at 06:15:47PM +0100, Daniel Vetter wrote:
>>>> On Mon, Nov 23, 2015 at 09:20:24AM +0000, Chris Wilson wrote:
>>>>> If the system has no available swap pages, we cannot make forward
>>>>> progress in the shrinker by releasing active pages, only by releasing
>>>>> purgeable pages which are immediately reaped. Take total_swap_pages into
>>>>> account when counting up available objects to be shrunk and subsequently
>>>>> shrinking them. By doing so, we avoid unbinding objects that cannot be
>>>>> shrunk and so wasting CPU cycles flushing those objects from the GPU to
>>>>> the system and then immediately back again (as they will more than
>>>>> likely be reused shortly after).
>>>>>
>>>>> Based on a patch by Akash Goel.
>>>>>
>>>>> Reported-by: Akash Goel <akash.goel@intel.com>
>>>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>>>> Cc: Akash Goel <akash.goel@intel.com>
>>>>> Cc: sourab.gupta@intel.com
>>>>
>>>> Cc: linux-mm@kvack.org should be done on this one, just in case they have
>>>> ideas for proper interfaces for this. Which might be, given that Jerome
>>>> Glisse is working on swaput-to-vram and other fun stuff like that.
>>>>
>>>> Also, how does stuff like zswap (or whatever "compress my swap in memory"
>>>> is called again) factor in here? Iirc Android very much does use that.
>>>
>>> It doesn't. We would need
>>>
>>> #include <linux/frontswap.h>
>>>
>>> static bool swap_available(void)
>>> {
>>> 	return total_swap_pages || frontswap_enabled;
>>> }
>>>
>>> But if that then returns true for Android it seems the primary usecase
>>> is invalidated.

CONFIG_FRONTSWAP is not set yet, but ZRAM (compressed swap in RAM) has
recently been enabled on some devices, so 'total_swap_pages' will be
nonzero on those devices.

>>
>> Well swapping to frontswap should be ok. Trashing not so much, and if we
>> do that I suspect there's something really loopsided with memory usage
>> balancing going on ... Does the android workload have your "only shrink
>> inactive" patch already?
>

Sorry the "only shrink inactive" patch has not been included yet.
Will pull these 2 patches.
5763ff0 drm/i915: Avoid GPU stalls from kswapd
c9c0f5e drm/i915: During shrink_all we only need to idle the GPU

Best regards
Akash

> I'll let Akash or Sourab comment, but the background to the patch was
> that they observed that under memory pressure a framebuffer was being
> unbound (obviously not pinned as a current scanout) and then rebound
> (clflushing both ways ofc). My gut says that the priority lists in the
> kernel and userspace are akilter if we either fail to purge the LRU
> object in the kernel or if userspace then doesn't try to reuse the MRU
> backbuffer.
> One thing I did notice when also dealing with memory
> pressure flushing backbuffers was (a) they were unaligned and so needed
> rebinding before pinning
> http://cgit.freedesktop.org/~ickle/linux-2.6/commit/?h=nightly&id=df636036d120c6227d1918cfd6d70232d8d37b4c
> and (b) we didn't bump the scanout on the inactive list
> http://cgit.freedesktop.org/~ickle/linux-2.6/commit/?h=nightly&id=3a23ff3e5e201a52068d6e9d65f4ffb95077c21e
> -Chris
>
Daniel Vetter Nov. 26, 2015, 9:34 a.m. UTC | #6
On Wed, Nov 25, 2015 at 09:58:28AM +0000, Chris Wilson wrote:
> On Wed, Nov 25, 2015 at 10:17:49AM +0100, Daniel Vetter wrote:
> > On Tue, Nov 24, 2015 at 11:17:38PM +0000, Chris Wilson wrote:
> > > On Tue, Nov 24, 2015 at 06:15:47PM +0100, Daniel Vetter wrote:
> > > > On Mon, Nov 23, 2015 at 09:20:24AM +0000, Chris Wilson wrote:
> > > > > If the system has no available swap pages, we cannot make forward
> > > > > progress in the shrinker by releasing active pages, only by releasing
> > > > > purgeable pages which are immediately reaped. Take total_swap_pages into
> > > > > account when counting up available objects to be shrunk and subsequently
> > > > > shrinking them. By doing so, we avoid unbinding objects that cannot be
> > > > > shrunk and so wasting CPU cycles flushing those objects from the GPU to
> > > > > the system and then immediately back again (as they will more than
> > > > > likely be reused shortly after).
> > > > > 
> > > > > Based on a patch by Akash Goel.
> > > > > 
> > > > > Reported-by: Akash Goel <akash.goel@intel.com>
> > > > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > > > > Cc: Akash Goel <akash.goel@intel.com>
> > > > > Cc: sourab.gupta@intel.com
> > > > 
> > > > Cc: linux-mm@kvack.org should be done on this one, just in case they have
> > > > ideas for proper interfaces for this. Which might be, given that Jerome
> > > > Glisse is working on swaput-to-vram and other fun stuff like that.
> > > > 
> > > > Also, how does stuff like zswap (or whatever "compress my swap in memory"
> > > > is called again) factor in here? Iirc Android very much does use that.
> > > 
> > > It doesn't. We would need
> > > 
> > > #include <linux/frontswap.h>
> > > 
> > > static bool swap_available(void)
> > > {
> > > 	return total_swap_pages || frontswap_enabled;
> > > }
> > > 
> > > But if that then returns true for Android it seems the primary usecase
> > > is invalidated.
> > 
> > Well swapping to frontswap should be ok. Trashing not so much, and if we
> > do that I suspect there's something really loopsided with memory usage
> > balancing going on ... Does the android workload have your "only shrink
> > inactive" patch already?
> 
> I'll let Akash or Sourab comment, but the background to the patch was
> that they observed that under memory pressure a framebuffer was being
> unbound (obviously not pinned as a current scanout) and then rebound
> (clflushing both ways ofc). My gut says that the priority lists in the
> kernel and userspace are akilter if we either fail to purge the LRU
> object in the kernel or if userspace then doesn't try to reuse the MRU
> backbuffer. One thing I did notice when also dealing with memory
> pressure flushing backbuffers was (a) they were unaligned and so needed
> rebinding before pinning
> http://cgit.freedesktop.org/~ickle/linux-2.6/commit/?h=nightly&id=df636036d120c6227d1918cfd6d70232d8d37b4c

Not sure I read this correctly, but shouldn't we cache the alignment for
as long as the buffer isn't purged? Your patch resets when we unpin the
last display user. So in your scenario above that could result in an
unaligned rebinding for GT first, then aligned rebinding for display. I
figured the idea is to get things right for the render right away?

Only risk is that we might overalign things, but that only happens when
userspace reuses fbs and non-fbs in a mixed fashion. But that shouldn't be
a real problem I think.

> and (b) we didn't bump the scanout on the inactive list
> http://cgit.freedesktop.org/~ickle/linux-2.6/commit/?h=nightly&id=3a23ff3e5e201a52068d6e9d65f4ffb95077c21e

Yeah bumping the inactive list when we unpin from display definitely makes
sense. Of course it only plays well together with the userspace fb cache
if that is indeed MRU, and I think Android's isn't. It's all strictly fifo
buffer queues, both between kernel and surface flinger and between surface
flinger and clients.
-Daniel
Chris Wilson Nov. 26, 2015, 10:30 a.m. UTC | #7
On Thu, Nov 26, 2015 at 10:34:51AM +0100, Daniel Vetter wrote:
> On Wed, Nov 25, 2015 at 09:58:28AM +0000, Chris Wilson wrote:
> > One thing I did notice when also dealing with memory
> > pressure flushing backbuffers was (a) they were unaligned and so needed
> > rebinding before pinning
> > http://cgit.freedesktop.org/~ickle/linux-2.6/commit/?h=nightly&id=df636036d120c6227d1918cfd6d70232d8d37b4c
> 
> Not sure I read this correctly, but shouldn't we cache the alignment for
> as long as the buffer isn't purged? Your patch resets when we unpin the
> last display user. So in your scenario above that could result in an
> unaligned rebinding for GT first, then aligned rebinding for display. I
> figured the idea is to get things right for the render right away?

It was focused on solving the problem that scanout needed to realign
the buffer. I felt that keeping the maximum alignment imposed by the
user was just asking for trouble. (It's actually a bug in that patch
that the alignment is reset there, it should be when
framebuffer_references drops to zero. Also note that it depends upon the
vma being persistent until closed.)

> Only risk is that we might overalign things, but that only happens when
> userspace reuses fbs and non-fbs in a mixed fashion. But that shouldn't be
> a real problem I think.

Probably not, I just don't trust them! The goal is to keep the maximum
restriction for only as long as it makes sense. We want relaxed fenced
layout (because space is a scarce resource on that hw), so always
binding a tiled object at its max alignment is counter productive.
framebuffers are typically only created for as long as required (give or
take a small amount of caching, either in the flip-sequence or by a
timer on idle). So keeping the fb's vma aligned seems a worthwhile
tradeoff to avoid having to rebind it just as we want to present it to
the screen. We have no time bounds on the user alignment, so that will
seem to be always at odds with reducing the alignment for improved packing
at the earliest opportunity.

I'm pretty certain that fb alignment is the only restriction we wish to
keep.
-Chris
Daniel Vetter Nov. 26, 2015, 11:36 a.m. UTC | #8
On Thu, Nov 26, 2015 at 10:30:57AM +0000, Chris Wilson wrote:
> On Thu, Nov 26, 2015 at 10:34:51AM +0100, Daniel Vetter wrote:
> > On Wed, Nov 25, 2015 at 09:58:28AM +0000, Chris Wilson wrote:
> > > One thing I did notice when also dealing with memory
> > > pressure flushing backbuffers was (a) they were unaligned and so needed
> > > rebinding before pinning
> > > http://cgit.freedesktop.org/~ickle/linux-2.6/commit/?h=nightly&id=df636036d120c6227d1918cfd6d70232d8d37b4c
> > 
> > Not sure I read this correctly, but shouldn't we cache the alignment for
> > as long as the buffer isn't purged? Your patch resets when we unpin the
> > last display user. So in your scenario above that could result in an
> > unaligned rebinding for GT first, then aligned rebinding for display. I
> > figured the idea is to get things right for the render right away?
> 
> It was focused on the solving the problem that scanout needed to realign
> the buffer. I felt that keeping the maximum alignment imposed by the
> user was just asking for trouble. (It's actually a bug in that patch
> that the alignment is reset there, it should be when
> framebuffer_references drops to zero. Also note that is depends upon the
> vma being persistent until closed.)
> 
> > Only risk is that we might overalign things, but that only happens when
> > userspace reuses fbs and non-fbs in a mixed fashion. But that shouldn't be
> > a real problem I think.
> 
> Probably not, just I don't trust them! The goal is keep the maximum
> restriction for only as long as it makes sense. We want relaxed fenced
> layout (because space is at a scarce resource on that hw), so always
> binding a tiled object at its max alignment is counter productive.
> framebuffers are typically only created for as long as required (give or
> take a small amount of caching, either in the flip-sequence or by a
> timer on idle). So keeping the fb's vma aligned seems a worthwhile
> tradeoff to avoid having to rebind it just as we want to present it to
> the screen. We have no time bounds on the user alignment, so that will
> seem to be always at odds with reducing the alignment for improved packing
> at the earliest opportunity.
> 
> I'm pretty certain that fb alignment is the only restriction we wish to
> keep.

Yeah, keeping fb alignment until fb_refs drops to 0 makes sense.
-Daniel
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index f7df54a8ee2b..0823f321b7de 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -47,6 +47,41 @@  static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task)
 #endif
 }
 
+static int num_vma_bound(struct drm_i915_gem_object *obj)
+{
+	struct i915_vma *vma;
+	int count = 0;
+
+	list_for_each_entry(vma, &obj->vma_list, vma_link) {
+		if (drm_mm_node_allocated(&vma->node))
+			count++;
+		if (vma->pin_count)
+			count++;
+	}
+
+	return count;
+}
+
+static bool can_release_pages(struct drm_i915_gem_object *obj)
+{
+	/* Only report true if by unbinding the object and putting its pages
+	 * we can actually make forward progress towards freeing physical
+	 * pages.
+	 *
+	 * If the pages are pinned for any other reason than being bound
+	 * to the GPU, simply unbinding from the GPU is not going to succeed
+	 * in releasing our pin count on the pages themselves.
+	 */
+	if (obj->pages_pin_count != num_vma_bound(obj))
+		return false;
+
+	/* We can only return physical pages if we either discard them
+	 * (because the user has marked them as being purgeable) or if
+	 * we can move their contents out to swap.
+	 */
+	return total_swap_pages || obj->madv == I915_MADV_DONTNEED;
+}
+
 /**
  * i915_gem_shrink - Shrink buffer object caches
  * @dev_priv: i915 device
@@ -129,6 +164,9 @@  i915_gem_shrink(struct drm_i915_private *dev_priv,
 			if ((flags & I915_SHRINK_ACTIVE) == 0 && obj->active)
 				continue;
 
+			if (!can_release_pages(obj))
+				continue;
+
 			drm_gem_object_reference(&obj->base);
 
 			/* For the unbound phase, this should be a no-op! */
@@ -188,21 +226,6 @@  static bool i915_gem_shrinker_lock(struct drm_device *dev, bool *unlock)
 	return true;
 }
 
-static int num_vma_bound(struct drm_i915_gem_object *obj)
-{
-	struct i915_vma *vma;
-	int count = 0;
-
-	list_for_each_entry(vma, &obj->vma_list, vma_link) {
-		if (drm_mm_node_allocated(&vma->node))
-			count++;
-		if (vma->pin_count)
-			count++;
-	}
-
-	return count;
-}
-
 static unsigned long
 i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc)
 {
@@ -222,7 +245,7 @@  i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc)
 			count += obj->base.size >> PAGE_SHIFT;
 
 	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
-		if (!obj->active && obj->pages_pin_count == num_vma_bound(obj))
+		if (!obj->active && can_release_pages(obj))
 			count += obj->base.size >> PAGE_SHIFT;
 	}