[5/5] drm/i915: Avoid GPU stalls from kswapd

Message ID 1443698309-28038-5-git-send-email-chris@chris-wilson.co.uk (mailing list archive)
State New, archived

Commit Message

Chris Wilson Oct. 1, 2015, 11:18 a.m. UTC
Exclude active GPU pages from the purview of the background shrinker
(kswapd), as these cause uncontrollable GPU stalls. Given that the
shrinker is rerun until the freelists are satisfied, we should have
opportunity in subsequent passes to recover the pages once idle. If the
machine does run out of memory entirely, we have the forced idling in the
oom-notifier as a means of releasing all the pages we can before an oom
is prematurely executed.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Damien Lespiau <damien.lespiau@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h          | 1 +
 drivers/gpu/drm/i915/i915_gem_shrinker.c | 9 +++++++--
 2 files changed, 8 insertions(+), 2 deletions(-)
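
To make the behaviour change concrete, here is a minimal userspace model of the gate this patch adds to i915_gem_shrink(): unless a caller explicitly passes I915_SHRINK_ACTIVE, objects still active on the GPU are skipped. Only the flag values and the obj->active test mirror the diff; the mock structures, should_shrink(), and the assumption that the kswapd-facing scan callback keeps passing just BOUND | UNBOUND (it is not shown in this diff) are simplifications for illustration, not the actual driver code.

#include <stdbool.h>
#include <stdio.h>

#define I915_SHRINK_PURGEABLE 0x1
#define I915_SHRINK_UNBOUND   0x2
#define I915_SHRINK_BOUND     0x4
#define I915_SHRINK_ACTIVE    0x8   /* new in this patch */

struct mock_obj {
	const char *name;
	bool active;      /* still referenced by an unretired GPU request */
	bool purgeable;   /* i.e. madv == I915_MADV_DONTNEED */
};

/* Models the per-object filter in i915_gem_shrink(): purgeable-only
 * passes skip non-purgeable objects, and (the new part) active objects
 * are skipped unless the caller opted in with I915_SHRINK_ACTIVE. */
static bool should_shrink(const struct mock_obj *obj, unsigned int flags)
{
	if ((flags & I915_SHRINK_PURGEABLE) && !obj->purgeable)
		return false;
	if ((flags & I915_SHRINK_ACTIVE) == 0 && obj->active)
		return false;
	return true;
}

int main(void)
{
	const struct mock_obj objs[] = {
		{ "idle-purgeable", false, true  },
		{ "active-batch",   true,  false },
	};
	/* Background (kswapd) pass: assumed to stay BOUND | UNBOUND only. */
	unsigned int kswapd = I915_SHRINK_BOUND | I915_SHRINK_UNBOUND;
	/* i915_gem_shrink_all() now adds ACTIVE, so it still frees everything. */
	unsigned int shrink_all = kswapd | I915_SHRINK_ACTIVE;

	for (size_t i = 0; i < sizeof(objs) / sizeof(objs[0]); i++)
		printf("%-16s kswapd=%d shrink_all=%d\n", objs[i].name,
		       should_shrink(&objs[i], kswapd),
		       should_shrink(&objs[i], shrink_all));
	return 0;
}

The upshot is that an active object only loses its pages via the shrink_all/oom path; an ordinary kswapd pass leaves it alone and relies on a later pass once the object has idled.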

Comments

Daniel Vetter Oct. 6, 2015, 1:01 p.m. UTC | #1
On Thu, Oct 01, 2015 at 12:18:29PM +0100, Chris Wilson wrote:
> Exclude active GPU pages from the purview of the background shrinker
> (kswapd), as these cause uncontrollable GPU stalls. Given that the
> shrinker is rerun until the freelists are satisfied, we should have
> opportunity in subsequent passes to recover the pages once idle. If the
> machine does run out of memory entirely, we have the forced idling in the
> oom-notifier as a means of releasing all the pages we can before an oom
> is prematurely executed.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Reviewed-by: Damien Lespiau <damien.lespiau@intel.com>

lgtm, but imo we should move the retire_requests from an earlier patch to
this one.
-Daniel

> ---
>  drivers/gpu/drm/i915/i915_drv.h          | 1 +
>  drivers/gpu/drm/i915/i915_gem_shrinker.c | 9 +++++++--
>  2 files changed, 8 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 64b8929acdf2..a443310a3598 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -3159,6 +3159,7 @@ unsigned long i915_gem_shrink(struct drm_i915_private *dev_priv,
>  #define I915_SHRINK_PURGEABLE 0x1
>  #define I915_SHRINK_UNBOUND 0x2
>  #define I915_SHRINK_BOUND 0x4
> +#define I915_SHRINK_ACTIVE 0x8
>  unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv);
>  void i915_gem_shrinker_init(struct drm_i915_private *dev_priv);
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> index 2058d162aeb9..b2ccb7346899 100644
> --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
> +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> @@ -126,6 +126,9 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
>  			    obj->madv != I915_MADV_DONTNEED)
>  				continue;
>  
> +			if ((flags & I915_SHRINK_ACTIVE) == 0 && obj->active)
> +				continue;
> +
>  			drm_gem_object_reference(&obj->base);
>  
>  			/* For the unbound phase, this should be a no-op! */
> @@ -164,7 +167,9 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
>  unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv)
>  {
>  	return i915_gem_shrink(dev_priv, -1UL,
> -			       I915_SHRINK_BOUND | I915_SHRINK_UNBOUND);
> +			       I915_SHRINK_BOUND |
> +			       I915_SHRINK_UNBOUND |
> +			       I915_SHRINK_ACTIVE);
>  }
>  
>  static bool i915_gem_shrinker_lock(struct drm_device *dev, bool *unlock)
> @@ -217,7 +222,7 @@ i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc)
>  			count += obj->base.size >> PAGE_SHIFT;
>  
>  	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
> -		if (obj->pages_pin_count == num_vma_bound(obj))
> +		if (!obj->active && obj->pages_pin_count == num_vma_bound(obj))
>  			count += obj->base.size >> PAGE_SHIFT;
>  	}
>  
> -- 
> 2.6.0
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Chris Wilson Oct. 6, 2015, 1:18 p.m. UTC | #2
On Tue, Oct 06, 2015 at 03:01:45PM +0200, Daniel Vetter wrote:
> On Thu, Oct 01, 2015 at 12:18:29PM +0100, Chris Wilson wrote:
> > Exclude active GPU pages from the purview of the background shrinker
> > (kswapd), as these cause uncontrollable GPU stalls. Given that the
> > shrinker is rerun until the freelists are satisfied, we should have
> > opportunity in subsequent passes to recover the pages once idle. If the
> > machine does run out of memory entirely, we have the forced idling in the
> > oom-notifier as a means of releasing all the pages we can before an oom
> > is prematurely executed.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Reviewed-by: Damien Lespiau <damien.lespiau@intel.com>
> 
> lgtm, but imo we should move the retire_requests from an earlier patch to
> this one.

I am not convinced. The retire_requests are there for their own reasons
(to cover up cracks elsewhere) and not because we need them for retiring
active objects.
-Chris
Daniel Vetter Oct. 7, 2015, 1:51 p.m. UTC | #3
On Tue, Oct 06, 2015 at 02:18:34PM +0100, Chris Wilson wrote:
> On Tue, Oct 06, 2015 at 03:01:45PM +0200, Daniel Vetter wrote:
> > On Thu, Oct 01, 2015 at 12:18:29PM +0100, Chris Wilson wrote:
> > > Exclude active GPU pages from the purview of the background shrinker
> > > (kswapd), as these cause uncontrollable GPU stalls. Given that the
> > > shrinker is rerun until the freelists are satisfied, we should have
> > > opportunity in subsequent passes to recover the pages once idle. If the
> > > machine does run out of memory entirely, we have the forced idling in the
> > > oom-notifier as a means of releasing all the pages we can before an oom
> > > is prematurely executed.
> > > 
> > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > > Reviewed-by: Damien Lespiau <damien.lespiau@intel.com>
> > 
> > lgtm, but imo we should move the retire_requests from an earlier patch to
> > this one.
> 
> I am not convinced. The retire_requests are there for their own reasons
> (to cover up cracks elsewhere) and not because we need them for retiring
> active objects.

Ok, pulled in all of them, with notes added for the patches where I had
questions.
-Daniel

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 64b8929acdf2..a443310a3598 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3159,6 +3159,7 @@ unsigned long i915_gem_shrink(struct drm_i915_private *dev_priv,
 #define I915_SHRINK_PURGEABLE 0x1
 #define I915_SHRINK_UNBOUND 0x2
 #define I915_SHRINK_BOUND 0x4
+#define I915_SHRINK_ACTIVE 0x8
 unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv);
 void i915_gem_shrinker_init(struct drm_i915_private *dev_priv);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index 2058d162aeb9..b2ccb7346899 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -126,6 +126,9 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
 			    obj->madv != I915_MADV_DONTNEED)
 				continue;
 
+			if ((flags & I915_SHRINK_ACTIVE) == 0 && obj->active)
+				continue;
+
 			drm_gem_object_reference(&obj->base);
 
 			/* For the unbound phase, this should be a no-op! */
@@ -164,7 +167,9 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
 unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv)
 {
 	return i915_gem_shrink(dev_priv, -1UL,
-			       I915_SHRINK_BOUND | I915_SHRINK_UNBOUND);
+			       I915_SHRINK_BOUND |
+			       I915_SHRINK_UNBOUND |
+			       I915_SHRINK_ACTIVE);
 }
 
 static bool i915_gem_shrinker_lock(struct drm_device *dev, bool *unlock)
@@ -217,7 +222,7 @@ i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc)
 			count += obj->base.size >> PAGE_SHIFT;
 
 	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
-		if (obj->pages_pin_count == num_vma_bound(obj))
+		if (!obj->active && obj->pages_pin_count == num_vma_bound(obj))
 			count += obj->base.size >> PAGE_SHIFT;
 	}