diff mbox

[v2,3/3] drm/i915/shrinker: Hook up vmap allocation failure notifier

Message ID 1459777603-23618-4-git-send-email-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show

Commit Message

Chris Wilson April 4, 2016, 1:46 p.m. UTC
If the core runs out of vmap address space, it will call a notifier in
case any driver can reap some of its vmaps. As i915.ko is possibily
holding onto vmap address space that could be recovered, hook into the
notifier chain and try and reap objects holding onto vmaps.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Roman Pen <r.peniaev@gmail.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: linux-mm@kvack.org
Cc: linux-kernel@vger.kernel.org
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Mika Kahola <mika.kahola@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h          |  1 +
 drivers/gpu/drm/i915/i915_gem_shrinker.c | 39 ++++++++++++++++++++++++++++++++
 2 files changed, 40 insertions(+)

Comments

Joonas Lahtinen April 5, 2016, 8:19 a.m. UTC | #1
On ma, 2016-04-04 at 14:46 +0100, Chris Wilson wrote:
> If the core runs out of vmap address space, it will call a notifier in
> case any driver can reap some of its vmaps. As i915.ko is possibily
> holding onto vmap address space that could be recovered, hook into the
> notifier chain and try and reap objects holding onto vmaps.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Andrew Morton <akpm@linux-foundation.org>
> Cc: David Rientjes <rientjes@google.com>
> Cc: Roman Pen <r.peniaev@gmail.com>
> Cc: Mel Gorman <mgorman@techsingularity.net>
> Cc: linux-mm@kvack.org
> Cc: linux-kernel@vger.kernel.org
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

A comment below. But regardless;

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Mika Kahola <mika.kahola@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h          |  1 +
>  drivers/gpu/drm/i915/i915_gem_shrinker.c | 39 ++++++++++++++++++++++++++++++++
>  2 files changed, 40 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index dd187727c813..6443745d4182 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1257,6 +1257,7 @@ struct i915_gem_mm {
>  	struct i915_hw_ppgtt *aliasing_ppgtt;
>  
>  	struct notifier_block oom_notifier;
> +	struct notifier_block vmap_notifier;
>  	struct shrinker shrinker;
>  	bool shrinker_no_lock_stealing;
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> index e391ee3ec486..be7501afb59e 100644
> --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
> +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> @@ -28,6 +28,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  
> @@ -356,6 +357,40 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr)
>  	return NOTIFY_DONE;
>  }
>  
> +static int
> +i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr)
> +{
> +	struct drm_i915_private *dev_priv =
> +		container_of(nb, struct drm_i915_private, mm.vmap_notifier);
> +	struct drm_device *dev = dev_priv->dev;
> +	unsigned long timeout = msecs_to_jiffies(5000) + 1;
> +	unsigned long freed_pages;
> +	bool was_interruptible;
> +	bool unlock;
> +
> +	while (!i915_gem_shrinker_lock(dev, &unlock) && --timeout) {
> +		schedule_timeout_killable(1);
> +		if (fatal_signal_pending(current))
> +			return NOTIFY_DONE;
> +	}
> +	if (timeout == 0) {
> +		pr_err("Unable to purge GPU vmaps due to lock contention.\n");
> +		return NOTIFY_DONE;
> +	}
> +
> +	was_interruptible = dev_priv->mm.interruptible;
> +	dev_priv->mm.interruptible = false;
> +
> +	freed_pages = i915_gem_shrink_all(dev_priv);
> +
> +	dev_priv->mm.interruptible = was_interruptible;

Up until here this is same function as the oom shrinker, so I would
combine these two and here do "if (vmap) goto out;" sort of thing.

Would just need a way to distinct between two calling sites. I did not
come up with a quick solution as both are passing 0 as event.

Regards, Joonas

> +	if (unlock)
> +		mutex_unlock(&dev->struct_mutex);
> +
> +	*(unsigned long *)ptr += freed_pages;
> +	return NOTIFY_DONE;
> +}
> +
>  /**
>   * i915_gem_shrinker_init - Initialize i915 shrinker
>   * @dev_priv: i915 device
> @@ -371,6 +406,9 @@ void i915_gem_shrinker_init(struct drm_i915_private *dev_priv)
>  
>  	dev_priv->mm.oom_notifier.notifier_call = i915_gem_shrinker_oom;
>  	WARN_ON(register_oom_notifier(&dev_priv->mm.oom_notifier));
> +
> +	dev_priv->mm.vmap_notifier.notifier_call = i915_gem_shrinker_vmap;
> +	WARN_ON(register_vmap_purge_notifier(&dev_priv->mm.vmap_notifier));
>  }
>  
>  /**
> @@ -381,6 +419,7 @@ void i915_gem_shrinker_init(struct drm_i915_private *dev_priv)
>   */
>  void i915_gem_shrinker_cleanup(struct drm_i915_private *dev_priv)
>  {
> +	WARN_ON(unregister_vmap_purge_notifier(&dev_priv->mm.vmap_notifier));
>  	WARN_ON(unregister_oom_notifier(&dev_priv->mm.oom_notifier));
>  	unregister_shrinker(&dev_priv->mm.shrinker);
>  }
Chris Wilson April 5, 2016, 9:03 a.m. UTC | #2
On Tue, Apr 05, 2016 at 11:19:38AM +0300, Joonas Lahtinen wrote:
> On ma, 2016-04-04 at 14:46 +0100, Chris Wilson wrote:
> > If the core runs out of vmap address space, it will call a notifier in
> > case any driver can reap some of its vmaps. As i915.ko is possibily
> > holding onto vmap address space that could be recovered, hook into the
> > notifier chain and try and reap objects holding onto vmaps.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Andrew Morton <akpm@linux-foundation.org>
> > Cc: David Rientjes <rientjes@google.com>
> > Cc: Roman Pen <r.peniaev@gmail.com>
> > Cc: Mel Gorman <mgorman@techsingularity.net>
> > Cc: linux-mm@kvack.org
> > Cc: linux-kernel@vger.kernel.org
> > Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> 
> A comment below. But regardless;
> 
> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> 
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > Cc: Mika Kahola <mika.kahola@intel.com>
> > ---
> >  drivers/gpu/drm/i915/i915_drv.h          |  1 +
> >  drivers/gpu/drm/i915/i915_gem_shrinker.c | 39 ++++++++++++++++++++++++++++++++
> >  2 files changed, 40 insertions(+)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > index dd187727c813..6443745d4182 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -1257,6 +1257,7 @@ struct i915_gem_mm {
> >  	struct i915_hw_ppgtt *aliasing_ppgtt;
> >  
> >  	struct notifier_block oom_notifier;
> > +	struct notifier_block vmap_notifier;
> >  	struct shrinker shrinker;
> >  	bool shrinker_no_lock_stealing;
> >  
> > diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> > index e391ee3ec486..be7501afb59e 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> > @@ -28,6 +28,7 @@
> >  #include 
> >  #include 
> >  #include 
> > +#include 
> >  #include 
> >  #include 
> >  
> > @@ -356,6 +357,40 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr)
> >  	return NOTIFY_DONE;
> >  }
> >  
> > +static int
> > +i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr)
> > +{
> > +	struct drm_i915_private *dev_priv =
> > +		container_of(nb, struct drm_i915_private, mm.vmap_notifier);
> > +	struct drm_device *dev = dev_priv->dev;
> > +	unsigned long timeout = msecs_to_jiffies(5000) + 1;
> > +	unsigned long freed_pages;
> > +	bool was_interruptible;
> > +	bool unlock;
> > +
> > +	while (!i915_gem_shrinker_lock(dev, &unlock) && --timeout) {
> > +		schedule_timeout_killable(1);
> > +		if (fatal_signal_pending(current))
> > +			return NOTIFY_DONE;
> > +	}
> > +	if (timeout == 0) {
> > +		pr_err("Unable to purge GPU vmaps due to lock contention.\n");
> > +		return NOTIFY_DONE;
> > +	}
> > +
> > +	was_interruptible = dev_priv->mm.interruptible;
> > +	dev_priv->mm.interruptible = false;
> > +
> > +	freed_pages = i915_gem_shrink_all(dev_priv);
> > +
> > +	dev_priv->mm.interruptible = was_interruptible;
> 
> Up until here this is same function as the oom shrinker, so I would
> combine these two and here do "if (vmap) goto out;" sort of thing.
> 
> Would just need a way to distinct between two calling sites. I did not
> come up with a quick solution as both are passing 0 as event.

Less thrilled about merging the two notifier callbacks, but we could
wrap i915_gem_shrinker_lock_killable().
-Chris
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index dd187727c813..6443745d4182 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1257,6 +1257,7 @@  struct i915_gem_mm {
 	struct i915_hw_ppgtt *aliasing_ppgtt;
 
 	struct notifier_block oom_notifier;
+	struct notifier_block vmap_notifier;
 	struct shrinker shrinker;
 	bool shrinker_no_lock_stealing;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index e391ee3ec486..be7501afb59e 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -28,6 +28,7 @@ 
 #include <linux/swap.h>
 #include <linux/pci.h>
 #include <linux/dma-buf.h>
+#include <linux/vmalloc.h>
 #include <drm/drmP.h>
 #include <drm/i915_drm.h>
 
@@ -356,6 +357,40 @@  i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr)
 	return NOTIFY_DONE;
 }
 
+static int
+i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr)
+{
+	struct drm_i915_private *dev_priv =
+		container_of(nb, struct drm_i915_private, mm.vmap_notifier);
+	struct drm_device *dev = dev_priv->dev;
+	unsigned long timeout = msecs_to_jiffies(5000) + 1;
+	unsigned long freed_pages;
+	bool was_interruptible;
+	bool unlock;
+
+	while (!i915_gem_shrinker_lock(dev, &unlock) && --timeout) {
+		schedule_timeout_killable(1);
+		if (fatal_signal_pending(current))
+			return NOTIFY_DONE;
+	}
+	if (timeout == 0) {
+		pr_err("Unable to purge GPU vmaps due to lock contention.\n");
+		return NOTIFY_DONE;
+	}
+
+	was_interruptible = dev_priv->mm.interruptible;
+	dev_priv->mm.interruptible = false;
+
+	freed_pages = i915_gem_shrink_all(dev_priv);
+
+	dev_priv->mm.interruptible = was_interruptible;
+	if (unlock)
+		mutex_unlock(&dev->struct_mutex);
+
+	*(unsigned long *)ptr += freed_pages;
+	return NOTIFY_DONE;
+}
+
 /**
  * i915_gem_shrinker_init - Initialize i915 shrinker
  * @dev_priv: i915 device
@@ -371,6 +406,9 @@  void i915_gem_shrinker_init(struct drm_i915_private *dev_priv)
 
 	dev_priv->mm.oom_notifier.notifier_call = i915_gem_shrinker_oom;
 	WARN_ON(register_oom_notifier(&dev_priv->mm.oom_notifier));
+
+	dev_priv->mm.vmap_notifier.notifier_call = i915_gem_shrinker_vmap;
+	WARN_ON(register_vmap_purge_notifier(&dev_priv->mm.vmap_notifier));
 }
 
 /**
@@ -381,6 +419,7 @@  void i915_gem_shrinker_init(struct drm_i915_private *dev_priv)
  */
 void i915_gem_shrinker_cleanup(struct drm_i915_private *dev_priv)
 {
+	WARN_ON(unregister_vmap_purge_notifier(&dev_priv->mm.vmap_notifier));
 	WARN_ON(unregister_oom_notifier(&dev_priv->mm.oom_notifier));
 	unregister_shrinker(&dev_priv->mm.shrinker);
 }