
drm/i915: Borrow our struct_mutex for the direct reclaim

Message ID 1349881739-32277-1-git-send-email-chris@chris-wilson.co.uk (mailing list archive)
State New, archived

Commit Message

Chris Wilson Oct. 10, 2012, 3:08 p.m. UTC
If we have hit oom whilst holding our struct_mutex, then currently we
cannot reap our own GPU buffers which likely pin most of memory, making
an outright OOM more likely. So if we are running in direct reclaim and
already hold the mutex, attempt to free buffers knowing that the
original function cannot continue until we return.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c |   24 +++++++++++++++++++++---
 1 file changed, 21 insertions(+), 3 deletions(-)
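
[Editorial note: the shrinker is invoked synchronously from direct reclaim, i.e. potentially from inside any allocation made by the thread that already holds struct_mutex. A plain mutex_lock() there would deadlock, and a plain mutex_trylock() makes the shrinker useless exactly when it is needed most. A minimal userspace sketch of the trylock-or-borrow idea follows; this is an illustrative analogue only — the kernel code below uses struct mutex, mutex_trylock() and the shrinker API, and none of these pthread helpers appear in the patch.]

#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t big_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_t big_lock_owner;	/* only meaningful while big_lock_held */
static bool big_lock_held;

static void lock_big(void)
{
	pthread_mutex_lock(&big_lock);
	big_lock_owner = pthread_self();
	big_lock_held = true;
}

static void unlock_big(void)
{
	big_lock_held = false;
	pthread_mutex_unlock(&big_lock);
}

/* Reclaim callback: may run inside an allocation made by the very
 * thread that already holds big_lock -- the situation the patch
 * handles.  The unlocked read of big_lock_owner mirrors the racy
 * read of mutex->owner in the kernel helper. */
static int shrink(void)
{
	bool unlock = true;

	if (pthread_mutex_trylock(&big_lock) != 0) {
		if (big_lock_held && pthread_equal(big_lock_owner, pthread_self()))
			unlock = false;	/* recursed from under our own lock: borrow it */
		else
			return 0;	/* held by another thread: do nothing, as before */
	} else {
		big_lock_owner = pthread_self();
		big_lock_held = true;
	}

	/* ... free unpinned caches here, protected by big_lock either way ... */

	if (unlock) {
		big_lock_held = false;
		pthread_mutex_unlock(&big_lock);
	}
	return 0;
}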

Comments

Daniel Vetter Oct. 10, 2012, 9:02 p.m. UTC | #1
On Wed, Oct 10, 2012 at 5:08 PM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> If we have hit oom whilst holding our struct_mutex, then currently we
> cannot reap our own GPU buffers which likely pin most of memory, making
> an outright OOM more likely. So if we are running in direct reclaim and
> already hold the mutex, attempt to free buffers knowing that the
> original function cannot continue until we return.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

I've thought a bit about this, and I fear the implications. It's a
very neat trick, but now every memory alloc call could potentially
result in unpinned objects getting unbound and in active objects
getting retired. Previously we only needed to fear active objects
disappearing when calling retire_request (since that could drop the
last reference). With this change we have many more places, and given
how often we e.g. fumbled the refcounting in the fence stealing code
I'm scared ...

/me needs to think more about this

Cheers, Daniel
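
[Editorial note: to make the worry concrete — once this patch is in, any GFP_KERNEL allocation performed with struct_mutex held may re-enter the shrinker and retire or unbind objects, so a borrowed (unreferenced) object pointer held across such an allocation can go stale. A sketch of the hazard; find_some_object() and use() are hypothetical stand-ins, not real i915 functions, and the reference helpers are the DRM API of this era:]

	struct drm_i915_gem_object *obj = find_some_object(dev); /* borrowed, no ref */

	/* Before the patch this allocation could not free GEM objects;
	 * with it, direct reclaim may retire requests and drop what was
	 * the last reference to obj: */
	ptr = kmalloc(size, GFP_KERNEL);

	use(obj);				/* potential use-after-free */

	/* Defensive pattern: pin a reference across the allocation. */
	drm_gem_object_reference(&obj->base);
	ptr = kmalloc(size, GFP_KERNEL);
	use(obj);
	drm_gem_object_unreference(&obj->base);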
> ---
>  drivers/gpu/drm/i915/i915_gem.c |   24 +++++++++++++++++++++---
>  1 file changed, 21 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 1d0cbfb..bed4084 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -4589,6 +4589,18 @@ void i915_gem_release(struct drm_device *dev, struct drm_file *file)
>         spin_unlock(&file_priv->mm.lock);
>  }
>
> +static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task)
> +{
> +       if (!mutex_is_locked(mutex))
> +               return false;
> +
> +#if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_SMP)
> +       return mutex->owner == task;
> +#else
> +       return false;
> +#endif
> +}
> +
>  static int
>  i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc)
>  {
> @@ -4599,10 +4611,15 @@ i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc)
>         struct drm_device *dev = dev_priv->dev;
>         struct drm_i915_gem_object *obj;
>         int nr_to_scan = sc->nr_to_scan;
> +       bool unlock = true;
>         int cnt;
>
> -       if (!mutex_trylock(&dev->struct_mutex))
> -               return 0;
> +       if (!mutex_trylock(&dev->struct_mutex)) {
> +               if (mutex_is_locked_by(&dev->struct_mutex, current))
> +                       unlock = false;
> +               else
> +                       return 0;
> +       }
>
>         if (nr_to_scan) {
>                 nr_to_scan -= i915_gem_purge(dev_priv, nr_to_scan);
> @@ -4618,6 +4635,7 @@ i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc)
>                 if (obj->pin_count == 0 && obj->pages_pin_count == 0)
>                         cnt += obj->base.size >> PAGE_SHIFT;
>
> -       mutex_unlock(&dev->struct_mutex);
> +       if (unlock)
> +               mutex_unlock(&dev->struct_mutex);
>         return cnt;
>  }
> --
> 1.7.10.4
>
Ben Widawsky Nov. 8, 2012, 1:49 p.m. UTC | #2
On Wed, 10 Oct 2012 16:08:59 +0100
Chris Wilson <chris@chris-wilson.co.uk> wrote:

> If we have hit oom whilst holding our struct_mutex, then currently we
> cannot reap our own GPU buffers which likely pin most of memory, making
> an outright OOM more likely. So if we are running in direct reclaim and
> already hold the mutex, attempt to free buffers knowing that the
> original function cannot continue until we return.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

I will try to review this a bit further on the plane ride home. It's
quite a scary patch, but it stops the OOM killer from destroying IGT runs, so it's
pretty necessary IMO.

Meanwhile:
Tested-by: Ben Widawsky <ben@bwidawsk.net>

> ---
>  drivers/gpu/drm/i915/i915_gem.c |   24 +++++++++++++++++++++---
>  1 file changed, 21 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 1d0cbfb..bed4084 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -4589,6 +4589,18 @@ void i915_gem_release(struct drm_device *dev, struct drm_file *file)
>  	spin_unlock(&file_priv->mm.lock);
>  }
>  
> +static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task)
> +{
> +	if (!mutex_is_locked(mutex))
> +		return false;
> +
> +#if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_SMP)
> +	return mutex->owner == task;
> +#else
> +	return false;
> +#endif
> +}
> +
>  static int
>  i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc)
>  {
> @@ -4599,10 +4611,15 @@ i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc)
>  	struct drm_device *dev = dev_priv->dev;
>  	struct drm_i915_gem_object *obj;
>  	int nr_to_scan = sc->nr_to_scan;
> +	bool unlock = true;
>  	int cnt;
>  
> -	if (!mutex_trylock(&dev->struct_mutex))
> -		return 0;
> +	if (!mutex_trylock(&dev->struct_mutex)) {
> +		if (mutex_is_locked_by(&dev->struct_mutex, current))
> +			unlock = false;
> +		else
> +			return 0;
> +	}
>  
>  	if (nr_to_scan) {
>  		nr_to_scan -= i915_gem_purge(dev_priv, nr_to_scan);
> @@ -4618,6 +4635,7 @@ i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc)
>  		if (obj->pin_count == 0 && obj->pages_pin_count == 0)
>  			cnt += obj->base.size >> PAGE_SHIFT;
>  
> -	mutex_unlock(&dev->struct_mutex);
> +	if (unlock)
> +		mutex_unlock(&dev->struct_mutex);
>  	return cnt;
>  }

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 1d0cbfb..bed4084 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4589,6 +4589,18 @@  void i915_gem_release(struct drm_device *dev, struct drm_file *file)
 	spin_unlock(&file_priv->mm.lock);
 }
 
+static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task)
+{
+	if (!mutex_is_locked(mutex))
+		return false;
+
+#if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_SMP)
+	return mutex->owner == task;
+#else
+	return false;
+#endif
+}
+
 static int
 i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc)
 {
@@ -4599,10 +4611,15 @@  i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc)
 	struct drm_device *dev = dev_priv->dev;
 	struct drm_i915_gem_object *obj;
 	int nr_to_scan = sc->nr_to_scan;
+	bool unlock = true;
 	int cnt;
 
-	if (!mutex_trylock(&dev->struct_mutex))
-		return 0;
+	if (!mutex_trylock(&dev->struct_mutex)) {
+		if (mutex_is_locked_by(&dev->struct_mutex, current))
+			unlock = false;
+		else
+			return 0;
+	}
 
 	if (nr_to_scan) {
 		nr_to_scan -= i915_gem_purge(dev_priv, nr_to_scan);
@@ -4618,6 +4635,7 @@  i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc)
 		if (obj->pin_count == 0 && obj->pages_pin_count == 0)
 			cnt += obj->base.size >> PAGE_SHIFT;
 
-	mutex_unlock(&dev->struct_mutex);
+	if (unlock)
+		mutex_unlock(&dev->struct_mutex);
 	return cnt;
 }
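
[Editorial note on the #if in mutex_is_locked_by(): the mutex ->owner field only exists when CONFIG_DEBUG_MUTEXES or CONFIG_SMP is set (the latter because it backs the optimistic-spinning code), which is exactly the condition the patch tests. An annotated copy of the helper follows; the comments are editorial, the code is unchanged from the patch:]

static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task)
{
	if (!mutex_is_locked(mutex))
		return false;

#if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_SMP)
	/* The read of ->owner is done without holding the mutex and is
	 * therefore racy in principle; when the owner really is 'task',
	 * the field is stable for as long as the lock is held, which is
	 * the self-recursion case the shrinker cares about. */
	return mutex->owner == task;
#else
	/* No owner tracking available: conservatively report "not ours",
	 * so the shrinker falls back to the old behaviour of returning 0. */
	return false;
#endif
}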