Message ID | 20181204141522.13640-5-chris@chris-wilson.co.uk (mailing list archive) |
---|---|
State | New, archived |
Series | [1/7] drm/i915: Allocate a common scratch page |
On 04/12/2018 14:15, Chris Wilson wrote:
> Ignore trying to shrink from i915 if we fail to acquire the struct_mutex
> in the shrinker while performing direct-reclaim. The trade-off being
> (much) lower latency for non-i915 clients at an increased risk of being
> unable to obtain a page from direct-reclaim without hitting the
> oom-notifier. The proviso being that we still keep trying hard to
> obtain the lock for oom so that we can reap under heavy memory pressure.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>
> [snip]
>
> @@ -45,15 +47,11 @@ static bool shrinker_lock(struct drm_i915_private *i915, bool *unlock)
>
>  	case MUTEX_TRYLOCK_FAILED:
>  		*unlock = false;
> -		preempt_disable();
> -		do {
> -			cpu_relax();
> -			if (mutex_trylock(&i915->drm.struct_mutex)) {
> -				*unlock = true;
> -				break;
> -			}
> -		} while (!need_resched());
> -		preempt_enable();
> +		if (flags & I915_SHRINK_ACTIVE) {
> +			mutex_lock_nested(&i915->drm.struct_mutex,
> +					  I915_MM_SHRINKER);
> +			*unlock = true;
> +		}

I just realized one oddity in the shrinker code which escaped me before:
the call paths call shrinker_lock twice. For instance, i915_gem_shrinker_vmap
and i915_gem_shrinker_scan both first take the lock with flags of zero, and
then they call i915_gem_shrink, which takes the lock again, so the recursive
path is obviously always taken.

I think we need to clean this up so the code is easier to understand before
further tweaking, even if as part of this patch. For instance, adding
I915_SHRINK_LOCKED would solve it.

shrinker_lock_uninterruptible is also funky in that it doesn't respect the
timeout during the wait-for-idle phase.

Sounds reasonable?

Regards,

Tvrtko
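For illustration, a minimal sketch of what the I915_SHRINK_LOCKED idea above
could look like. The flag, its value and the simplified shrinker_lock() below
are assumptions made for the sake of the example, not code from the posted
series:

	/* Hypothetical flag, suggested in the review above but not defined by
	 * the posted patch; the value is chosen arbitrarily to sit after the
	 * existing I915_SHRINK_* bits.
	 */
	#define I915_SHRINK_LOCKED 0x20

	static bool shrinker_lock(struct drm_i915_private *i915,
				  unsigned int flags,
				  bool *unlock)
	{
		/* The caller already took struct_mutex via shrinker_lock() and
		 * says so explicitly, instead of relying on
		 * mutex_trylock_recursive() spotting the recursion.
		 */
		if (flags & I915_SHRINK_LOCKED) {
			*unlock = false;
			return true;
		}

		switch (mutex_trylock_recursive(&i915->drm.struct_mutex)) {
		case MUTEX_TRYLOCK_RECURSIVE:
			*unlock = false;
			return true;

		case MUTEX_TRYLOCK_FAILED:
			/* I915_SHRINK_ACTIVE handling from the patch omitted */
			*unlock = false;
			return false;

		case MUTEX_TRYLOCK_SUCCESS:
			*unlock = true;
			return true;
		}

		BUG();
	}

Callers that already hold the lock, such as i915_gem_shrinker_scan() after its
own shrinker_lock(), would then pass I915_SHRINK_LOCKED down to
i915_gem_shrink(), so the recursive trylock path is never exercised from
within the shrinker itself.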
Quoting Tvrtko Ursulin (2018-12-06 15:18:13)
> On 04/12/2018 14:15, Chris Wilson wrote:
> > Ignore trying to shrink from i915 if we fail to acquire the struct_mutex
> > in the shrinker while performing direct-reclaim. The trade-off being
> > (much) lower latency for non-i915 clients at an increased risk of being
> > unable to obtain a page from direct-reclaim without hitting the
> > oom-notifier. The proviso being that we still keep trying hard to
> > obtain the lock for oom so that we can reap under heavy memory pressure.
> >
> > [snip]
>
> I just realized one oddity in the shrinker code which escaped me before:
> the call paths call shrinker_lock twice. For instance,
> i915_gem_shrinker_vmap and i915_gem_shrinker_scan both first take the
> lock with flags of zero, and then they call i915_gem_shrink, which takes
> the lock again, so the recursive path is obviously always taken.
>
> I think we need to clean this up so the code is easier to understand
> before further tweaking, even if as part of this patch. For instance,
> adding I915_SHRINK_LOCKED would solve it.
>
> shrinker_lock_uninterruptible is also funky in that it doesn't respect
> the timeout during the wait-for-idle phase.
>
> Sounds reasonable?

My alternate code for this avoids struct_mutex here, but the compromise is that we can't process active requests here, and can't reap pages from zombie objects (objects that are still waiting for the RCU release). -Chris
Quoting Chris Wilson (2018-12-06 21:30:25)
> Quoting Tvrtko Ursulin (2018-12-06 15:18:13)
> > I just realized one oddity in the shrinker code which escaped me before:
> > the call paths call shrinker_lock twice. For instance,
> > i915_gem_shrinker_vmap and i915_gem_shrinker_scan both first take the
> > lock with flags of zero, and then they call i915_gem_shrink, which takes
> > the lock again, so the recursive path is obviously always taken.
> >
> > I think we need to clean this up so the code is easier to understand
> > before further tweaking, even if as part of this patch. For instance,
> > adding I915_SHRINK_LOCKED would solve it.
> >
> > shrinker_lock_uninterruptible is also funky in that it doesn't respect
> > the timeout during the wait-for-idle phase.
> >
> > Sounds reasonable?
>
> My alternate code for this avoids struct_mutex here, but the compromise
> is that we can't process active requests here, and can't reap pages from
> zombie objects (objects that are still waiting for the RCU release).

As far as what the current patch is describing, I still like it. It basically
says if we get to this point and we need to wait and freeze the batch queue
but haven't actually committed ourselves to that, don't.
-Chris
On 04/12/2018 14:15, Chris Wilson wrote:
> Ignore trying to shrink from i915 if we fail to acquire the struct_mutex
> in the shrinker while performing direct-reclaim. The trade-off being
> (much) lower latency for non-i915 clients at an increased risk of being
> unable to obtain a page from direct-reclaim without hitting the
> oom-notifier. The proviso being that we still keep trying hard to
> obtain the lock for oom so that we can reap under heavy memory pressure.
>
> [snip]
>
> +		if (flags & I915_SHRINK_ACTIVE) {
> +			mutex_lock_nested(&i915->drm.struct_mutex,
> +					  I915_MM_SHRINKER);
> +			*unlock = true;
> +		}

Nested still scares me, even though so far I have failed to break it.

How about, especially given that the commit message talks about "still
trying hard under oom", you actually split this patch up:

1. Implement exactly what the commit message says, so for
   "if (flags & I915_SHRINK_ACTIVE)" the code keeps using the loopy trylock
   (see the sketch after this message).

2. Add a patch which I earlier suggested, introducing I915_SHRINK_LOCKED,
   so we avoid hitting the recursive path from the shrinker itself.

3. Clean up the wait timeout handling in the vmap notifier case.

4. And only finally add the nested trick.

Sounds like a reasonable shrinker cleanup and improvement mini-series?

Regards,

Tvrtko
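A rough sketch of step 1, assuming the MUTEX_TRYLOCK_FAILED branch of
shrinker_lock() keeps the bounded trylock loop that the patch removes, but
only for I915_SHRINK_ACTIVE callers (illustrative only, not a posted patch):

	case MUTEX_TRYLOCK_FAILED:
		*unlock = false;
		if (flags & I915_SHRINK_ACTIVE) {
			/* oom-style reclaim: keep trying hard for the lock,
			 * bounded by need_resched(), exactly as the code
			 * being removed did for every caller.
			 */
			preempt_disable();
			do {
				cpu_relax();
				if (mutex_trylock(&i915->drm.struct_mutex)) {
					*unlock = true;
					break;
				}
			} while (!need_resched());
			preempt_enable();
		}
		return *unlock;

Ordinary direct-reclaim would then report no progress immediately, while the
oom notifier still spins for the lock; the mutex_lock_nested() change could be
layered on top afterwards as step 4.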
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index c5f01964f0fb..1cad218b71d3 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2916,9 +2916,9 @@ i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
 	__i915_gem_object_unpin_pages(obj);
 }
 
-enum i915_mm_subclass { /* lockdep subclass for obj->mm.lock */
+enum i915_mm_subclass { /* lockdep subclass for obj->mm.lock/struct_mutex */
 	I915_MM_NORMAL = 0,
-	I915_MM_SHRINKER
+	I915_MM_SHRINKER /* called "recursively" from direct-reclaim-esque */
 };
 
 void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index ea90d3a0d511..d461f458f4af 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -36,7 +36,9 @@
 #include "i915_drv.h"
 #include "i915_trace.h"
 
-static bool shrinker_lock(struct drm_i915_private *i915, bool *unlock)
+static bool shrinker_lock(struct drm_i915_private *i915,
+			  unsigned int flags,
+			  bool *unlock)
 {
 	switch (mutex_trylock_recursive(&i915->drm.struct_mutex)) {
 	case MUTEX_TRYLOCK_RECURSIVE:
@@ -45,15 +47,11 @@ static bool shrinker_lock(struct drm_i915_private *i915, bool *unlock)
 
 	case MUTEX_TRYLOCK_FAILED:
 		*unlock = false;
-		preempt_disable();
-		do {
-			cpu_relax();
-			if (mutex_trylock(&i915->drm.struct_mutex)) {
-				*unlock = true;
-				break;
-			}
-		} while (!need_resched());
-		preempt_enable();
+		if (flags & I915_SHRINK_ACTIVE) {
+			mutex_lock_nested(&i915->drm.struct_mutex,
+					  I915_MM_SHRINKER);
+			*unlock = true;
+		}
 		return *unlock;
 
 	case MUTEX_TRYLOCK_SUCCESS:
@@ -160,7 +158,7 @@ i915_gem_shrink(struct drm_i915_private *i915,
 	unsigned long scanned = 0;
 	bool unlock;
 
-	if (!shrinker_lock(i915, &unlock))
+	if (!shrinker_lock(i915, flags, &unlock))
 		return 0;
 
 	/*
@@ -357,7 +355,7 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
 
 	sc->nr_scanned = 0;
 
-	if (!shrinker_lock(i915, &unlock))
+	if (!shrinker_lock(i915, 0, &unlock))
 		return SHRINK_STOP;
 
 	freed = i915_gem_shrink(i915,
@@ -397,7 +395,7 @@ shrinker_lock_uninterruptible(struct drm_i915_private *i915, bool *unlock,
 	do {
 		if (i915_gem_wait_for_idle(i915,
 					   0, MAX_SCHEDULE_TIMEOUT) == 0 &&
-		    shrinker_lock(i915, unlock))
+		    shrinker_lock(i915, 0, unlock))
 			break;
 
 		schedule_timeout_killable(1);
Ignore trying to shrink from i915 if we fail to acquire the struct_mutex
in the shrinker while performing direct-reclaim. The trade-off being
(much) lower latency for non-i915 clients at an increased risk of being
unable to obtain a page from direct-reclaim without hitting the
oom-notifier. The proviso being that we still keep trying hard to obtain
the lock for oom so that we can reap under heavy memory pressure.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h          |  4 ++--
 drivers/gpu/drm/i915/i915_gem_shrinker.c | 24 +++++++++++-------------
 2 files changed, 13 insertions(+), 15 deletions(-)
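For context on the "still keep trying hard to obtain the lock for oom" part:
the oom notifier reaches i915_gem_shrink() with I915_SHRINK_ACTIVE set, so
with this patch it is the caller that still blocks for struct_mutex. A rough
reconstruction of that path from the driver of this era (not part of this
patch; details may differ):

	/* Reconstructed for illustration; i915_gem_shrinker_oom() calls this
	 * to reap everything, including active objects, under oom pressure.
	 */
	unsigned long i915_gem_shrink_all(struct drm_i915_private *i915)
	{
		unsigned long freed;

		intel_runtime_pm_get(i915);
		freed = i915_gem_shrink(i915, -1UL, NULL,
					I915_SHRINK_BOUND |
					I915_SHRINK_UNBOUND |
					I915_SHRINK_ACTIVE);
		intel_runtime_pm_put(i915);

		return freed;
	}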