Message ID | 20190614071023.17929-3-chris@chris-wilson.co.uk (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [01/39] drm/i915: Discard some redundant cache domain flushes | expand |
Chris Wilson <chris@chris-wilson.co.uk> writes: > We already use a mutex to serialise i915_reset() and wedging, so all we > need it to link that into i915_request_wait() and we have our lock cycle > detection. > > v2.5: Take error mutex for selftests > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com> > --- > drivers/gpu/drm/i915/gt/intel_reset.c | 6 ++---- > drivers/gpu/drm/i915/i915_drv.h | 8 -------- > drivers/gpu/drm/i915/i915_gem.c | 3 --- > drivers/gpu/drm/i915/i915_request.c | 12 ++++++++++-- > drivers/gpu/drm/i915/selftests/mock_gem_device.c | 2 -- > 5 files changed, 12 insertions(+), 19 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c > index 8ba7af8b7ced..41a294f5cc19 100644 > --- a/drivers/gpu/drm/i915/gt/intel_reset.c > +++ b/drivers/gpu/drm/i915/gt/intel_reset.c > @@ -978,7 +978,7 @@ void i915_reset(struct drm_i915_private *i915, > > might_sleep(); > GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &error->flags)); > - lock_map_acquire(&i915->gt.reset_lockmap); > + mutex_lock(&error->wedge_mutex); > > /* Clear any previous failed attempts at recovery. Time to try again. 
*/ > if (!__i915_gem_unset_wedged(i915)) > @@ -1031,7 +1031,7 @@ void i915_reset(struct drm_i915_private *i915, > finish: > reset_finish(i915); > unlock: > - lock_map_release(&i915->gt.reset_lockmap); > + mutex_unlock(&error->wedge_mutex); > return; > > taint: > @@ -1147,9 +1147,7 @@ static void i915_reset_device(struct drm_i915_private *i915, > /* Flush everyone using a resource about to be clobbered */ > synchronize_srcu_expedited(&error->reset_backoff_srcu); > > - mutex_lock(&error->wedge_mutex); > i915_reset(i915, engine_mask, reason); > - mutex_unlock(&error->wedge_mutex); > > intel_finish_reset(i915); > } > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 90d94d904e65..3683ef6d4c28 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -1901,14 +1901,6 @@ struct drm_i915_private { > ktime_t last_init_time; > > struct i915_vma *scratch; > - > - /* > - * We must never wait on the GPU while holding a lock as we > - * may need to perform a GPU reset. So while we don't need to > - * serialise wait/reset with an explicit lock, we do want > - * lockdep to detect potential dependency cycles. 
> - */ > - struct lockdep_map reset_lockmap; > } gt; > > struct { > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index 4bbded4aa936..7232361973fd 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -1746,7 +1746,6 @@ static void i915_gem_init__mm(struct drm_i915_private *i915) > > int i915_gem_init_early(struct drm_i915_private *dev_priv) > { > - static struct lock_class_key reset_key; > int err; > > intel_gt_pm_init(dev_priv); > @@ -1754,8 +1753,6 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv) > INIT_LIST_HEAD(&dev_priv->gt.active_rings); > INIT_LIST_HEAD(&dev_priv->gt.closed_vma); > spin_lock_init(&dev_priv->gt.closed_lock); > - lockdep_init_map(&dev_priv->gt.reset_lockmap, > - "i915.reset", &reset_key, 0); > > i915_gem_init__mm(dev_priv); > i915_gem_init__pm(dev_priv); > diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c > index 1cbc3ef4fc27..5311286578b7 100644 > --- a/drivers/gpu/drm/i915/i915_request.c > +++ b/drivers/gpu/drm/i915/i915_request.c > @@ -1444,7 +1444,15 @@ long i915_request_wait(struct i915_request *rq, > return -ETIME; > > trace_i915_request_wait_begin(rq, flags); > - lock_map_acquire(&rq->i915->gt.reset_lockmap); > + > + /* > + * We must never wait on the GPU while holding a lock as we > + * may need to perform a GPU reset. So while we don't need to > + * serialise wait/reset with an explicit lock, we do want > + * lockdep to detect potential dependency cycles. > + */ > + mutex_acquire(&rq->i915->gpu_error.wedge_mutex.dep_map, > + 0, 0, _THIS_IP_); Seems to translate to exclusive lock with full checking. There was ofcourse a slight possibilty that previous reviewer did read all the lockdep.h. Looked at the wedge mutex and connected the dots. Well, it is obvious now. Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com> > > /* > * Optimistic spin before touching IRQs. 
> @@ -1518,7 +1526,7 @@ long i915_request_wait(struct i915_request *rq, > dma_fence_remove_callback(&rq->fence, &wait.cb); > > out: > - lock_map_release(&rq->i915->gt.reset_lockmap); > + mutex_release(&rq->i915->gpu_error.wedge_mutex.dep_map, 0, _THIS_IP_); > trace_i915_request_wait_end(rq); > return timeout; > } > diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c > index 1e9ffced78c1..b7f3fbb4ae89 100644 > --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c > +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c > @@ -130,7 +130,6 @@ static struct dev_pm_domain pm_domain = { > > struct drm_i915_private *mock_gem_device(void) > { > - static struct lock_class_key reset_key; > struct drm_i915_private *i915; > struct pci_dev *pdev; > int err; > @@ -205,7 +204,6 @@ struct drm_i915_private *mock_gem_device(void) > INIT_LIST_HEAD(&i915->gt.active_rings); > INIT_LIST_HEAD(&i915->gt.closed_vma); > spin_lock_init(&i915->gt.closed_lock); > - lockdep_init_map(&i915->gt.reset_lockmap, "i915.reset", &reset_key, 0); > > mutex_lock(&i915->drm.struct_mutex); > > -- > 2.20.1
Quoting Mika Kuoppala (2019-06-14 15:10:08) > Chris Wilson <chris@chris-wilson.co.uk> writes: > > diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c > > index 1cbc3ef4fc27..5311286578b7 100644 > > --- a/drivers/gpu/drm/i915/i915_request.c > > +++ b/drivers/gpu/drm/i915/i915_request.c > > @@ -1444,7 +1444,15 @@ long i915_request_wait(struct i915_request *rq, > > return -ETIME; > > > > trace_i915_request_wait_begin(rq, flags); > > - lock_map_acquire(&rq->i915->gt.reset_lockmap); > > + > > + /* > > + * We must never wait on the GPU while holding a lock as we > > + * may need to perform a GPU reset. So while we don't need to > > + * serialise wait/reset with an explicit lock, we do want > > + * lockdep to detect potential dependency cycles. > > + */ > > + mutex_acquire(&rq->i915->gpu_error.wedge_mutex.dep_map, > > + 0, 0, _THIS_IP_); > > Seems to translate to exclusive lock with full checking. > > There was of course a slight possibility that previous reviewer did > read all the lockdep.h. Looked at the wedge mutex and connected > the dots. Well, it is obvious now. Hah, I had forgotten all about wedge_mutex :-p Hopefully, this keeps our reset handling robust. First I have to fix the mistakes I've recently made... I just need to find a reviewer for struct_mutex removal :) -Chris
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 8ba7af8b7ced..41a294f5cc19 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -978,7 +978,7 @@ void i915_reset(struct drm_i915_private *i915, might_sleep(); GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &error->flags)); - lock_map_acquire(&i915->gt.reset_lockmap); + mutex_lock(&error->wedge_mutex); /* Clear any previous failed attempts at recovery. Time to try again. */ if (!__i915_gem_unset_wedged(i915)) @@ -1031,7 +1031,7 @@ void i915_reset(struct drm_i915_private *i915, finish: reset_finish(i915); unlock: - lock_map_release(&i915->gt.reset_lockmap); + mutex_unlock(&error->wedge_mutex); return; taint: @@ -1147,9 +1147,7 @@ static void i915_reset_device(struct drm_i915_private *i915, /* Flush everyone using a resource about to be clobbered */ synchronize_srcu_expedited(&error->reset_backoff_srcu); - mutex_lock(&error->wedge_mutex); i915_reset(i915, engine_mask, reason); - mutex_unlock(&error->wedge_mutex); intel_finish_reset(i915); } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 90d94d904e65..3683ef6d4c28 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1901,14 +1901,6 @@ struct drm_i915_private { ktime_t last_init_time; struct i915_vma *scratch; - - /* - * We must never wait on the GPU while holding a lock as we - * may need to perform a GPU reset. So while we don't need to - * serialise wait/reset with an explicit lock, we do want - * lockdep to detect potential dependency cycles. 
- */ - struct lockdep_map reset_lockmap; } gt; struct { diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 4bbded4aa936..7232361973fd 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1746,7 +1746,6 @@ static void i915_gem_init__mm(struct drm_i915_private *i915) int i915_gem_init_early(struct drm_i915_private *dev_priv) { - static struct lock_class_key reset_key; int err; intel_gt_pm_init(dev_priv); @@ -1754,8 +1753,6 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv) INIT_LIST_HEAD(&dev_priv->gt.active_rings); INIT_LIST_HEAD(&dev_priv->gt.closed_vma); spin_lock_init(&dev_priv->gt.closed_lock); - lockdep_init_map(&dev_priv->gt.reset_lockmap, - "i915.reset", &reset_key, 0); i915_gem_init__mm(dev_priv); i915_gem_init__pm(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 1cbc3ef4fc27..5311286578b7 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1444,7 +1444,15 @@ long i915_request_wait(struct i915_request *rq, return -ETIME; trace_i915_request_wait_begin(rq, flags); - lock_map_acquire(&rq->i915->gt.reset_lockmap); + + /* + * We must never wait on the GPU while holding a lock as we + * may need to perform a GPU reset. So while we don't need to + * serialise wait/reset with an explicit lock, we do want + * lockdep to detect potential dependency cycles. + */ + mutex_acquire(&rq->i915->gpu_error.wedge_mutex.dep_map, + 0, 0, _THIS_IP_); /* * Optimistic spin before touching IRQs. 
@@ -1518,7 +1526,7 @@ long i915_request_wait(struct i915_request *rq, dma_fence_remove_callback(&rq->fence, &wait.cb); out: - lock_map_release(&rq->i915->gt.reset_lockmap); + mutex_release(&rq->i915->gpu_error.wedge_mutex.dep_map, 0, _THIS_IP_); trace_i915_request_wait_end(rq); return timeout; } diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index 1e9ffced78c1..b7f3fbb4ae89 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -130,7 +130,6 @@ static struct dev_pm_domain pm_domain = { struct drm_i915_private *mock_gem_device(void) { - static struct lock_class_key reset_key; struct drm_i915_private *i915; struct pci_dev *pdev; int err; @@ -205,7 +204,6 @@ struct drm_i915_private *mock_gem_device(void) INIT_LIST_HEAD(&i915->gt.active_rings); INIT_LIST_HEAD(&i915->gt.closed_vma); spin_lock_init(&i915->gt.closed_lock); - lockdep_init_map(&i915->gt.reset_lockmap, "i915.reset", &reset_key, 0); mutex_lock(&i915->drm.struct_mutex);
We already use a mutex to serialise i915_reset() and wedging, so all we need is to link that into i915_request_wait() and we have our lock cycle detection. v2.5: Take error mutex for selftests Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com> --- drivers/gpu/drm/i915/gt/intel_reset.c | 6 ++---- drivers/gpu/drm/i915/i915_drv.h | 8 -------- drivers/gpu/drm/i915/i915_gem.c | 3 --- drivers/gpu/drm/i915/i915_request.c | 12 ++++++++++-- drivers/gpu/drm/i915/selftests/mock_gem_device.c | 2 -- 5 files changed, 12 insertions(+), 19 deletions(-)