Message ID | 20170306101541.26171-1-chris@chris-wilson.co.uk (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 06/03/2017 10:15, Chris Wilson wrote: > In order for the missed-irq update to take effect, the device must be > idle. So when the user updates the fault injection via debugfs, idle the > device. > > v2: Idle is explicitly required for setting test_irq, and good behaviour > for clearing the missed_irq. > > Testcase: igt/drv_missed_irq > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > --- > drivers/gpu/drm/i915/i915_debugfs.c | 55 +++++++++++++++++++++++++------------ > 1 file changed, 38 insertions(+), 17 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c > index 4a3e5b9552f8..511d3541d3d5 100644 > --- a/drivers/gpu/drm/i915/i915_debugfs.c > +++ b/drivers/gpu/drm/i915/i915_debugfs.c > @@ -4138,6 +4138,39 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_wedged_fops, > "%llu\n"); > > static int > +fault_irq_set(struct drm_i915_private *i915, unsigned long *irq, u64 val) > +{ > + int err; > + > + err = mutex_lock_interruptible(&i915->drm.struct_mutex); > + if (err) > + return err; > + > + err = i915_gem_wait_for_idle(i915, > + I915_WAIT_LOCKED | > + I915_WAIT_INTERRUPTIBLE); > + if (err) > + goto err_unlock; > + > + /* Retire to kick idle work */ > + i915_gem_retire_requests(i915); > + GEM_BUG_ON(i915->gt.active_requests); > + > + *irq = val & INTEL_INFO(i915)->ring_mask; Looks like a type width mismatch on 32-bit. Should we change missed_irq_rings to an u64? > + mutex_unlock(&i915->drm.struct_mutex); > + > + /* Flush idle worker to disarm irq */ > + while (flush_delayed_work(&i915->gt.idle_work)) > + ; Worth sticking a schedule in here or something? Not worth it for debugfs I guess since we don't have it elsewhere. > + > + return 0; > + > +err_unlock: > + mutex_unlock(&i915->drm.struct_mutex); > + return err; > +} > + > +static int > i915_ring_missed_irq_get(void *data, u64 *val) > { > struct drm_i915_private *dev_priv = data; > @@ -4149,18 +4182,8 @@ i915_ring_missed_irq_get(void *data, u64 *val) > static int > i915_ring_missed_irq_set(void *data, u64 val) > { > - struct drm_i915_private *dev_priv = data; > - struct drm_device *dev = &dev_priv->drm; > - int ret; > - > - /* Lock against concurrent debugfs callers */ > - ret = mutex_lock_interruptible(&dev->struct_mutex); > - if (ret) > - return ret; > - dev_priv->gpu_error.missed_irq_rings = val; > - mutex_unlock(&dev->struct_mutex); > - > - return 0; > + struct drm_i915_private *i915 = data; > + return fault_irq_set(i915, &i915->gpu_error.missed_irq_rings, val); > } > > DEFINE_SIMPLE_ATTRIBUTE(i915_ring_missed_irq_fops, > @@ -4180,13 +4203,11 @@ i915_ring_test_irq_get(void *data, u64 *val) > static int > i915_ring_test_irq_set(void *data, u64 val) > { > - struct drm_i915_private *dev_priv = data; > + struct drm_i915_private *i915 = data; > > - val &= INTEL_INFO(dev_priv)->ring_mask; > + val &= INTEL_INFO(i915)->ring_mask; > DRM_DEBUG_DRIVER("Masking interrupts on rings 0x%08llx\n", val); > - dev_priv->gpu_error.test_irq_rings = val; > - > - return 0; > + return fault_irq_set(i915, &i915->gpu_error.test_irq_rings, val); > } > > DEFINE_SIMPLE_ATTRIBUTE(i915_ring_test_irq_fops, > Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Regards, Tvrtko
On Mon, Mar 06, 2017 at 05:45:30PM +0000, Tvrtko Ursulin wrote: > > On 06/03/2017 10:15, Chris Wilson wrote: > > static int > >+fault_irq_set(struct drm_i915_private *i915, unsigned long *irq, u64 val) > >+{ > >+ int err; > >+ > >+ err = mutex_lock_interruptible(&i915->drm.struct_mutex); > >+ if (err) > >+ return err; > >+ > >+ err = i915_gem_wait_for_idle(i915, > >+ I915_WAIT_LOCKED | > >+ I915_WAIT_INTERRUPTIBLE); > >+ if (err) > >+ goto err_unlock; > >+ > >+ /* Retire to kick idle work */ > >+ i915_gem_retire_requests(i915); > >+ GEM_BUG_ON(i915->gt.active_requests); > >+ > >+ *irq = val & INTEL_INFO(i915)->ring_mask; > > Looks like a type width mismatch on 32-bit. > > Should we change missed_irq_rings to an u64? Currently unsigned long, and for convenience with test_bit() should remain as an array of unsigned longs (i.e. bitmap). The mismatch here is probably better served by s/u64 val/unsigned long val/. If we need to go a full bitmap in the future, we'll have to switch to a bitmap_parse_str. So unsigned long looks to be the future proof type. > > >+ mutex_unlock(&i915->drm.struct_mutex); > >+ > >+ /* Flush idle worker to disarm irq */ > >+ while (flush_delayed_work(&i915->gt.idle_work)) > >+ ; > > Worth sticking a schedule in here or something? Not worth it for > debugfs I guess since we don't have it elsewhere. flush_delayed_work() is itself a schedule (if active), underneath it does a wait-for-completion on the work. -Chris
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 4a3e5b9552f8..511d3541d3d5 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -4138,6 +4138,39 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_wedged_fops, "%llu\n"); static int +fault_irq_set(struct drm_i915_private *i915, unsigned long *irq, u64 val) +{ + int err; + + err = mutex_lock_interruptible(&i915->drm.struct_mutex); + if (err) + return err; + + err = i915_gem_wait_for_idle(i915, + I915_WAIT_LOCKED | + I915_WAIT_INTERRUPTIBLE); + if (err) + goto err_unlock; + + /* Retire to kick idle work */ + i915_gem_retire_requests(i915); + GEM_BUG_ON(i915->gt.active_requests); + + *irq = val & INTEL_INFO(i915)->ring_mask; + mutex_unlock(&i915->drm.struct_mutex); + + /* Flush idle worker to disarm irq */ + while (flush_delayed_work(&i915->gt.idle_work)) + ; + + return 0; + +err_unlock: + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +static int i915_ring_missed_irq_get(void *data, u64 *val) { struct drm_i915_private *dev_priv = data; @@ -4149,18 +4182,8 @@ i915_ring_missed_irq_get(void *data, u64 *val) static int i915_ring_missed_irq_set(void *data, u64 val) { - struct drm_i915_private *dev_priv = data; - struct drm_device *dev = &dev_priv->drm; - int ret; - - /* Lock against concurrent debugfs callers */ - ret = mutex_lock_interruptible(&dev->struct_mutex); - if (ret) - return ret; - dev_priv->gpu_error.missed_irq_rings = val; - mutex_unlock(&dev->struct_mutex); - - return 0; + struct drm_i915_private *i915 = data; + return fault_irq_set(i915, &i915->gpu_error.missed_irq_rings, val); } DEFINE_SIMPLE_ATTRIBUTE(i915_ring_missed_irq_fops, @@ -4180,13 +4203,11 @@ i915_ring_test_irq_get(void *data, u64 *val) static int i915_ring_test_irq_set(void *data, u64 val) { - struct drm_i915_private *dev_priv = data; + struct drm_i915_private *i915 = data; - val &= INTEL_INFO(dev_priv)->ring_mask; + val &= INTEL_INFO(i915)->ring_mask; DRM_DEBUG_DRIVER("Masking interrupts on rings 0x%08llx\n", val); - dev_priv->gpu_error.test_irq_rings = val; - - return 0; + return fault_irq_set(i915, &i915->gpu_error.test_irq_rings, val); } DEFINE_SIMPLE_ATTRIBUTE(i915_ring_test_irq_fops,
In order for the missed-irq update to take effect, the device must be idle. So when the user updates the fault injection via debugfs, idle the device. v2: Idle is explicitly required for setting test_irq, and good behaviour for clearing the missed_irq. Testcase: igt/drv_missed_irq Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> --- drivers/gpu/drm/i915/i915_debugfs.c | 55 +++++++++++++++++++++++++------------ 1 file changed, 38 insertions(+), 17 deletions(-)