diff mbox series

[5/5] drm/i915: Forcibly flush unwanted requests in drop-caches

Message ID 20180903083337.13134-5-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show
Series [1/5] drm/i915: Do a full device reset after being wedged | expand

Commit Message

Chris Wilson Sept. 3, 2018, 8:33 a.m. UTC
Add a mode to debugfs/drop-caches to flush unwanted requests off the GPU
(by wedging the device and resetting). This is very useful if a test
terminated leaving a long queue of hanging batches that would ordinarily
require a round trip through hangcheck for each.

It reduces the inter-test operation to just a write into drop-caches to
reset driver/GPU state between tests.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c | 52 ++++++++++++++++++++---------
 1 file changed, 36 insertions(+), 16 deletions(-)

Comments

Joonas Lahtinen Sept. 3, 2018, 10:24 a.m. UTC | #1
Quoting Chris Wilson (2018-09-03 11:33:37)
> Add a mode to debugfs/drop-caches to flush unwanted requests off the GPU
> (by wedging the device and resetting). This is very useful if a test
> terminated leaving a long queue of hanging batches that would ordinarily
> require a round trip through hangcheck for each.
> 
> It reduces the inter-test operation to just a write into drop-caches to
> reset driver/GPU state between tests.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index a5265c236a33..4ad0e2ed8610 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -4131,13 +4131,17 @@  DEFINE_SIMPLE_ATTRIBUTE(i915_ring_test_irq_fops,
 #define DROP_FREED	BIT(4)
 #define DROP_SHRINK_ALL	BIT(5)
 #define DROP_IDLE	BIT(6)
+#define DROP_RESET_ACTIVE	BIT(7)
+#define DROP_RESET_SEQNO	BIT(8)
 #define DROP_ALL (DROP_UNBOUND	| \
 		  DROP_BOUND	| \
 		  DROP_RETIRE	| \
 		  DROP_ACTIVE	| \
 		  DROP_FREED	| \
 		  DROP_SHRINK_ALL |\
-		  DROP_IDLE)
+		  DROP_IDLE	| \
+		  DROP_RESET_ACTIVE | \
+		  DROP_RESET_SEQNO)
 static int
 i915_drop_caches_get(void *data, u64 *val)
 {
@@ -4149,53 +4153,69 @@  i915_drop_caches_get(void *data, u64 *val)
 static int
 i915_drop_caches_set(void *data, u64 val)
 {
-	struct drm_i915_private *dev_priv = data;
-	struct drm_device *dev = &dev_priv->drm;
+	struct drm_i915_private *i915 = data;
 	int ret = 0;
 
 	DRM_DEBUG("Dropping caches: 0x%08llx [0x%08llx]\n",
 		  val, val & DROP_ALL);
 
+	if (val & DROP_RESET_ACTIVE && !intel_engines_are_idle(i915))
+		i915_gem_set_wedged(i915);
+
 	/* No need to check and wait for gpu resets, only libdrm auto-restarts
 	 * on ioctls on -EAGAIN. */
-	if (val & (DROP_ACTIVE | DROP_RETIRE)) {
-		ret = mutex_lock_interruptible(&dev->struct_mutex);
+	if (val & (DROP_ACTIVE | DROP_RETIRE | DROP_RESET_SEQNO)) {
+		ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
 		if (ret)
 			return ret;
 
 		if (val & DROP_ACTIVE)
-			ret = i915_gem_wait_for_idle(dev_priv,
+			ret = i915_gem_wait_for_idle(i915,
 						     I915_WAIT_INTERRUPTIBLE |
 						     I915_WAIT_LOCKED,
 						     MAX_SCHEDULE_TIMEOUT);
 
+		if (val & DROP_RESET_SEQNO) {
+			intel_runtime_pm_get(i915);
+			ret = i915_gem_set_global_seqno(&i915->drm, 1);
+			intel_runtime_pm_put(i915);
+		}
+
 		if (val & DROP_RETIRE)
-			i915_retire_requests(dev_priv);
+			i915_retire_requests(i915);
 
-		mutex_unlock(&dev->struct_mutex);
+		mutex_unlock(&i915->drm.struct_mutex);
+	}
+
+	if (val & DROP_RESET_ACTIVE &&
+	    i915_terminally_wedged(&i915->gpu_error)) {
+		i915_handle_error(i915, ALL_ENGINES, 0, NULL);
+		wait_on_bit(&i915->gpu_error.flags,
+			    I915_RESET_HANDOFF,
+			    TASK_UNINTERRUPTIBLE);
 	}
 
 	fs_reclaim_acquire(GFP_KERNEL);
 	if (val & DROP_BOUND)
-		i915_gem_shrink(dev_priv, LONG_MAX, NULL, I915_SHRINK_BOUND);
+		i915_gem_shrink(i915, LONG_MAX, NULL, I915_SHRINK_BOUND);
 
 	if (val & DROP_UNBOUND)
-		i915_gem_shrink(dev_priv, LONG_MAX, NULL, I915_SHRINK_UNBOUND);
+		i915_gem_shrink(i915, LONG_MAX, NULL, I915_SHRINK_UNBOUND);
 
 	if (val & DROP_SHRINK_ALL)
-		i915_gem_shrink_all(dev_priv);
+		i915_gem_shrink_all(i915);
 	fs_reclaim_release(GFP_KERNEL);
 
 	if (val & DROP_IDLE) {
 		do {
-			if (READ_ONCE(dev_priv->gt.active_requests))
-				flush_delayed_work(&dev_priv->gt.retire_work);
-			drain_delayed_work(&dev_priv->gt.idle_work);
-		} while (READ_ONCE(dev_priv->gt.awake));
+			if (READ_ONCE(i915->gt.active_requests))
+				flush_delayed_work(&i915->gt.retire_work);
+			drain_delayed_work(&i915->gt.idle_work);
+		} while (READ_ONCE(i915->gt.awake));
 	}
 
 	if (val & DROP_FREED)
-		i915_gem_drain_freed_objects(dev_priv);
+		i915_gem_drain_freed_objects(i915);
 
 	return ret;
 }