diff mbox

[4/4] drm/i915: Detect small loops in hangcheck

Message ID 1448902389-12477-4-git-send-email-mika.kuoppala@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Mika Kuoppala Nov. 30, 2015, 4:53 p.m. UTC
If there is a very small loop in a batch, the chances are quite high
that we sample the same head value twice in a row, causing the
hangcheck score to be incremented with a hung engine status instead of
an active loop status, which would have been more correct.

Try to resample the actual head a few times to detect small loops
instead of jumping to conclusions.

Cc: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/i915_irq.c | 32 +++++++++++++++++++++++++++-----
 1 file changed, 27 insertions(+), 5 deletions(-)
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index c1d1400..7c1168b 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2914,12 +2914,8 @@  static void semaphore_clear_deadlocks(struct drm_i915_private *dev_priv)
 }
 
 static enum intel_ring_hangcheck_action
-head_stuck(struct intel_engine_cs *ring, u64 acthd)
+head_action(struct intel_engine_cs *ring, u64 acthd)
 {
-	struct drm_device *dev = ring->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	u32 head;
-
 	if (acthd != ring->hangcheck.acthd) {
 		if (acthd > ring->hangcheck.max_acthd) {
 			ring->hangcheck.max_acthd = acthd;
@@ -2929,6 +2925,21 @@  head_stuck(struct intel_engine_cs *ring, u64 acthd)
 		return HANGCHECK_ACTIVE_LOOP;
 	}
 
+	return HANGCHECK_HUNG;
+}
+
+static enum intel_ring_hangcheck_action
+head_stuck(struct intel_engine_cs *ring, u64 acthd)
+{
+	struct drm_device *dev = ring->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	static enum intel_ring_hangcheck_action ha;
+	u32 head, retries = 5;
+
+	ha = head_action(ring, acthd);
+	if (ha != HANGCHECK_HUNG)
+		return ha;
+
 	head = I915_READ_HEAD(ring) & HEAD_ADDR;
 
 	/* Some operations, like pipe flush, can take a long time.
@@ -2938,6 +2949,17 @@  head_stuck(struct intel_engine_cs *ring, u64 acthd)
 	if (lower_32_bits(acthd) == head)
 		return HANGCHECK_ACTIVE_LOOP;
 
+	do {
+		msleep(20);
+
+		ring->hangcheck.acthd = acthd;
+		acthd = intel_ring_get_active_head(ring);
+
+		ha = head_action(ring, acthd);
+		if (ha != HANGCHECK_HUNG)
+			return ha;
+	} while (retries--);
+
 	return HANGCHECK_HUNG;
 }