diff mbox

[2/5] drm/i915: track ring progression using seqnos

Message ID 1368451933-32571-2-git-send-email-mika.kuoppala@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Mika Kuoppala May 13, 2013, 1:32 p.m. UTC
Instead of relying on acthd, track ring seqno progression
to detect if the ring has hung.

v2: put hangcheck stuff inside struct (Chris Wilson)

Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h         |    2 --
 drivers/gpu/drm/i915/i915_irq.c         |   30 +++++++++++++-----------------
 drivers/gpu/drm/i915/intel_ringbuffer.h |    6 ++++++
 3 files changed, 19 insertions(+), 19 deletions(-)

Comments

Ben Widawsky May 17, 2013, 5:40 p.m. UTC | #1
On Mon, May 13, 2013 at 04:32:10PM +0300, Mika Kuoppala wrote:
> Instead of relying in acthd, track ring seqno progression
> to detect if ring has hung.
> 
> v2: put hangcheck stuff inside struct (Chris Wilson)
> 
> Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h         |    2 --
>  drivers/gpu/drm/i915/i915_irq.c         |   30 +++++++++++++-----------------
>  drivers/gpu/drm/i915/intel_ringbuffer.h |    6 ++++++
>  3 files changed, 19 insertions(+), 19 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 14817de..db7cda9 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -834,8 +834,6 @@ struct i915_gpu_error {
>  #define DRM_I915_HANGCHECK_JIFFIES msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD)
>  	struct timer_list hangcheck_timer;
>  	int hangcheck_count;
> -	uint32_t last_acthd[I915_NUM_RINGS];
> -	uint32_t prev_instdone[I915_NUM_INSTDONE_REG];
>  
>  	/* For reset and error_state handling. */
>  	spinlock_t lock;
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 0e5c9b0..004ad34 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -2384,22 +2384,19 @@ void i915_hangcheck_elapsed(unsigned long data)
>  {
>  	struct drm_device *dev = (struct drm_device *)data;
>  	drm_i915_private_t *dev_priv = dev->dev_private;
> -	uint32_t acthd[I915_NUM_RINGS], instdone[I915_NUM_INSTDONE_REG];
>  	struct intel_ring_buffer *ring;
>  	bool err = false, idle;
>  	int i;
> +	u32 seqno[I915_NUM_RINGS];
> +	bool work_done;
>  
>  	if (!i915_enable_hangcheck)
>  		return;
>  
> -	memset(acthd, 0, sizeof(acthd));
>  	idle = true;
>  	for_each_ring(ring, dev_priv, i) {
> -		u32 seqno;
> -
> -		seqno = ring->get_seqno(ring, false);
> -		idle &= i915_hangcheck_ring_idle(ring, seqno, &err);
> -	    acthd[i] = intel_ring_get_active_head(ring);
> +		seqno[i] = ring->get_seqno(ring, false);
> +		idle &= i915_hangcheck_ring_idle(ring, seqno[i], &err);
>  	}
>  
>  	/* If all work is done then ACTHD clearly hasn't advanced. */
> @@ -2415,20 +2412,19 @@ void i915_hangcheck_elapsed(unsigned long data)
>  		return;
>  	}
>  
> -	i915_get_extra_instdone(dev, instdone);
> -	if (memcmp(dev_priv->gpu_error.last_acthd, acthd,
> -		   sizeof(acthd)) == 0 &&
> -	    memcmp(dev_priv->gpu_error.prev_instdone, instdone,
> -		   sizeof(instdone)) == 0) {
> +	work_done = false;
> +	for_each_ring(ring, dev_priv, i) {
> +		if (ring->hangcheck.seqno != seqno[i]) {
> +			work_done = true;
> +			ring->hangcheck.seqno = seqno[i];
> +		}
> +	}
> +
> +	if (!work_done) {
>  		if (i915_hangcheck_hung(dev))
>  			return;
>  	} else {
>  		dev_priv->gpu_error.hangcheck_count = 0;
> -
> -		memcpy(dev_priv->gpu_error.last_acthd, acthd,
> -		       sizeof(acthd));
> -		memcpy(dev_priv->gpu_error.prev_instdone, instdone,
> -		       sizeof(instdone));
>  	}
>  
>  repeat:
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index dac1614..ef374a8 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -37,6 +37,10 @@ struct  intel_hw_status_page {
>  #define I915_READ_SYNC_0(ring) I915_READ(RING_SYNC_0((ring)->mmio_base))
>  #define I915_READ_SYNC_1(ring) I915_READ(RING_SYNC_1((ring)->mmio_base))
>  
> +struct intel_ring_hangcheck {
> +	u32 seqno;
> +};
> +

Shouldn't you initialize this thing (the new per-ring hangcheck.seqno) in i915_gem_init_seqno()?

>  struct  intel_ring_buffer {
>  	const char	*name;
>  	enum intel_ring_id {
> @@ -137,6 +141,8 @@ struct  intel_ring_buffer {
>  	struct i915_hw_context *default_context;
>  	struct i915_hw_context *last_context;
>  
> +	struct intel_ring_hangcheck hangcheck;
> +
>  	void *private;
>  };
>  
> -- 
> 1.7.9.5
>
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 14817de..db7cda9 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -834,8 +834,6 @@  struct i915_gpu_error {
 #define DRM_I915_HANGCHECK_JIFFIES msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD)
 	struct timer_list hangcheck_timer;
 	int hangcheck_count;
-	uint32_t last_acthd[I915_NUM_RINGS];
-	uint32_t prev_instdone[I915_NUM_INSTDONE_REG];
 
 	/* For reset and error_state handling. */
 	spinlock_t lock;
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 0e5c9b0..004ad34 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2384,22 +2384,19 @@  void i915_hangcheck_elapsed(unsigned long data)
 {
 	struct drm_device *dev = (struct drm_device *)data;
 	drm_i915_private_t *dev_priv = dev->dev_private;
-	uint32_t acthd[I915_NUM_RINGS], instdone[I915_NUM_INSTDONE_REG];
 	struct intel_ring_buffer *ring;
 	bool err = false, idle;
 	int i;
+	u32 seqno[I915_NUM_RINGS];
+	bool work_done;
 
 	if (!i915_enable_hangcheck)
 		return;
 
-	memset(acthd, 0, sizeof(acthd));
 	idle = true;
 	for_each_ring(ring, dev_priv, i) {
-		u32 seqno;
-
-		seqno = ring->get_seqno(ring, false);
-		idle &= i915_hangcheck_ring_idle(ring, seqno, &err);
-	    acthd[i] = intel_ring_get_active_head(ring);
+		seqno[i] = ring->get_seqno(ring, false);
+		idle &= i915_hangcheck_ring_idle(ring, seqno[i], &err);
 	}
 
 	/* If all work is done then ACTHD clearly hasn't advanced. */
@@ -2415,20 +2412,19 @@  void i915_hangcheck_elapsed(unsigned long data)
 		return;
 	}
 
-	i915_get_extra_instdone(dev, instdone);
-	if (memcmp(dev_priv->gpu_error.last_acthd, acthd,
-		   sizeof(acthd)) == 0 &&
-	    memcmp(dev_priv->gpu_error.prev_instdone, instdone,
-		   sizeof(instdone)) == 0) {
+	work_done = false;
+	for_each_ring(ring, dev_priv, i) {
+		if (ring->hangcheck.seqno != seqno[i]) {
+			work_done = true;
+			ring->hangcheck.seqno = seqno[i];
+		}
+	}
+
+	if (!work_done) {
 		if (i915_hangcheck_hung(dev))
 			return;
 	} else {
 		dev_priv->gpu_error.hangcheck_count = 0;
-
-		memcpy(dev_priv->gpu_error.last_acthd, acthd,
-		       sizeof(acthd));
-		memcpy(dev_priv->gpu_error.prev_instdone, instdone,
-		       sizeof(instdone));
 	}
 
 repeat:
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index dac1614..ef374a8 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -37,6 +37,10 @@  struct  intel_hw_status_page {
 #define I915_READ_SYNC_0(ring) I915_READ(RING_SYNC_0((ring)->mmio_base))
 #define I915_READ_SYNC_1(ring) I915_READ(RING_SYNC_1((ring)->mmio_base))
 
+struct intel_ring_hangcheck {
+	u32 seqno;
+};
+
 struct  intel_ring_buffer {
 	const char	*name;
 	enum intel_ring_id {
@@ -137,6 +141,8 @@  struct  intel_ring_buffer {
 	struct i915_hw_context *default_context;
 	struct i915_hw_context *last_context;
 
+	struct intel_ring_hangcheck hangcheck;
+
 	void *private;
 };