diff mbox

[4/4] drm/i915: Get rid of acthd based guilty batch search

Message ID 1391007939-5741-5-git-send-email-mika.kuoppala@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Mika Kuoppala Jan. 29, 2014, 3:05 p.m. UTC
As we seek the guilty batch using request and hangcheck
score, this code is not needed anymore.

Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c |   91 ++-------------------------------------
 1 file changed, 4 insertions(+), 87 deletions(-)

Comments

Ben Widawsky Jan. 29, 2014, 9:44 p.m. UTC | #1
On Wed, Jan 29, 2014 at 05:05:39PM +0200, Mika Kuoppala wrote:
> As we seek the guilty batch using request and hangcheck
> score, this code is not needed anymore.
> 
> Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_gem.c |   91 ++-------------------------------------
>  1 file changed, 4 insertions(+), 87 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index a46a1a7..8637898 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2241,70 +2241,6 @@ i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
>  	spin_unlock(&file_priv->mm.lock);
>  }
>  
> -static bool i915_head_inside_object(u32 acthd, struct drm_i915_gem_object *obj,
> -				    struct i915_address_space *vm)
> -{
> -	if (acthd >= i915_gem_obj_offset(obj, vm) &&
> -	    acthd < i915_gem_obj_offset(obj, vm) + obj->base.size)
> -		return true;
> -
> -	return false;
> -}
> -
> -static bool i915_head_inside_request(const u32 acthd_unmasked,
> -				     const u32 request_start,
> -				     const u32 request_end)
> -{
> -	const u32 acthd = acthd_unmasked & HEAD_ADDR;
> -
> -	if (request_start < request_end) {
> -		if (acthd >= request_start && acthd < request_end)
> -			return true;
> -	} else if (request_start > request_end) {
> -		if (acthd >= request_start || acthd < request_end)
> -			return true;
> -	}
> -
> -	return false;
> -}
> -
> -static struct i915_address_space *
> -request_to_vm(struct drm_i915_gem_request *request)
> -{
> -	struct drm_i915_private *dev_priv = request->ring->dev->dev_private;
> -	struct i915_address_space *vm;
> -
> -	if (request->ctx)
> -		vm = request->ctx->vm;
> -	else
> -		vm = &dev_priv->gtt.base;
> -
> -	return vm;
> -}
> -
> -static bool i915_request_guilty(struct drm_i915_gem_request *request,
> -				const u32 acthd, bool *inside)
> -{
> -	/* There is a possibility that unmasked head address
> -	 * pointing inside the ring, matches the batch_obj address range.
> -	 * However this is extremely unlikely.
> -	 */
> -	if (request->batch_obj) {
> -		if (i915_head_inside_object(acthd, request->batch_obj,
> -					    request_to_vm(request))) {
> -			*inside = true;
> -			return true;
> -		}
> -	}
> -
> -	if (i915_head_inside_request(acthd, request->head, request->tail)) {
> -		*inside = false;
> -		return true;
> -	}
> -
> -	return false;
> -}
> -
>  static bool i915_context_is_banned(const struct i915_hw_context *ctx)
>  {
>  	struct drm_i915_private *dev_priv;
> @@ -2329,30 +2265,11 @@ static bool i915_context_is_banned(const struct i915_hw_context *ctx)
>  	return false;
>  }
>  
> -static void i915_set_reset_status(struct intel_ring_buffer *ring,
> -				  struct drm_i915_gem_request *request,
> -				  const bool guilty)
> +static void i915_set_reset_status(struct i915_hw_context *ctx,
> +				 const bool guilty)
>  {
> -	const u32 acthd = intel_ring_get_active_head(ring);
> -	bool inside;
> -	unsigned long offset = 0;
> -	struct i915_hw_context *ctx = request->ctx;
>  	struct i915_ctx_hang_stats *hs;
>  
> -	if (request->batch_obj)
> -		offset = i915_gem_obj_offset(request->batch_obj,
> -					     request_to_vm(request));
> -
> -	if (guilty &&
> -	    i915_request_guilty(request, acthd, &inside)) {
> -		DRM_DEBUG("%s hung %s bo (0x%lx ctx %d) at 0x%x\n",
> -			  ring->name,
> -			  inside ? "inside" : "flushing",
> -			  offset,
> -			  ctx ? ctx->id : 0,
> -			  acthd);
> -	}
> -
>  	if (WARN_ON(!ctx))
>  		return;
>  
> @@ -2407,10 +2324,10 @@ static void i915_gem_reset_ring_status(struct drm_i915_private *dev_priv,
>  
>  	ring_hung = ring->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG;
>  
> -	i915_set_reset_status(ring, request, ring_hung);
> +	i915_set_reset_status(request->ctx, ring_hung);
>  
>  	list_for_each_entry_continue(request, &ring->request_list, list)
> -		i915_set_reset_status(ring, request, false);
> +		i915_set_reset_status(request->ctx, false);
>  }
>  
>  static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,

I can't say that I'm a huge fan of calling i915_set_reset_status twice
(I bit my lip while reading the last patch). To me it suggests that the
interface probably ended up a bit poorly designed. I can live with it
though, I just couldn't bite my lip for 2 patches in a row :-)

I guess I've missed how this solves the issue I poked about in the
original series. However, the code overall is a big improvement, and
even in the unlikely case that I am not just being blind to your
solution- the odds of having multiple hung rings are slim enough that I
can live with that either way.

I did put some requests in the patches. Each already had an
unconditional r-b. Therefore, the series is:
Reviewed-by: Ben Widawsky <ben@bwidawsk.net>

Last word: As I've discussed with Chris too, I am overall a bit wary of
removing any use upon hardware for doing a lot of these error triage,
detection and collection functions. I really like that no matter how
bonghits our driver gets, we can read certain registers to try to figure
things out. I say this now since I think after this series I will no
longer have a leg to stand on in the, we shouldn't use requests for
error collection, discussion. Thanks for reading my rant.
Chris Wilson Jan. 30, 2014, 12:59 p.m. UTC | #2
On Wed, Jan 29, 2014 at 01:44:34PM -0800, Ben Widawsky wrote:
> Last word: As I've discussed with Chris too, I am overall a bit wary of
> removing any use upon hardware for doing a lot of these error triage,
> detection and collection functions. I really like that no matter how
> bonghits our driver gets, we can read certain registers to try to figure
> things out. I say this now since I think after this series I will no
> longer have a leg to stand on in the, we shouldn't use requests for
> error collection, discussion. Thanks for reading my rant.

I know exactly what you mean. But without a complete snapshot of the
hardware and memory (including old contents of overwritten buffers) we
will always be missing something when we come to post-mortem debugging.

I trust tracking activity by seqno though, and in the error states where
that itself has been erroneous the fault has stood out (and had been the
root cause of the hang anyway). Hence my feeling that it is exactly what
I want for port-mortem debugging.
-Chris
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a46a1a7..8637898 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2241,70 +2241,6 @@  i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
 	spin_unlock(&file_priv->mm.lock);
 }
 
-static bool i915_head_inside_object(u32 acthd, struct drm_i915_gem_object *obj,
-				    struct i915_address_space *vm)
-{
-	if (acthd >= i915_gem_obj_offset(obj, vm) &&
-	    acthd < i915_gem_obj_offset(obj, vm) + obj->base.size)
-		return true;
-
-	return false;
-}
-
-static bool i915_head_inside_request(const u32 acthd_unmasked,
-				     const u32 request_start,
-				     const u32 request_end)
-{
-	const u32 acthd = acthd_unmasked & HEAD_ADDR;
-
-	if (request_start < request_end) {
-		if (acthd >= request_start && acthd < request_end)
-			return true;
-	} else if (request_start > request_end) {
-		if (acthd >= request_start || acthd < request_end)
-			return true;
-	}
-
-	return false;
-}
-
-static struct i915_address_space *
-request_to_vm(struct drm_i915_gem_request *request)
-{
-	struct drm_i915_private *dev_priv = request->ring->dev->dev_private;
-	struct i915_address_space *vm;
-
-	if (request->ctx)
-		vm = request->ctx->vm;
-	else
-		vm = &dev_priv->gtt.base;
-
-	return vm;
-}
-
-static bool i915_request_guilty(struct drm_i915_gem_request *request,
-				const u32 acthd, bool *inside)
-{
-	/* There is a possibility that unmasked head address
-	 * pointing inside the ring, matches the batch_obj address range.
-	 * However this is extremely unlikely.
-	 */
-	if (request->batch_obj) {
-		if (i915_head_inside_object(acthd, request->batch_obj,
-					    request_to_vm(request))) {
-			*inside = true;
-			return true;
-		}
-	}
-
-	if (i915_head_inside_request(acthd, request->head, request->tail)) {
-		*inside = false;
-		return true;
-	}
-
-	return false;
-}
-
 static bool i915_context_is_banned(const struct i915_hw_context *ctx)
 {
 	struct drm_i915_private *dev_priv;
@@ -2329,30 +2265,11 @@  static bool i915_context_is_banned(const struct i915_hw_context *ctx)
 	return false;
 }
 
-static void i915_set_reset_status(struct intel_ring_buffer *ring,
-				  struct drm_i915_gem_request *request,
-				  const bool guilty)
+static void i915_set_reset_status(struct i915_hw_context *ctx,
+				 const bool guilty)
 {
-	const u32 acthd = intel_ring_get_active_head(ring);
-	bool inside;
-	unsigned long offset = 0;
-	struct i915_hw_context *ctx = request->ctx;
 	struct i915_ctx_hang_stats *hs;
 
-	if (request->batch_obj)
-		offset = i915_gem_obj_offset(request->batch_obj,
-					     request_to_vm(request));
-
-	if (guilty &&
-	    i915_request_guilty(request, acthd, &inside)) {
-		DRM_DEBUG("%s hung %s bo (0x%lx ctx %d) at 0x%x\n",
-			  ring->name,
-			  inside ? "inside" : "flushing",
-			  offset,
-			  ctx ? ctx->id : 0,
-			  acthd);
-	}
-
 	if (WARN_ON(!ctx))
 		return;
 
@@ -2407,10 +2324,10 @@  static void i915_gem_reset_ring_status(struct drm_i915_private *dev_priv,
 
 	ring_hung = ring->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG;
 
-	i915_set_reset_status(ring, request, ring_hung);
+	i915_set_reset_status(request->ctx, ring_hung);
 
 	list_for_each_entry_continue(request, &ring->request_list, list)
-		i915_set_reset_status(ring, request, false);
+		i915_set_reset_status(request->ctx, false);
 }
 
 static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,