diff mbox

[v2,1/3] drm/i915: Tune down debug output when context is banned

Message ID 1390405289-4920-1-git-send-email-mika.kuoppala@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Mika Kuoppala Jan. 22, 2014, 3:41 p.m. UTC
If we have stopped rings then we know that test is running
so no need for spam. In addition, only spam when default
context gets banned.

v2: - make sure default context ban gets shown (Chris)
    - use helper for checking for default context, everywhere (Chris)

Reference: https://bugs.freedesktop.org/show_bug.cgi?id=73652
Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h         |    5 +++++
 drivers/gpu/drm/i915/i915_gem.c         |   18 +++++++++++-------
 drivers/gpu/drm/i915/i915_gem_context.c |    9 ++-------
 3 files changed, 18 insertions(+), 14 deletions(-)

Comments

Ben Widawsky Jan. 26, 2014, 6:17 p.m. UTC | #1
On Wed, Jan 22, 2014 at 05:41:29PM +0200, Mika Kuoppala wrote:
> If we have stopped rings then we know that test is running
> so no need for spam. In addition, only spam when default
> context gets banned.
> 
> v2: - make sure default context ban gets shown (Chris)
>     - use helper for checking for default context, everywhere (Chris)
> 

Overall, passing requests around this much to these functions seems
awkward to me. A lot of my comments are targeting that. If you're
totally of a different mind, I can reconsider.

> Reference: https://bugs.freedesktop.org/show_bug.cgi?id=73652
> Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h         |    5 +++++
>  drivers/gpu/drm/i915/i915_gem.c         |   18 +++++++++++-------
>  drivers/gpu/drm/i915/i915_gem_context.c |    9 ++-------
>  3 files changed, 18 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index f888fea..3a43388 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2267,6 +2267,11 @@ static inline void i915_gem_context_unreference(struct i915_hw_context *ctx)
>  		kref_put(&ctx->ref, i915_gem_context_free);
>  }
>  
> +static inline bool i915_gem_context_is_default(struct i915_hw_context *ctx)
> +{
> +	return (ctx->id == DEFAULT_CONTEXT_ID);
> +}
> +

Drop the parenthesis - I know that it's my bad.

>  int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
>  				  struct drm_file *file);
>  int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 5fcdb14..36d18d8 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2305,15 +2305,20 @@ static bool i915_request_guilty(struct drm_i915_gem_request *request,
>  	return false;
>  }
>  
> -static bool i915_context_is_banned(const struct i915_ctx_hang_stats *hs)
> +static bool i915_context_is_banned(const struct drm_i915_gem_request *request,
> +				   const struct i915_ctx_hang_stats *hs)
>  {
>  	const unsigned long elapsed = get_seconds() - hs->guilty_ts;
> +	struct drm_i915_private *dev_priv = request->ring->dev->dev_private;
>  
>  	if (hs->banned)
>  		return true;
>  
>  	if (elapsed <= DRM_I915_CTX_BAN_PERIOD) {
> -		DRM_ERROR("context hanging too fast, declaring banned!\n");
> +		if (dev_priv->gpu_error.stop_rings == 0 &&
> +		    request->ctx && i915_gem_context_is_default(request->ctx))
> +			DRM_ERROR("context hanging too fast, declaring banned!\n");
> +

Correct me if I am wrong, but, request->ctx should always be non-NULL
when using full PPGTT, and hardware contexts (not sure if this patch is
only targeted for that). In case of the latter, HAS_HW_CONTEXTS should
be == request->ctx. Passing a request to a function which shouldn't care
about requests feels bad to me. As such, I'd vote for:

i915_context_is_banned(struct i915_hw_context *ctx) - you can similarly
thread the ctx up through the call stack, instead of the hs.

I'd also suggest making i915_gem_context_is_default() check regardless
of stop_rings. The last request is optional.

>  		return true;
>  	}
>  
> @@ -2347,17 +2352,16 @@ static void i915_set_reset_status(struct intel_ring_buffer *ring,
>  		guilty = true;
>  	}
>  
> -	/* If contexts are disabled or this is the default context, use
> -	 * file_priv->reset_state
> -	 */
> -	if (request->ctx && request->ctx->id != DEFAULT_CONTEXT_ID)
> +	/* For default context, we track reset statistics per file_priv
> +	 * to allow more fine grained control */
> +	if (request->ctx && !i915_gem_context_is_default(request->ctx))

As I said above, you could replace request->ctx here if you wanted.

>  		hs = &request->ctx->hang_stats;
>  	else if (request->file_priv)
>  		hs = &request->file_priv->private_default_ctx->hang_stats;
>  
>  	if (hs) {
>  		if (guilty) {
> -			hs->banned = i915_context_is_banned(hs);
> +			hs->banned = i915_context_is_banned(request, hs);
>  			hs->batch_active++;
>  			hs->guilty_ts = get_seconds();
>  		} else {
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 112f865..4c55730 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -228,11 +228,6 @@ err_out:
>  	return ERR_PTR(ret);
>  }
>  
> -static inline bool is_default_context(struct i915_hw_context *ctx)
> -{
> -	return (ctx->id == DEFAULT_CONTEXT_ID);
> -}
> -
>  /**
>   * The default context needs to exist per ring that uses contexts. It stores the
>   * context state of the GPU for applications that don't utilize HW contexts, as
> @@ -474,7 +469,7 @@ static int context_idr_cleanup(int id, void *p, void *data)
>  	struct i915_hw_context *ctx = p;
>  
>  	/* Ignore the default context because close will handle it */
> -	if (is_default_context(ctx))
> +	if (i915_gem_context_is_default(ctx))
>  		return 0;
>  
>  	i915_gem_context_unreference(ctx);
> @@ -649,7 +644,7 @@ static int do_switch(struct intel_ring_buffer *ring,
>  		vma->bind_vma(vma, to->obj->cache_level, GLOBAL_BIND);
>  	}
>  
> -	if (!to->is_initialized || is_default_context(to))
> +	if (!to->is_initialized || i915_gem_context_is_default(to))
>  		hw_flags |= MI_RESTORE_INHIBIT;
>  
>  	ret = mi_set_context(ring, to, hw_flags);
> -- 
> 1.7.9.5
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Mika Kuoppala Jan. 29, 2014, 3:28 p.m. UTC | #2
Ben Widawsky <ben@bwidawsk.net> writes:

> On Wed, Jan 22, 2014 at 05:41:29PM +0200, Mika Kuoppala wrote:
>> If we have stopped rings then we know that test is running
>> so no need for spam. In addition, only spam when default
>> context gets banned.
>> 
>> v2: - make sure default context ban gets shown (Chris)
>>     - use helper for checking for default context, everywhere (Chris)
>> 
>
> Overall, passing requests around this much to these functions seems
> awkward to me. A lot of my comments are targeting that. If you're
> totally of a different mind, I can reconsider.
>
>> Reference: https://bugs.freedesktop.org/show_bug.cgi?id=73652
>> Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
>> ---
>>  drivers/gpu/drm/i915/i915_drv.h         |    5 +++++
>>  drivers/gpu/drm/i915/i915_gem.c         |   18 +++++++++++-------
>>  drivers/gpu/drm/i915/i915_gem_context.c |    9 ++-------
>>  3 files changed, 18 insertions(+), 14 deletions(-)
>> 
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>> index f888fea..3a43388 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -2267,6 +2267,11 @@ static inline void i915_gem_context_unreference(struct i915_hw_context *ctx)
>>  		kref_put(&ctx->ref, i915_gem_context_free);
>>  }
>>  
>> +static inline bool i915_gem_context_is_default(struct i915_hw_context *ctx)
>> +{
>> +	return (ctx->id == DEFAULT_CONTEXT_ID);
>> +}
>> +
>
> Drop the parenthesis - I know that it's my bad.
>
>>  int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
>>  				  struct drm_file *file);
>>  int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
>> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
>> index 5fcdb14..36d18d8 100644
>> --- a/drivers/gpu/drm/i915/i915_gem.c
>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>> @@ -2305,15 +2305,20 @@ static bool i915_request_guilty(struct drm_i915_gem_request *request,
>>  	return false;
>>  }
>>  
>> -static bool i915_context_is_banned(const struct i915_ctx_hang_stats *hs)
>> +static bool i915_context_is_banned(const struct drm_i915_gem_request *request,
>> +				   const struct i915_ctx_hang_stats *hs)
>>  {
>>  	const unsigned long elapsed = get_seconds() - hs->guilty_ts;
>> +	struct drm_i915_private *dev_priv = request->ring->dev->dev_private;
>>  
>>  	if (hs->banned)
>>  		return true;
>>  
>>  	if (elapsed <= DRM_I915_CTX_BAN_PERIOD) {
>> -		DRM_ERROR("context hanging too fast, declaring banned!\n");
>> +		if (dev_priv->gpu_error.stop_rings == 0 &&
>> +		    request->ctx && i915_gem_context_is_default(request->ctx))
>> +			DRM_ERROR("context hanging too fast, declaring banned!\n");
>> +
>
> Correct me if I am wrong, but, request->ctx should always be non-NULL
> when using full PPGTT, and hardware contexts (not sure if this patch is
> only targeted for that). In case of the latter, HAS_HW_CONTEXTS should
> be == request->ctx. Passing a request to a function which shouldn't care
> about requests feels bad to me. As such, I'd vote for:
>
> i915_context_is_banned(struct i915_hw_context *ctx) - you can similarly
> thread the ctx up through the call stack, instead of the hs.
>

I tried to take advantage of above and came up with:
1391007939-5741-2-git-send-email-mika.kuoppala@intel.com

With 4/4 applied on that series, we only pass i915_hw_context *'s around.

> I'd also suggest making i915_gem_context_is_default() check regardless
> of stop_rings. The last request is optional.
>

Hmm. I think logic is correct here.

Only output if tests are not running and if so, only if we are
dealing with default context.

-Mika

>>  		return true;
>>  	}
>>  
>> @@ -2347,17 +2352,16 @@ static void i915_set_reset_status(struct intel_ring_buffer *ring,
>>  		guilty = true;
>>  	}
>>  
>> -	/* If contexts are disabled or this is the default context, use
>> -	 * file_priv->reset_state
>> -	 */
>> -	if (request->ctx && request->ctx->id != DEFAULT_CONTEXT_ID)
>> +	/* For default context, we track reset statistics per file_priv
>> +	 * to allow more fine grained control */
>> +	if (request->ctx && !i915_gem_context_is_default(request->ctx))
>
> As I said above, you could replace request->ctx here if you wanted.
>
>>  		hs = &request->ctx->hang_stats;
>>  	else if (request->file_priv)
>>  		hs = &request->file_priv->private_default_ctx->hang_stats;
>>  
>>  	if (hs) {
>>  		if (guilty) {
>> -			hs->banned = i915_context_is_banned(hs);
>> +			hs->banned = i915_context_is_banned(request, hs);
>>  			hs->batch_active++;
>>  			hs->guilty_ts = get_seconds();
>>  		} else {
>> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
>> index 112f865..4c55730 100644
>> --- a/drivers/gpu/drm/i915/i915_gem_context.c
>> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
>> @@ -228,11 +228,6 @@ err_out:
>>  	return ERR_PTR(ret);
>>  }
>>  
>> -static inline bool is_default_context(struct i915_hw_context *ctx)
>> -{
>> -	return (ctx->id == DEFAULT_CONTEXT_ID);
>> -}
>> -
>>  /**
>>   * The default context needs to exist per ring that uses contexts. It stores the
>>   * context state of the GPU for applications that don't utilize HW contexts, as
>> @@ -474,7 +469,7 @@ static int context_idr_cleanup(int id, void *p, void *data)
>>  	struct i915_hw_context *ctx = p;
>>  
>>  	/* Ignore the default context because close will handle it */
>> -	if (is_default_context(ctx))
>> +	if (i915_gem_context_is_default(ctx))
>>  		return 0;
>>  
>>  	i915_gem_context_unreference(ctx);
>> @@ -649,7 +644,7 @@ static int do_switch(struct intel_ring_buffer *ring,
>>  		vma->bind_vma(vma, to->obj->cache_level, GLOBAL_BIND);
>>  	}
>>  
>> -	if (!to->is_initialized || is_default_context(to))
>> +	if (!to->is_initialized || i915_gem_context_is_default(to))
>>  		hw_flags |= MI_RESTORE_INHIBIT;
>>  
>>  	ret = mi_set_context(ring, to, hw_flags);
>> -- 
>> 1.7.9.5
>> 
>> _______________________________________________
>> Intel-gfx mailing list
>> Intel-gfx@lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
>
> -- 
> Ben Widawsky, Intel Open Source Technology Center
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index f888fea..3a43388 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2267,6 +2267,11 @@  static inline void i915_gem_context_unreference(struct i915_hw_context *ctx)
 		kref_put(&ctx->ref, i915_gem_context_free);
 }
 
+static inline bool i915_gem_context_is_default(struct i915_hw_context *ctx)
+{
+	return (ctx->id == DEFAULT_CONTEXT_ID);
+}
+
 int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
 				  struct drm_file *file);
 int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 5fcdb14..36d18d8 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2305,15 +2305,20 @@  static bool i915_request_guilty(struct drm_i915_gem_request *request,
 	return false;
 }
 
-static bool i915_context_is_banned(const struct i915_ctx_hang_stats *hs)
+static bool i915_context_is_banned(const struct drm_i915_gem_request *request,
+				   const struct i915_ctx_hang_stats *hs)
 {
 	const unsigned long elapsed = get_seconds() - hs->guilty_ts;
+	struct drm_i915_private *dev_priv = request->ring->dev->dev_private;
 
 	if (hs->banned)
 		return true;
 
 	if (elapsed <= DRM_I915_CTX_BAN_PERIOD) {
-		DRM_ERROR("context hanging too fast, declaring banned!\n");
+		if (dev_priv->gpu_error.stop_rings == 0 &&
+		    request->ctx && i915_gem_context_is_default(request->ctx))
+			DRM_ERROR("context hanging too fast, declaring banned!\n");
+
 		return true;
 	}
 
@@ -2347,17 +2352,16 @@  static void i915_set_reset_status(struct intel_ring_buffer *ring,
 		guilty = true;
 	}
 
-	/* If contexts are disabled or this is the default context, use
-	 * file_priv->reset_state
-	 */
-	if (request->ctx && request->ctx->id != DEFAULT_CONTEXT_ID)
+	/* For default context, we track reset statistics per file_priv
+	 * to allow more fine grained control */
+	if (request->ctx && !i915_gem_context_is_default(request->ctx))
 		hs = &request->ctx->hang_stats;
 	else if (request->file_priv)
 		hs = &request->file_priv->private_default_ctx->hang_stats;
 
 	if (hs) {
 		if (guilty) {
-			hs->banned = i915_context_is_banned(hs);
+			hs->banned = i915_context_is_banned(request, hs);
 			hs->batch_active++;
 			hs->guilty_ts = get_seconds();
 		} else {
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 112f865..4c55730 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -228,11 +228,6 @@  err_out:
 	return ERR_PTR(ret);
 }
 
-static inline bool is_default_context(struct i915_hw_context *ctx)
-{
-	return (ctx->id == DEFAULT_CONTEXT_ID);
-}
-
 /**
  * The default context needs to exist per ring that uses contexts. It stores the
  * context state of the GPU for applications that don't utilize HW contexts, as
@@ -474,7 +469,7 @@  static int context_idr_cleanup(int id, void *p, void *data)
 	struct i915_hw_context *ctx = p;
 
 	/* Ignore the default context because close will handle it */
-	if (is_default_context(ctx))
+	if (i915_gem_context_is_default(ctx))
 		return 0;
 
 	i915_gem_context_unreference(ctx);
@@ -649,7 +644,7 @@  static int do_switch(struct intel_ring_buffer *ring,
 		vma->bind_vma(vma, to->obj->cache_level, GLOBAL_BIND);
 	}
 
-	if (!to->is_initialized || is_default_context(to))
+	if (!to->is_initialized || i915_gem_context_is_default(to))
 		hw_flags |= MI_RESTORE_INHIBIT;
 
 	ret = mi_set_context(ring, to, hw_flags);