diff mbox

[16/59] drm/i915: Add flag to i915_add_request() to skip the cache flush

Message ID 1426768264-16996-17-git-send-email-John.C.Harrison@Intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

John Harrison March 19, 2015, 12:30 p.m. UTC
From: John Harrison <John.C.Harrison@Intel.com>

In order to explicitly track all GPU work (and completely remove the outstanding
lazy request), it is necessary to add extra i915_add_request() calls to various
places. Some of these do not need the implicit cache flush done as part of the
standard batch buffer submission process.

This patch adds a flag to _add_request() to specify whether the flush is
required or not.

For: VIZ-5115
Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h              |    7 +++++--
 drivers/gpu/drm/i915/i915_gem.c              |   17 ++++++++++-------
 drivers/gpu/drm/i915/i915_gem_execbuffer.c   |    2 +-
 drivers/gpu/drm/i915/i915_gem_render_state.c |    2 +-
 drivers/gpu/drm/i915/intel_lrc.c             |    2 +-
 5 files changed, 18 insertions(+), 12 deletions(-)

Comments

Tomas Elf March 31, 2015, 4:32 p.m. UTC | #1
On 19/03/2015 12:30, John.C.Harrison@Intel.com wrote:
> From: John Harrison <John.C.Harrison@Intel.com>
>
> In order to explicitly track all GPU work (and completely remove the outstanding
> lazy request), it is necessary to add extra i915_add_request() calls to various
> places. Some of these do not need the implicit cache flush done as part of the
> standard batch buffer submission process.
>
> This patch adds a flag to _add_request() to specify whether the flush is
> required or not.
>
> For: VIZ-5115
> Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
> ---
>   drivers/gpu/drm/i915/i915_drv.h              |    7 +++++--
>   drivers/gpu/drm/i915/i915_gem.c              |   17 ++++++++++-------
>   drivers/gpu/drm/i915/i915_gem_execbuffer.c   |    2 +-
>   drivers/gpu/drm/i915/i915_gem_render_state.c |    2 +-
>   drivers/gpu/drm/i915/intel_lrc.c             |    2 +-
>   5 files changed, 18 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index d3b718e..4bcb43f 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2751,9 +2751,12 @@ int __must_check i915_gpu_idle(struct drm_device *dev);
>   int __must_check i915_gem_suspend(struct drm_device *dev);
>   void __i915_add_request(struct intel_engine_cs *ring,
>   			struct drm_file *file,
> -			struct drm_i915_gem_object *batch_obj);
> +			struct drm_i915_gem_object *batch_obj,
> +			bool flush_caches);
>   #define i915_add_request(ring) \
> -	__i915_add_request(ring, NULL, NULL)
> +	__i915_add_request(ring, NULL, NULL, true)
> +#define i915_add_request_no_flush(ring) \
> +	__i915_add_request(ring, NULL, NULL, false)
>   int __i915_wait_request(struct drm_i915_gem_request *req,
>   			unsigned reset_counter,
>   			bool interruptible,
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 9a335d5..f143d15 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2329,7 +2329,8 @@ i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
>    */
>   void __i915_add_request(struct intel_engine_cs *ring,
>   			struct drm_file *file,
> -			struct drm_i915_gem_object *obj)
> +			struct drm_i915_gem_object *obj,
> +			bool flush_caches)
>   {
>   	struct drm_i915_private *dev_priv = ring->dev->dev_private;
>   	struct drm_i915_gem_request *request;
> @@ -2361,12 +2362,14 @@ void __i915_add_request(struct intel_engine_cs *ring,
>   	 * is that the flush _must_ happen before the next request, no matter
>   	 * what.
>   	 */
> -	if (i915.enable_execlists)
> -		ret = logical_ring_flush_all_caches(ringbuf, request->ctx);
> -	else
> -		ret = intel_ring_flush_all_caches(ring);
> -	/* Not allowed to fail! */
> -	WARN_ON(ret);
> +	if (flush_caches) {
> +		if (i915.enable_execlists)
> +			ret = logical_ring_flush_all_caches(ringbuf, request->ctx);
> +		else
> +			ret = intel_ring_flush_all_caches(ring);
> +		/* Not allowed to fail! */
> +		WARN_ON(ret);

There has been a discussion about whether it's ok to use 
WARN_ON(<variable/constant>) since it doesn't add any useful information 
about the actual failure case to the kernel log. By that logic you 
should add WARN_ON to each individual call to _flush_all_caches above 
but I would be inclined to say that that would be slightly redundant. It 
could be argued that if you get a WARN stack_dump in the kernel log at 
this point you pretty much know from where it came.

Although, if you want to be able to rely entirely on the kernel log and 
don't want to read the source code to figure out what function call 
failed then you would have to either add WARN_ONs to each individual 
function call or replace WARN_ON(ret) with something like WARN(ret, 
"flush_all_caches returned %d", ret) or something.

Any other opinion on how we should do this from anyone? What pattern 
should we be following here?

> +	}
>
>   	/* Record the position of the start of the request so that
>   	 * should we detect the updated seqno part-way through the
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 3173550..c0be7d7 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -1060,7 +1060,7 @@ i915_gem_execbuffer_retire_commands(struct i915_execbuffer_params *params)
>   	params->ring->gpu_caches_dirty = true;
>
>   	/* Add a breadcrumb for the completion of the batch buffer */
> -	__i915_add_request(params->ring, params->file, params->batch_obj);
> +	__i915_add_request(params->ring, params->file, params->batch_obj, true);
>   }
>
>   static int
> diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
> index ce4788f..4418616 100644
> --- a/drivers/gpu/drm/i915/i915_gem_render_state.c
> +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
> @@ -173,7 +173,7 @@ int i915_gem_render_state_init(struct intel_engine_cs *ring)
>
>   	i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), ring);
>
> -	__i915_add_request(ring, NULL, so.obj);
> +	__i915_add_request(ring, NULL, so.obj, true);
>   	/* __i915_add_request moves object to inactive if it fails */
>   out:
>   	i915_gem_render_state_fini(&so);
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 8c69f88..4922725 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -1350,7 +1350,7 @@ static int intel_lr_context_render_state_init(struct intel_engine_cs *ring,
>
>   	i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), ring);
>
> -	__i915_add_request(ring, file, so.obj);
> +	__i915_add_request(ring, file, so.obj, true);
>   	/* intel_logical_ring_add_request moves object to inactive if it
>   	 * fails */
>   out:
>
Daniel Vetter April 1, 2015, 5:59 a.m. UTC | #2
On Tue, Mar 31, 2015 at 05:32:07PM +0100, Tomas Elf wrote:
> On 19/03/2015 12:30, John.C.Harrison@Intel.com wrote:
> >@@ -2361,12 +2362,14 @@ void __i915_add_request(struct intel_engine_cs *ring,
> >  	 * is that the flush _must_ happen before the next request, no matter
> >  	 * what.
> >  	 */
> >-	if (i915.enable_execlists)
> >-		ret = logical_ring_flush_all_caches(ringbuf, request->ctx);
> >-	else
> >-		ret = intel_ring_flush_all_caches(ring);
> >-	/* Not allowed to fail! */
> >-	WARN_ON(ret);
> >+	if (flush_caches) {
> >+		if (i915.enable_execlists)
> >+			ret = logical_ring_flush_all_caches(ringbuf, request->ctx);
> >+		else
> >+			ret = intel_ring_flush_all_caches(ring);
> >+		/* Not allowed to fail! */
> >+		WARN_ON(ret);
> 
> There has been a discussion about whether it's ok to use
> WARN_ON(<variable/constant>) since it doesn't add any useful information
> about the actual failure case to the kernel log. By that logic you should
> add WARN_ON to each individual call to _flush_all_caches above but I would
> be inclined to say that that would be slightly redundant. It could be argued
> that if you get a WARN stack_dump in the kernel log at this point you pretty
> much know from where it came.
> 
> Although, if you want to be able to rely entirely on the kernel log and
> don't want to read the source code to figure out what function call failed
> then you would have to either add WARN_ONs to each individual function call
> or replace WARN_ON(ret) with something like WARN(ret, "flush_all_caches
> returned %d", ret) or something.
> 
> Any other opinion on how we should do this from anyone? What pattern should
> we be following here?

WARN_ON dumps the full backtrace plus current EIP/RIP including basic
decoding using debug information (if compiled in). Maybe you don't have
that enabled in Android builds? Or is there additional information you
want to dump?
-Daniel
Chris Wilson April 1, 2015, 8:52 a.m. UTC | #3
On Tue, Mar 31, 2015 at 05:32:07PM +0100, Tomas Elf wrote:
> On 19/03/2015 12:30, John.C.Harrison@Intel.com wrote:
> >From: John Harrison <John.C.Harrison@Intel.com>
> >
> >In order to explicitly track all GPU work (and completely remove the outstanding
> >lazy request), it is necessary to add extra i915_add_request() calls to various
> >places. Some of these do not need the implicit cache flush done as part of the
> >standard batch buffer submission process.
> >
> >This patch adds a flag to _add_request() to specify whether the flush is
> >required or not.

No. Please do cache emission on the request explicitly, do not start
overloading add_request.
-Chris
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index d3b718e..4bcb43f 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2751,9 +2751,12 @@  int __must_check i915_gpu_idle(struct drm_device *dev);
 int __must_check i915_gem_suspend(struct drm_device *dev);
 void __i915_add_request(struct intel_engine_cs *ring,
 			struct drm_file *file,
-			struct drm_i915_gem_object *batch_obj);
+			struct drm_i915_gem_object *batch_obj,
+			bool flush_caches);
 #define i915_add_request(ring) \
-	__i915_add_request(ring, NULL, NULL)
+	__i915_add_request(ring, NULL, NULL, true)
+#define i915_add_request_no_flush(ring) \
+	__i915_add_request(ring, NULL, NULL, false)
 int __i915_wait_request(struct drm_i915_gem_request *req,
 			unsigned reset_counter,
 			bool interruptible,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 9a335d5..f143d15 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2329,7 +2329,8 @@  i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
  */
 void __i915_add_request(struct intel_engine_cs *ring,
 			struct drm_file *file,
-			struct drm_i915_gem_object *obj)
+			struct drm_i915_gem_object *obj,
+			bool flush_caches)
 {
 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
 	struct drm_i915_gem_request *request;
@@ -2361,12 +2362,14 @@  void __i915_add_request(struct intel_engine_cs *ring,
 	 * is that the flush _must_ happen before the next request, no matter
 	 * what.
 	 */
-	if (i915.enable_execlists)
-		ret = logical_ring_flush_all_caches(ringbuf, request->ctx);
-	else
-		ret = intel_ring_flush_all_caches(ring);
-	/* Not allowed to fail! */
-	WARN_ON(ret);
+	if (flush_caches) {
+		if (i915.enable_execlists)
+			ret = logical_ring_flush_all_caches(ringbuf, request->ctx);
+		else
+			ret = intel_ring_flush_all_caches(ring);
+		/* Not allowed to fail! */
+		WARN_ON(ret);
+	}
 
 	/* Record the position of the start of the request so that
 	 * should we detect the updated seqno part-way through the
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 3173550..c0be7d7 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1060,7 +1060,7 @@  i915_gem_execbuffer_retire_commands(struct i915_execbuffer_params *params)
 	params->ring->gpu_caches_dirty = true;
 
 	/* Add a breadcrumb for the completion of the batch buffer */
-	__i915_add_request(params->ring, params->file, params->batch_obj);
+	__i915_add_request(params->ring, params->file, params->batch_obj, true);
 }
 
 static int
diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
index ce4788f..4418616 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.c
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
@@ -173,7 +173,7 @@  int i915_gem_render_state_init(struct intel_engine_cs *ring)
 
 	i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), ring);
 
-	__i915_add_request(ring, NULL, so.obj);
+	__i915_add_request(ring, NULL, so.obj, true);
 	/* __i915_add_request moves object to inactive if it fails */
 out:
 	i915_gem_render_state_fini(&so);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 8c69f88..4922725 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1350,7 +1350,7 @@  static int intel_lr_context_render_state_init(struct intel_engine_cs *ring,
 
 	i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), ring);
 
-	__i915_add_request(ring, file, so.obj);
+	__i915_add_request(ring, file, so.obj, true);
 	/* intel_logical_ring_add_request moves object to inactive if it
 	 * fails */
 out: