Revert "drm/i915: use a separate context for gpu relocs"

Message ID	20191129124846.949100-1-chris@chris-wilson.co.uk (mailing list archive)
State	New, archived
Headers	show Return-Path: <SRS0=IQTZ=ZV=lists.freedesktop.org=intel-gfx-bounces@kernel.org> DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org EBBB720869 From: Chris Wilson <chris@chris-wilson.co.uk> To: intel-gfx@lists.freedesktop.org Date: Fri, 29 Nov 2019 12:48:46 +0000 Message-Id: <20191129124846.949100-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH] Revert "drm/i915: use a separate context for gpu relocs" Precedence: list Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: base64 Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" <intel-gfx-bounces@lists.freedesktop.org>
Series	Revert "drm/i915: use a separate context for gpu relocs" \| expand Revert "drm/i915: use a separate context for gpu relocs"

Message ID

20191129124846.949100-1-chris@chris-wilson.co.uk (mailing list archive)

State

New, archived

Headers

DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org EBBB720869
From: Chris Wilson <chris@chris-wilson.co.uk>
To: intel-gfx@lists.freedesktop.org
Date: Fri, 29 Nov 2019 12:48:46 +0000
Message-Id: <20191129124846.949100-1-chris@chris-wilson.co.uk>
MIME-Version: 1.0
Subject: [Intel-gfx] [PATCH] Revert "drm/i915: use a separate context for
 gpu relocs"
Precedence: list
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: base64
Errors-To: intel-gfx-bounces@lists.freedesktop.org
Sender: "Intel-gfx" <intel-gfx-bounces@lists.freedesktop.org>

Series

Revert "drm/i915: use a separate context for gpu relocs" | expand

Commit Message

Chris Wilson Nov. 29, 2019, 12:48 p.m. UTC

Since commit c45e788d95b4 ("drm/i915/tgl: Suspend pre-parser across GTT
invalidations"), we disable the advanced preparser on Tigerlake for the
invalidation phase at the start of the batch, so we no longer need to emit
the GPU relocations from a second context as they are now flushed inline.

References: 8a9a982767b7 ("drm/i915: use a separate context for gpu relocs")
References: c45e788d95b4 ("drm/i915/tgl: Suspend pre-parser across GTT invalidations")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 30 +------------------
 1 file changed, 1 insertion(+), 29 deletions(-)

Comments

Mika Kuoppala Nov. 29, 2019, 1:07 p.m. UTC | #1

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Since commit c45e788d95b4 ("drm/i915/tgl: Suspend pre-parser across GTT
> invalidations"), we now disable the advanced preparser on Tigerlake for the
> invalidation phase at the start of the batch, we no longer need to emit
> the GPU relocations from a second context as they are now flushed inlined.
>
> References: 8a9a982767b7 ("drm/i915: use a separate context for gpu relocs")
> References: c45e788d95b4 ("drm/i915/tgl: Suspend pre-parser across GTT invalidations")
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>

Not a complete revert: it takes care to preserve the valuable
comment about the preparser before gen12_emit_preempt_busywait().

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

> ---
>  .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 30 +------------------
>  1 file changed, 1 insertion(+), 29 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> index 7a87e8270460..459f4d40b69b 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> @@ -253,7 +253,6 @@ struct i915_execbuffer {
>  		bool has_fence : 1;
>  		bool needs_unfenced : 1;
>  
> -		struct intel_context *ce;
>  		struct i915_request *rq;
>  		u32 *rq_cmd;
>  		unsigned int rq_size;
> @@ -886,9 +885,6 @@ static void eb_destroy(const struct i915_execbuffer *eb)
>  {
>  	GEM_BUG_ON(eb->reloc_cache.rq);
>  
> -	if (eb->reloc_cache.ce)
> -		intel_context_put(eb->reloc_cache.ce);
> -
>  	if (eb->lut_size > 0)
>  		kfree(eb->buckets);
>  }
> @@ -912,7 +908,6 @@ static void reloc_cache_init(struct reloc_cache *cache,
>  	cache->has_fence = cache->gen < 4;
>  	cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment;
>  	cache->node.flags = 0;
> -	cache->ce = NULL;
>  	cache->rq = NULL;
>  	cache->rq_size = 0;
>  }
> @@ -1182,7 +1177,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
>  	if (err)
>  		goto err_unmap;
>  
> -	rq = intel_context_create_request(cache->ce);
> +	rq = i915_request_create(eb->context);
>  	if (IS_ERR(rq)) {
>  		err = PTR_ERR(rq);
>  		goto err_unpin;
> @@ -1253,29 +1248,6 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb,
>  		if (!intel_engine_can_store_dword(eb->engine))
>  			return ERR_PTR(-ENODEV);
>  
> -		if (!cache->ce) {
> -			struct intel_context *ce;
> -
> -			/*
> -			 * The CS pre-parser can pre-fetch commands across
> -			 * memory sync points and starting gen12 it is able to
> -			 * pre-fetch across BB_START and BB_END boundaries
> -			 * (within the same context). We therefore use a
> -			 * separate context gen12+ to guarantee that the reloc
> -			 * writes land before the parser gets to the target
> -			 * memory location.
> -			 */
> -			if (cache->gen >= 12)
> -				ce = intel_context_create(eb->context->gem_context,
> -							  eb->engine);
> -			else
> -				ce = intel_context_get(eb->context);
> -			if (IS_ERR(ce))
> -				return ERR_CAST(ce);
> -
> -			cache->ce = ce;
> -		}
> -
>  		err = __reloc_gpu_alloc(eb, vma, len);
>  		if (unlikely(err))
>  			return ERR_PTR(err);
> -- 
> 2.24.0

Daniele Ceraolo Spurio Dec. 3, 2019, 10:19 p.m. UTC | #2

On 11/29/19 4:48 AM, Chris Wilson wrote:
> Since commit c45e788d95b4 ("drm/i915/tgl: Suspend pre-parser across GTT
> invalidations"), we now disable the advanced preparser on Tigerlake for the
> invalidation phase at the start of the batch, we no longer need to emit
> the GPU relocations from a second context as they are now flushed inlined.
> 

c45e788d95b4 only applies to the RCS though and IIRC I've seen issues 
with the relocations on other engines as well, although they were much 
rarer. Also, the comment left in intel_lrc.c still references reloc_gpu().

Daniele

> References: 8a9a982767b7 ("drm/i915: use a separate context for gpu relocs")
> References: c45e788d95b4 ("drm/i915/tgl: Suspend pre-parser across GTT invalidations")
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> ---
>   .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 30 +------------------
>   1 file changed, 1 insertion(+), 29 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> index 7a87e8270460..459f4d40b69b 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> @@ -253,7 +253,6 @@ struct i915_execbuffer {
>   		bool has_fence : 1;
>   		bool needs_unfenced : 1;
>   
> -		struct intel_context *ce;
>   		struct i915_request *rq;
>   		u32 *rq_cmd;
>   		unsigned int rq_size;
> @@ -886,9 +885,6 @@ static void eb_destroy(const struct i915_execbuffer *eb)
>   {
>   	GEM_BUG_ON(eb->reloc_cache.rq);
>   
> -	if (eb->reloc_cache.ce)
> -		intel_context_put(eb->reloc_cache.ce);
> -
>   	if (eb->lut_size > 0)
>   		kfree(eb->buckets);
>   }
> @@ -912,7 +908,6 @@ static void reloc_cache_init(struct reloc_cache *cache,
>   	cache->has_fence = cache->gen < 4;
>   	cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment;
>   	cache->node.flags = 0;
> -	cache->ce = NULL;
>   	cache->rq = NULL;
>   	cache->rq_size = 0;
>   }
> @@ -1182,7 +1177,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
>   	if (err)
>   		goto err_unmap;
>   
> -	rq = intel_context_create_request(cache->ce);
> +	rq = i915_request_create(eb->context);
>   	if (IS_ERR(rq)) {
>   		err = PTR_ERR(rq);
>   		goto err_unpin;
> @@ -1253,29 +1248,6 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb,
>   		if (!intel_engine_can_store_dword(eb->engine))
>   			return ERR_PTR(-ENODEV);
>   
> -		if (!cache->ce) {
> -			struct intel_context *ce;
> -
> -			/*
> -			 * The CS pre-parser can pre-fetch commands across
> -			 * memory sync points and starting gen12 it is able to
> -			 * pre-fetch across BB_START and BB_END boundaries
> -			 * (within the same context). We therefore use a
> -			 * separate context gen12+ to guarantee that the reloc
> -			 * writes land before the parser gets to the target
> -			 * memory location.
> -			 */
> -			if (cache->gen >= 12)
> -				ce = intel_context_create(eb->context->gem_context,
> -							  eb->engine);
> -			else
> -				ce = intel_context_get(eb->context);
> -			if (IS_ERR(ce))
> -				return ERR_CAST(ce);
> -
> -			cache->ce = ce;
> -		}
> -
>   		err = __reloc_gpu_alloc(eb, vma, len);
>   		if (unlikely(err))
>   			return ERR_PTR(err);
>

Chris Wilson Dec. 4, 2019, 6:21 p.m. UTC | #3

Quoting Daniele Ceraolo Spurio (2019-12-03 22:19:07)
> 
> 
> On 11/29/19 4:48 AM, Chris Wilson wrote:
> > Since commit c45e788d95b4 ("drm/i915/tgl: Suspend pre-parser across GTT
> > invalidations"), we now disable the advanced preparser on Tigerlake for the
> > invalidation phase at the start of the batch, we no longer need to emit
> > the GPU relocations from a second context as they are now flushed inlined.
> > 
> 
> c45e788d95b4 only applies to the RCS though and IIRC I've seen issues 
> with the relocations on other engines as well, although they were much 
> rarer. Also, the comment left in intel_lrc.c still references reloc_gpu().

The tests we have don't discriminate between the engines, and
so far we've only observed the issue on RCS. Look at igt_cs_tlb and see
what it might be missing with respect to triggering the issue on the other
engines.
-Chris

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 7a87e8270460..459f4d40b69b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -253,7 +253,6 @@  struct i915_execbuffer {
 		bool has_fence : 1;
 		bool needs_unfenced : 1;
 
-		struct intel_context *ce;
 		struct i915_request *rq;
 		u32 *rq_cmd;
 		unsigned int rq_size;
@@ -886,9 +885,6 @@  static void eb_destroy(const struct i915_execbuffer *eb)
 {
 	GEM_BUG_ON(eb->reloc_cache.rq);
 
-	if (eb->reloc_cache.ce)
-		intel_context_put(eb->reloc_cache.ce);
-
 	if (eb->lut_size > 0)
 		kfree(eb->buckets);
 }
@@ -912,7 +908,6 @@  static void reloc_cache_init(struct reloc_cache *cache,
 	cache->has_fence = cache->gen < 4;
 	cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment;
 	cache->node.flags = 0;
-	cache->ce = NULL;
 	cache->rq = NULL;
 	cache->rq_size = 0;
 }
@@ -1182,7 +1177,7 @@  static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 	if (err)
 		goto err_unmap;
 
-	rq = intel_context_create_request(cache->ce);
+	rq = i915_request_create(eb->context);
 	if (IS_ERR(rq)) {
 		err = PTR_ERR(rq);
 		goto err_unpin;
@@ -1253,29 +1248,6 @@  static u32 *reloc_gpu(struct i915_execbuffer *eb,
 		if (!intel_engine_can_store_dword(eb->engine))
 			return ERR_PTR(-ENODEV);
 
-		if (!cache->ce) {
-			struct intel_context *ce;
-
-			/*
-			 * The CS pre-parser can pre-fetch commands across
-			 * memory sync points and starting gen12 it is able to
-			 * pre-fetch across BB_START and BB_END boundaries
-			 * (within the same context). We therefore use a
-			 * separate context gen12+ to guarantee that the reloc
-			 * writes land before the parser gets to the target
-			 * memory location.
-			 */
-			if (cache->gen >= 12)
-				ce = intel_context_create(eb->context->gem_context,
-							  eb->engine);
-			else
-				ce = intel_context_get(eb->context);
-			if (IS_ERR(ce))
-				return ERR_CAST(ce);
-
-			cache->ce = ce;
-		}
-
 		err = __reloc_gpu_alloc(eb, vma, len);
 		if (unlikely(err))
 			return ERR_PTR(err);

Revert "drm/i915: use a separate context for gpu relocs"

Commit Message

Comments

Patch