Message ID | 20191129124846.949100-1-chris@chris-wilson.co.uk (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Revert "drm/i915: use a separate context for gpu relocs" | expand |
Chris Wilson <chris@chris-wilson.co.uk> writes: > Since commit c45e788d95b4 ("drm/i915/tgl: Suspend pre-parser across GTT > invalidations"), we now disable the advanced preparser on Tigerlake for the > invalidation phase at the start of the batch, we no longer need to emit > the GPU relocations from a second context as they are now flushed inlined. > > References: 8a9a982767b7 ("drm/i915: use a separate context for gpu relocs") > References: c45e788d95b4 ("drm/i915/tgl: Suspend pre-parser across GTT invalidations") > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com> > Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> Not a complete revert, taking care of preserving the valuable comment about preparser before gen12_emit_preempt_busywait(). Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com> > --- > .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 30 +------------------ > 1 file changed, 1 insertion(+), 29 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c > index 7a87e8270460..459f4d40b69b 100644 > --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c > +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c > @@ -253,7 +253,6 @@ struct i915_execbuffer { > bool has_fence : 1; > bool needs_unfenced : 1; > > - struct intel_context *ce; > struct i915_request *rq; > u32 *rq_cmd; > unsigned int rq_size; > @@ -886,9 +885,6 @@ static void eb_destroy(const struct i915_execbuffer *eb) > { > GEM_BUG_ON(eb->reloc_cache.rq); > > - if (eb->reloc_cache.ce) > - intel_context_put(eb->reloc_cache.ce); > - > if (eb->lut_size > 0) > kfree(eb->buckets); > } > @@ -912,7 +908,6 @@ static void reloc_cache_init(struct reloc_cache *cache, > cache->has_fence = cache->gen < 4; > cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment; > cache->node.flags = 0; > - cache->ce = NULL; > cache->rq = NULL; > cache->rq_size = 0; > } > @@ -1182,7 
+1177,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, > if (err) > goto err_unmap; > > - rq = intel_context_create_request(cache->ce); > + rq = i915_request_create(eb->context); > if (IS_ERR(rq)) { > err = PTR_ERR(rq); > goto err_unpin; > @@ -1253,29 +1248,6 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb, > if (!intel_engine_can_store_dword(eb->engine)) > return ERR_PTR(-ENODEV); > > - if (!cache->ce) { > - struct intel_context *ce; > - > - /* > - * The CS pre-parser can pre-fetch commands across > - * memory sync points and starting gen12 it is able to > - * pre-fetch across BB_START and BB_END boundaries > - * (within the same context). We therefore use a > - * separate context gen12+ to guarantee that the reloc > - * writes land before the parser gets to the target > - * memory location. > - */ > - if (cache->gen >= 12) > - ce = intel_context_create(eb->context->gem_context, > - eb->engine); > - else > - ce = intel_context_get(eb->context); > - if (IS_ERR(ce)) > - return ERR_CAST(ce); > - > - cache->ce = ce; > - } > - > err = __reloc_gpu_alloc(eb, vma, len); > if (unlikely(err)) > return ERR_PTR(err); > -- > 2.24.0
On 11/29/19 4:48 AM, Chris Wilson wrote: > Since commit c45e788d95b4 ("drm/i915/tgl: Suspend pre-parser across GTT > invalidations"), we now disable the advanced preparser on Tigerlake for the > invalidation phase at the start of the batch, we no longer need to emit > the GPU relocations from a second context as they are now flushed inlined. > c45e788d95b4 only applies to the RCS though and IIRC I've seen issues with the relocations on other engines as well, although they were much rarer. Also, the comment left in intel_lrc.c still references reloc_gpu(). Daniele > References: 8a9a982767b7 ("drm/i915: use a separate context for gpu relocs") > References: c45e788d95b4 ("drm/i915/tgl: Suspend pre-parser across GTT invalidations") > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com> > Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> > --- > .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 30 +------------------ > 1 file changed, 1 insertion(+), 29 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c > index 7a87e8270460..459f4d40b69b 100644 > --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c > +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c > @@ -253,7 +253,6 @@ struct i915_execbuffer { > bool has_fence : 1; > bool needs_unfenced : 1; > > - struct intel_context *ce; > struct i915_request *rq; > u32 *rq_cmd; > unsigned int rq_size; > @@ -886,9 +885,6 @@ static void eb_destroy(const struct i915_execbuffer *eb) > { > GEM_BUG_ON(eb->reloc_cache.rq); > > - if (eb->reloc_cache.ce) > - intel_context_put(eb->reloc_cache.ce); > - > if (eb->lut_size > 0) > kfree(eb->buckets); > } > @@ -912,7 +908,6 @@ static void reloc_cache_init(struct reloc_cache *cache, > cache->has_fence = cache->gen < 4; > cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment; > cache->node.flags = 0; > - cache->ce = NULL; > cache->rq = NULL; > 
cache->rq_size = 0; > } > @@ -1182,7 +1177,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, > if (err) > goto err_unmap; > > - rq = intel_context_create_request(cache->ce); > + rq = i915_request_create(eb->context); > if (IS_ERR(rq)) { > err = PTR_ERR(rq); > goto err_unpin; > @@ -1253,29 +1248,6 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb, > if (!intel_engine_can_store_dword(eb->engine)) > return ERR_PTR(-ENODEV); > > - if (!cache->ce) { > - struct intel_context *ce; > - > - /* > - * The CS pre-parser can pre-fetch commands across > - * memory sync points and starting gen12 it is able to > - * pre-fetch across BB_START and BB_END boundaries > - * (within the same context). We therefore use a > - * separate context gen12+ to guarantee that the reloc > - * writes land before the parser gets to the target > - * memory location. > - */ > - if (cache->gen >= 12) > - ce = intel_context_create(eb->context->gem_context, > - eb->engine); > - else > - ce = intel_context_get(eb->context); > - if (IS_ERR(ce)) > - return ERR_CAST(ce); > - > - cache->ce = ce; > - } > - > err = __reloc_gpu_alloc(eb, vma, len); > if (unlikely(err)) > return ERR_PTR(err); >
Quoting Daniele Ceraolo Spurio (2019-12-03 22:19:07) > > > On 11/29/19 4:48 AM, Chris Wilson wrote: > > Since commit c45e788d95b4 ("drm/i915/tgl: Suspend pre-parser across GTT > > invalidations"), we now disable the advanced preparser on Tigerlake for the > > invalidation phase at the start of the batch, we no longer need to emit > > the GPU relocations from a second context as they are now flushed inlined. > > > > c45e788d95b4 only applies to the RCS though and IIRC I've seen issues > with the relocations on other engines as well, although they were much > rarer. Also, the comment left in intel_lrc.c still references reloc_gpu(). The tests we have don't discriminate between the engines, and so far we've only observed the issue on RCS. Look at igt_cs_tlb and see what it might be missing with respect to triggering the issue on the other engines. -Chris
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 7a87e8270460..459f4d40b69b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -253,7 +253,6 @@ struct i915_execbuffer { bool has_fence : 1; bool needs_unfenced : 1; - struct intel_context *ce; struct i915_request *rq; u32 *rq_cmd; unsigned int rq_size; @@ -886,9 +885,6 @@ static void eb_destroy(const struct i915_execbuffer *eb) { GEM_BUG_ON(eb->reloc_cache.rq); - if (eb->reloc_cache.ce) - intel_context_put(eb->reloc_cache.ce); - if (eb->lut_size > 0) kfree(eb->buckets); } @@ -912,7 +908,6 @@ static void reloc_cache_init(struct reloc_cache *cache, cache->has_fence = cache->gen < 4; cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment; cache->node.flags = 0; - cache->ce = NULL; cache->rq = NULL; cache->rq_size = 0; } @@ -1182,7 +1177,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, if (err) goto err_unmap; - rq = intel_context_create_request(cache->ce); + rq = i915_request_create(eb->context); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto err_unpin; @@ -1253,29 +1248,6 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb, if (!intel_engine_can_store_dword(eb->engine)) return ERR_PTR(-ENODEV); - if (!cache->ce) { - struct intel_context *ce; - - /* - * The CS pre-parser can pre-fetch commands across - * memory sync points and starting gen12 it is able to - * pre-fetch across BB_START and BB_END boundaries - * (within the same context). We therefore use a - * separate context gen12+ to guarantee that the reloc - * writes land before the parser gets to the target - * memory location. - */ - if (cache->gen >= 12) - ce = intel_context_create(eb->context->gem_context, - eb->engine); - else - ce = intel_context_get(eb->context); - if (IS_ERR(ce)) - return ERR_CAST(ce); - - cache->ce = ce; - } - err = __reloc_gpu_alloc(eb, vma, len); if (unlikely(err)) return ERR_PTR(err);
Since commit c45e788d95b4 ("drm/i915/tgl: Suspend pre-parser across GTT invalidations"), we now disable the advanced preparser on Tigerlake for the invalidation phase at the start of the batch, so we no longer need to emit the GPU relocations from a second context as they are now flushed inline. References: 8a9a982767b7 ("drm/i915: use a separate context for gpu relocs") References: c45e788d95b4 ("drm/i915/tgl: Suspend pre-parser across GTT invalidations") Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> --- .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 30 +------------------ 1 file changed, 1 insertion(+), 29 deletions(-)