
drm/i915: Remove redundant i915_request_await_object in blit clears

Message ID 20200615140928.27336-1-tvrtko.ursulin@linux.intel.com (mailing list archive)
State New, archived
Series drm/i915: Remove redundant i915_request_await_object in blit clears

Commit Message

Tvrtko Ursulin June 15, 2020, 2:09 p.m. UTC
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

One i915_request_await_object is enough and we keep the one under the
object lock so it is final.

At the same time move async clflushing setup under the same locked
section and consolidate common code into a helper function.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Matthew Auld <matthew.auld@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Michael J. Ruhl <michael.j.ruhl@intel.com>
---
 .../gpu/drm/i915/gem/i915_gem_object_blt.c    | 35 +++++++------------
 1 file changed, 13 insertions(+), 22 deletions(-)

Comments

Chris Wilson June 15, 2020, 2:30 p.m. UTC | #1
Quoting Tvrtko Ursulin (2020-06-15 15:09:28)
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> One i915_request_await_object is enough and we keep the one under the
> object lock so it is final.
> 
> At the same time move async clflushing setup under the same locked
> section and consolidate common code into a helper function.
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Matthew Auld <matthew.auld@intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Michael J. Ruhl <michael.j.ruhl@intel.com>
> ---
>  .../gpu/drm/i915/gem/i915_gem_object_blt.c    | 35 +++++++------------
>  1 file changed, 13 insertions(+), 22 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
> index f457d7130491..7d8b396e265a 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
> @@ -126,6 +126,17 @@ void intel_emit_vma_release(struct intel_context *ce, struct i915_vma *vma)
>         intel_engine_pm_put(ce->engine);
>  }
>  
> +static int
> +move_obj_to_gpu(struct drm_i915_gem_object *obj,
> +               struct i915_request *rq,
> +               bool write)
> +{
> +       if (obj->cache_dirty & ~obj->cache_coherent)
> +               i915_gem_clflush_object(obj, 0);
> +
> +       return i915_request_await_object(rq, obj, write);
> +}
> +
>  int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
>                              struct intel_context *ce,
>                              u32 value)
> @@ -143,12 +154,6 @@ int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
>         if (unlikely(err))
>                 return err;
>  
> -       if (obj->cache_dirty & ~obj->cache_coherent) {
> -               i915_gem_object_lock(obj);
> -               i915_gem_clflush_object(obj, 0);
> -               i915_gem_object_unlock(obj);
> -       }
> -
>         batch = intel_emit_vma_fill_blt(ce, vma, value);
>         if (IS_ERR(batch)) {
>                 err = PTR_ERR(batch);
> @@ -165,10 +170,6 @@ int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
>         if (unlikely(err))
>                 goto out_request;
>  
> -       err = i915_request_await_object(rq, obj, true);
> -       if (unlikely(err))
> -               goto out_request;
> -
>         if (ce->engine->emit_init_breadcrumb) {
>                 err = ce->engine->emit_init_breadcrumb(rq);
>                 if (unlikely(err))
> @@ -176,7 +177,7 @@ int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
>         }
>  
>         i915_vma_lock(vma);
> -       err = i915_request_await_object(rq, vma->obj, true);
> +       err = move_obj_to_gpu(vma->obj, rq, true);
>         if (err == 0)
>                 err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
>         i915_vma_unlock(vma);

Ah, but here it's also the wrong side of init_breadcrumb.
-Chris
Tvrtko Ursulin June 15, 2020, 2:54 p.m. UTC | #2
On 15/06/2020 15:30, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2020-06-15 15:09:28)
>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>
>> One i915_request_await_object is enough and we keep the one under the
>> object lock so it is final.
>>
>> At the same time move async clflushing setup under the same locked
>> section and consolidate common code into a helper function.
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> Cc: Matthew Auld <matthew.auld@intel.com>
>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>> Cc: Michael J. Ruhl <michael.j.ruhl@intel.com>
>> ---
>>   .../gpu/drm/i915/gem/i915_gem_object_blt.c    | 35 +++++++------------
>>   1 file changed, 13 insertions(+), 22 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
>> index f457d7130491..7d8b396e265a 100644
>> --- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
>> @@ -126,6 +126,17 @@ void intel_emit_vma_release(struct intel_context *ce, struct i915_vma *vma)
>>          intel_engine_pm_put(ce->engine);
>>   }
>>   
>> +static int
>> +move_obj_to_gpu(struct drm_i915_gem_object *obj,
>> +               struct i915_request *rq,
>> +               bool write)
>> +{
>> +       if (obj->cache_dirty & ~obj->cache_coherent)
>> +               i915_gem_clflush_object(obj, 0);
>> +
>> +       return i915_request_await_object(rq, obj, write);
>> +}
>> +
>>   int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
>>                               struct intel_context *ce,
>>                               u32 value)
>> @@ -143,12 +154,6 @@ int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
>>          if (unlikely(err))
>>                  return err;
>>   
>> -       if (obj->cache_dirty & ~obj->cache_coherent) {
>> -               i915_gem_object_lock(obj);
>> -               i915_gem_clflush_object(obj, 0);
>> -               i915_gem_object_unlock(obj);
>> -       }
>> -
>>          batch = intel_emit_vma_fill_blt(ce, vma, value);
>>          if (IS_ERR(batch)) {
>>                  err = PTR_ERR(batch);
>> @@ -165,10 +170,6 @@ int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
>>          if (unlikely(err))
>>                  goto out_request;
>>   
>> -       err = i915_request_await_object(rq, obj, true);
>> -       if (unlikely(err))
>> -               goto out_request;
>> -
>>          if (ce->engine->emit_init_breadcrumb) {
>>                  err = ce->engine->emit_init_breadcrumb(rq);
>>                  if (unlikely(err))
>> @@ -176,7 +177,7 @@ int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
>>          }
>>   
>>          i915_vma_lock(vma);
>> -       err = i915_request_await_object(rq, vma->obj, true);
>> +       err = move_obj_to_gpu(vma->obj, rq, true);
>>          if (err == 0)
>>                  err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
>>          i915_vma_unlock(vma);
> 
> Ah, but here it's also the wrong side of init_breadcrumb.

Why is it important to mark the object as active on the failure path? We
skip the payload, no?

Regards,

Tvrtko
Chris Wilson June 15, 2020, 3:01 p.m. UTC | #3
Quoting Tvrtko Ursulin (2020-06-15 15:54:56)
> 
> On 15/06/2020 15:30, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2020-06-15 15:09:28)
> >> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >>
> >> One i915_request_await_object is enough and we keep the one under the
> >> object lock so it is final.
> >>
> >> At the same time move async clflushing setup under the same locked
> >> section and consolidate common code into a helper function.
> >>
> >> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >> Cc: Matthew Auld <matthew.auld@intel.com>
> >> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> >> Cc: Michael J. Ruhl <michael.j.ruhl@intel.com>
> >> ---
> >>   .../gpu/drm/i915/gem/i915_gem_object_blt.c    | 35 +++++++------------
> >>   1 file changed, 13 insertions(+), 22 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
> >> index f457d7130491..7d8b396e265a 100644
> >> --- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
> >> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
> >> @@ -126,6 +126,17 @@ void intel_emit_vma_release(struct intel_context *ce, struct i915_vma *vma)
> >>          intel_engine_pm_put(ce->engine);
> >>   }
> >>   
> >> +static int
> >> +move_obj_to_gpu(struct drm_i915_gem_object *obj,
> >> +               struct i915_request *rq,
> >> +               bool write)
> >> +{
> >> +       if (obj->cache_dirty & ~obj->cache_coherent)
> >> +               i915_gem_clflush_object(obj, 0);
> >> +
> >> +       return i915_request_await_object(rq, obj, write);
> >> +}
> >> +
> >>   int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
> >>                               struct intel_context *ce,
> >>                               u32 value)
> >> @@ -143,12 +154,6 @@ int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
> >>          if (unlikely(err))
> >>                  return err;
> >>   
> >> -       if (obj->cache_dirty & ~obj->cache_coherent) {
> >> -               i915_gem_object_lock(obj);
> >> -               i915_gem_clflush_object(obj, 0);
> >> -               i915_gem_object_unlock(obj);
> >> -       }
> >> -
> >>          batch = intel_emit_vma_fill_blt(ce, vma, value);
> >>          if (IS_ERR(batch)) {
> >>                  err = PTR_ERR(batch);
> >> @@ -165,10 +170,6 @@ int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
> >>          if (unlikely(err))
> >>                  goto out_request;
> >>   
> >> -       err = i915_request_await_object(rq, obj, true);
> >> -       if (unlikely(err))
> >> -               goto out_request;
> >> -
> >>          if (ce->engine->emit_init_breadcrumb) {
> >>                  err = ce->engine->emit_init_breadcrumb(rq);
> >>                  if (unlikely(err))
> >> @@ -176,7 +177,7 @@ int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
> >>          }
> >>   
> >>          i915_vma_lock(vma);
> >> -       err = i915_request_await_object(rq, vma->obj, true);
> >> +       err = move_obj_to_gpu(vma->obj, rq, true);
> >>          if (err == 0)
> >>                  err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
> >>          i915_vma_unlock(vma);
> > 
> > Ah, but here it's also the wrong side of init_breadcrumb.
> 
> Why is it important to mark the object as active on the failure path? We
> skip the payload, no?

It's important that all the async waits are before the breadcrumb. Up
until recently we would emit the semaphore after the init breadcrumb, and
so believe the payload was running and all waits had completed even though
the request was still waiting on another request to complete. If this blt
request was subsequently relied upon to indicate those other fence
completions, we would then start other requests early. [It's less
important now, as we look at a flag saying that the init_breadcrumb has
been emitted and avoid adding more semaphores.]
-Chris
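
For illustration, a minimal sketch of the ordering Chris describes, reusing
the helper and names from the patch; this is an assumption about how the
fill_blt tail would look with the await moved to the other side of the init
breadcrumb, not the committed fix:

	/*
	 * Sketch: queue every async wait (clflush + await) before emitting
	 * the init breadcrumb, so the breadcrumb cannot signal while the
	 * request still depends on other work.
	 */
	i915_vma_lock(vma);
	err = move_obj_to_gpu(vma->obj, rq, true);
	if (err == 0)
		err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
	i915_vma_unlock(vma);
	if (unlikely(err))
		goto out_request;

	if (ce->engine->emit_init_breadcrumb) {
		err = ce->engine->emit_init_breadcrumb(rq);
		if (unlikely(err))
			goto out_request;
	}

	err = ce->engine->emit_bb_start(rq,
					batch->node.start, batch->node.size,
					0);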

Patch

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
index f457d7130491..7d8b396e265a 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
@@ -126,6 +126,17 @@  void intel_emit_vma_release(struct intel_context *ce, struct i915_vma *vma)
 	intel_engine_pm_put(ce->engine);
 }
 
+static int
+move_obj_to_gpu(struct drm_i915_gem_object *obj,
+		struct i915_request *rq,
+		bool write)
+{
+	if (obj->cache_dirty & ~obj->cache_coherent)
+		i915_gem_clflush_object(obj, 0);
+
+	return i915_request_await_object(rq, obj, write);
+}
+
 int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
 			     struct intel_context *ce,
 			     u32 value)
@@ -143,12 +154,6 @@  int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
 	if (unlikely(err))
 		return err;
 
-	if (obj->cache_dirty & ~obj->cache_coherent) {
-		i915_gem_object_lock(obj);
-		i915_gem_clflush_object(obj, 0);
-		i915_gem_object_unlock(obj);
-	}
-
 	batch = intel_emit_vma_fill_blt(ce, vma, value);
 	if (IS_ERR(batch)) {
 		err = PTR_ERR(batch);
@@ -165,10 +170,6 @@  int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
 	if (unlikely(err))
 		goto out_request;
 
-	err = i915_request_await_object(rq, obj, true);
-	if (unlikely(err))
-		goto out_request;
-
 	if (ce->engine->emit_init_breadcrumb) {
 		err = ce->engine->emit_init_breadcrumb(rq);
 		if (unlikely(err))
@@ -176,7 +177,7 @@  int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
 	}
 
 	i915_vma_lock(vma);
-	err = i915_request_await_object(rq, vma->obj, true);
+	err = move_obj_to_gpu(vma->obj, rq, true);
 	if (err == 0)
 		err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
 	i915_vma_unlock(vma);
@@ -317,16 +318,6 @@  struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce,
 	return ERR_PTR(err);
 }
 
-static int move_to_gpu(struct i915_vma *vma, struct i915_request *rq, bool write)
-{
-	struct drm_i915_gem_object *obj = vma->obj;
-
-	if (obj->cache_dirty & ~obj->cache_coherent)
-		i915_gem_clflush_object(obj, 0);
-
-	return i915_request_await_object(rq, obj, write);
-}
-
 int i915_gem_object_copy_blt(struct drm_i915_gem_object *src,
 			     struct drm_i915_gem_object *dst,
 			     struct intel_context *ce)
@@ -375,7 +366,7 @@  int i915_gem_object_copy_blt(struct drm_i915_gem_object *src,
 		goto out_request;
 
 	for (i = 0; i < ARRAY_SIZE(vma); i++) {
-		err = move_to_gpu(vma[i], rq, i);
+		err = move_obj_to_gpu(vma[i]->obj, rq, i);
 		if (unlikely(err))
 			goto out_unlock;
 	}