diff mbox series

[2/3] drm/i915/gem: Use a single chained reloc batch for a single execbuf

Message ID 20200501130217.5708-2-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show
Series [1/3] drm/i915/gem: Use chained reloc batches | expand

Commit Message

Chris Wilson May 1, 2020, 1:02 p.m. UTC
As we can now keep chaining together a relocation batch to process any
number of relocations, we can keep building that relocation batch for
all of the target vma. This avoids emitting a new request into the
ring for each target, which would consume precious ring space and risk
a potential stall.

v2: Propagate the failure from submitting the relocation batch.

Testcase: igt/gem_exec_reloc/basic-wide-active
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> #v1
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 31 ++++++++++++-------
 1 file changed, 19 insertions(+), 12 deletions(-)

Comments

Tvrtko Ursulin May 1, 2020, 1:29 p.m. UTC | #1
On 01/05/2020 14:02, Chris Wilson wrote:
> As we can now keep chaining together a relocation batch to process any
> number of relocations, we can keep building that relocation batch for
> all of the target vma. This avoids emitting a new request into the
> ring for each target, which would consume precious ring space and risk
> a potential stall.
> 
> v2: Propagate the failure from submitting the relocation batch.
> 
> Testcase: igt/gem_exec_reloc/basic-wide-active
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> #v1
> ---
>   .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 31 ++++++++++++-------
>   1 file changed, 19 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> index 0874976b1cf7..4c4b9e0e75bc 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> @@ -268,6 +268,7 @@ struct i915_execbuffer {
>   		bool has_fence : 1;
>   		bool needs_unfenced : 1;
>   
> +		struct i915_vma *target;
>   		struct i915_request *rq;
>   		u32 *rq_cmd;
>   		unsigned int rq_size;
> @@ -1051,14 +1052,14 @@ static unsigned int reloc_bb_flags(const struct reloc_cache *cache)
>   	return cache->gen > 5 ? 0 : I915_DISPATCH_SECURE;
>   }
>   
> -static void reloc_gpu_flush(struct reloc_cache *cache)
> +static int reloc_gpu_flush(struct reloc_cache *cache)
>   {
>   	struct i915_request *rq;
>   	int err;
>   
>   	rq = fetch_and_zero(&cache->rq);
>   	if (!rq)
> -		return;
> +		return 0;
>   
>   	if (cache->rq_vma) {
>   		struct drm_i915_gem_object *obj = cache->rq_vma->obj;
> @@ -1084,15 +1085,14 @@ static void reloc_gpu_flush(struct reloc_cache *cache)
>   
>   	intel_gt_chipset_flush(rq->engine->gt);
>   	i915_request_add(rq);
> +
> +	return err;
>   }
>   
>   static void reloc_cache_reset(struct reloc_cache *cache)
>   {
>   	void *vaddr;
>   
> -	if (cache->rq)
> -		reloc_gpu_flush(cache);
> -
>   	if (!cache->vaddr)
>   		return;
>   
> @@ -1285,7 +1285,6 @@ static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma)
>   }
>   
>   static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
> -			     struct i915_vma *vma,
>   			     unsigned int len)
>   {
>   	struct reloc_cache *cache = &eb->reloc_cache;
> @@ -1308,7 +1307,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
>   		goto out_pool;
>   	}
>   
> -	batch = i915_vma_instance(pool->obj, vma->vm, NULL);
> +	batch = i915_vma_instance(pool->obj, eb->context->vm, NULL);
>   	if (IS_ERR(batch)) {
>   		err = PTR_ERR(batch);
>   		goto err_unmap;
> @@ -1328,10 +1327,6 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
>   	if (err)
>   		goto err_request;
>   
> -	err = reloc_move_to_gpu(rq, vma);
> -	if (err)
> -		goto err_request;
> -
>   	i915_vma_lock(batch);
>   	err = i915_request_await_object(rq, batch->obj, false);
>   	if (err == 0)
> @@ -1376,11 +1371,19 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb,
>   		if (!intel_engine_can_store_dword(eb->engine))
>   			return ERR_PTR(-ENODEV);
>   
> -		err = __reloc_gpu_alloc(eb, vma, len);
> +		err = __reloc_gpu_alloc(eb, len);
>   		if (unlikely(err))
>   			return ERR_PTR(err);
>   	}
>   
> +	if (vma != cache->target) {
> +		err = reloc_move_to_gpu(cache->rq, vma);
> +		if (unlikely(err))
> +			return ERR_PTR(err);
> +
> +		cache->target = vma;
> +	}
> +
>   	if (unlikely(cache->rq_size + len >
>   		     PAGE_SIZE / sizeof(u32) - RELOC_TAIL)) {
>   		err = reloc_gpu_chain(cache);
> @@ -1698,6 +1701,10 @@ static int eb_relocate(struct i915_execbuffer *eb)
>   			if (err)
>   				return err;
>   		}
> +
> +		err = reloc_gpu_flush(&eb->reloc_cache);
> +		if (err)
> +			return err;
>   	}
>   
>   	return 0;
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 0874976b1cf7..4c4b9e0e75bc 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -268,6 +268,7 @@  struct i915_execbuffer {
 		bool has_fence : 1;
 		bool needs_unfenced : 1;
 
+		struct i915_vma *target;
 		struct i915_request *rq;
 		u32 *rq_cmd;
 		unsigned int rq_size;
@@ -1051,14 +1052,14 @@  static unsigned int reloc_bb_flags(const struct reloc_cache *cache)
 	return cache->gen > 5 ? 0 : I915_DISPATCH_SECURE;
 }
 
-static void reloc_gpu_flush(struct reloc_cache *cache)
+static int reloc_gpu_flush(struct reloc_cache *cache)
 {
 	struct i915_request *rq;
 	int err;
 
 	rq = fetch_and_zero(&cache->rq);
 	if (!rq)
-		return;
+		return 0;
 
 	if (cache->rq_vma) {
 		struct drm_i915_gem_object *obj = cache->rq_vma->obj;
@@ -1084,15 +1085,14 @@  static void reloc_gpu_flush(struct reloc_cache *cache)
 
 	intel_gt_chipset_flush(rq->engine->gt);
 	i915_request_add(rq);
+
+	return err;
 }
 
 static void reloc_cache_reset(struct reloc_cache *cache)
 {
 	void *vaddr;
 
-	if (cache->rq)
-		reloc_gpu_flush(cache);
-
 	if (!cache->vaddr)
 		return;
 
@@ -1285,7 +1285,6 @@  static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma)
 }
 
 static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
-			     struct i915_vma *vma,
 			     unsigned int len)
 {
 	struct reloc_cache *cache = &eb->reloc_cache;
@@ -1308,7 +1307,7 @@  static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 		goto out_pool;
 	}
 
-	batch = i915_vma_instance(pool->obj, vma->vm, NULL);
+	batch = i915_vma_instance(pool->obj, eb->context->vm, NULL);
 	if (IS_ERR(batch)) {
 		err = PTR_ERR(batch);
 		goto err_unmap;
@@ -1328,10 +1327,6 @@  static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 	if (err)
 		goto err_request;
 
-	err = reloc_move_to_gpu(rq, vma);
-	if (err)
-		goto err_request;
-
 	i915_vma_lock(batch);
 	err = i915_request_await_object(rq, batch->obj, false);
 	if (err == 0)
@@ -1376,11 +1371,19 @@  static u32 *reloc_gpu(struct i915_execbuffer *eb,
 		if (!intel_engine_can_store_dword(eb->engine))
 			return ERR_PTR(-ENODEV);
 
-		err = __reloc_gpu_alloc(eb, vma, len);
+		err = __reloc_gpu_alloc(eb, len);
 		if (unlikely(err))
 			return ERR_PTR(err);
 	}
 
+	if (vma != cache->target) {
+		err = reloc_move_to_gpu(cache->rq, vma);
+		if (unlikely(err))
+			return ERR_PTR(err);
+
+		cache->target = vma;
+	}
+
 	if (unlikely(cache->rq_size + len >
 		     PAGE_SIZE / sizeof(u32) - RELOC_TAIL)) {
 		err = reloc_gpu_chain(cache);
@@ -1698,6 +1701,10 @@  static int eb_relocate(struct i915_execbuffer *eb)
 			if (err)
 				return err;
 		}
+
+		err = reloc_gpu_flush(&eb->reloc_cache);
+		if (err)
+			return err;
 	}
 
 	return 0;