[6/6] drm/i915: Track the last-active inside the i915_vma

Message ID	20180629225419.5832-6-chris@chris-wilson.co.uk (mailing list archive)
State	New, archived
Headers	show Return-Path: <intel-gfx-bounces@lists.freedesktop.org> From: Chris Wilson <chris@chris-wilson.co.uk> To: intel-gfx@lists.freedesktop.org Date: Fri, 29 Jun 2018 23:54:19 +0100 Message-Id: <20180629225419.5832-6-chris@chris-wilson.co.uk> In-Reply-To: <20180629225419.5832-1-chris@chris-wilson.co.uk> References: <20180629225419.5832-1-chris@chris-wilson.co.uk> Subject: [Intel-gfx] [PATCH 6/6] drm/i915: Track the last-active inside the i915_vma Precedence: list MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: base64 Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" <intel-gfx-bounces@lists.freedesktop.org>

Message ID

20180629225419.5832-6-chris@chris-wilson.co.uk (mailing list archive)

State

New, archived

Headers

From: Chris Wilson <chris@chris-wilson.co.uk>
To: intel-gfx@lists.freedesktop.org
Date: Fri, 29 Jun 2018 23:54:19 +0100
Message-Id: <20180629225419.5832-6-chris@chris-wilson.co.uk>
In-Reply-To: <20180629225419.5832-1-chris@chris-wilson.co.uk>
References: <20180629225419.5832-1-chris@chris-wilson.co.uk>
Subject: [Intel-gfx] [PATCH 6/6] drm/i915: Track the last-active inside the
	i915_vma
Precedence: list
MIME-Version: 1.0
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: base64
Errors-To: intel-gfx-bounces@lists.freedesktop.org
Sender: "Intel-gfx" <intel-gfx-bounces@lists.freedesktop.org>

Commit Message

Chris Wilson June 29, 2018, 10:54 p.m. UTC

Using a VMA on more than one timeline concurrently is the exception
rather than the rule (using it concurrently on multiple engines). As we
expect to only use one active tracker, store the most recently used
tracker inside the i915_vma itself and only fallback to the radixtree if
we need a second or more concurrent active trackers.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_vma.c | 36 +++++++++++++++++++++++++++++++--
 drivers/gpu/drm/i915/i915_vma.h |  1 +
 2 files changed, 35 insertions(+), 2 deletions(-)

Comments

Tvrtko Ursulin July 3, 2018, 5:40 p.m. UTC | #1

On 29/06/2018 23:54, Chris Wilson wrote:
> Using a VMA on more than one timeline concurrently is the exception
> rather than the rule (using it concurrently on multiple engines). As we
> expect to only use one active tracker, store the most recently used
> tracker inside the i915_vma itself and only fallback to the radixtree if

Is a rbtree now.

> we need a second or more concurrent active trackers.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_vma.c | 36 +++++++++++++++++++++++++++++++--
>   drivers/gpu/drm/i915/i915_vma.h |  1 +
>   2 files changed, 35 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index 2faad2a1d00e..9b69d24c5cf1 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -119,6 +119,12 @@ i915_vma_retire(struct i915_gem_active *base, struct i915_request *rq)
>   	__i915_vma_retire(active->vma, rq);
>   }
>   
> +static void
> +i915_vma_last_retire(struct i915_gem_active *base, struct i915_request *rq)
> +{
> +	__i915_vma_retire(container_of(base, struct i915_vma, last_active), rq);
> +}
> +
>   static struct i915_vma *
>   vma_create(struct drm_i915_gem_object *obj,
>   	   struct i915_address_space *vm,
> @@ -136,6 +142,7 @@ vma_create(struct drm_i915_gem_object *obj,
>   
>   	vma->active = RB_ROOT;
>   
> +	init_request_active(&vma->last_active, i915_vma_last_retire);
>   	init_request_active(&vma->last_fence, NULL);
>   	vma->vm = vm;
>   	vma->ops = &vm->vma_ops;
> @@ -895,6 +902,15 @@ static struct i915_gem_active *lookup_active(struct i915_vma *vma, u64 idx)
>   {
>   	struct i915_vma_active *active;
>   	struct rb_node **p, *parent;
> +	struct i915_request *old;
> +
> +	old = i915_gem_active_raw(&vma->last_active,
> +				  &vma->vm->i915->drm.struct_mutex);
> +	if (!old || old->fence.context == idx)
> +		goto out;

Put a comment for this block explaining the caching optimization.

> +
> +	/* Move the currently active fence into the rbtree */
> +	idx = old->fence.context;

I find "currently active fence" a bit confusing. It is just the "last 
accessed/cached fence", no?

>   
>   	parent = NULL;
>   	p = &vma->active.rb_node;
> @@ -903,7 +919,7 @@ static struct i915_gem_active *lookup_active(struct i915_vma *vma, u64 idx)
>   
>   		active = rb_entry(parent, struct i915_vma_active, node);
>   		if (active->timeline == idx)
> -			return &active->base;
> +			goto replace;
>   
>   		if (active->timeline < idx)
>   			p = &parent->rb_right;
> @@ -922,7 +938,18 @@ static struct i915_gem_active *lookup_active(struct i915_vma *vma, u64 idx)
>   	rb_link_node(&active->node, parent, p);
>   	rb_insert_color(&active->node, &vma->active);
>   
> -	return &active->base;
> +replace:
> +	if (i915_gem_active_isset(&active->base)) {
> +		__list_del_entry(&active->base.link);
> +		vma->active_count--;
> +		GEM_BUG_ON(!vma->active_count);
> +	}

When does this trigger? Please put a comment for this block.

> +	GEM_BUG_ON(list_empty(&vma->last_active.link));
> +	list_replace_init(&vma->last_active.link, &active->base.link);
> +	active->base.request = fetch_and_zero(&vma->last_active.request);
> +
> +out:
> +	return &vma->last_active;
>   }
>   
>   int i915_vma_move_to_active(struct i915_vma *vma,
> @@ -1002,6 +1029,11 @@ int i915_vma_unbind(struct i915_vma *vma)
>   		 */
>   		__i915_vma_pin(vma);
>   
> +		ret = i915_gem_active_retire(&vma->last_active,
> +					     &vma->vm->i915->drm.struct_mutex);

Dang.. there goes my idea to loop/iterate purely over the tree..

> +		if (ret)
> +			goto unpin;
> +
>   		rbtree_postorder_for_each_entry_safe(active, n,
>   						     &vma->active, node) {
>   			ret = i915_gem_active_retire(&active->base,
> diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
> index c297b0a0dc47..f06d66377107 100644
> --- a/drivers/gpu/drm/i915/i915_vma.h
> +++ b/drivers/gpu/drm/i915/i915_vma.h
> @@ -97,6 +97,7 @@ struct i915_vma {
>   
>   	unsigned int active_count;
>   	struct rb_root active;
> +	struct i915_gem_active last_active;
>   	struct i915_gem_active last_fence;
>   
>   	/**
> 

Okay I get the idea and it is a good optimisation for the common case.

Regards,

Tvrtko

diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 2faad2a1d00e..9b69d24c5cf1 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -119,6 +119,12 @@  i915_vma_retire(struct i915_gem_active *base, struct i915_request *rq)
 	__i915_vma_retire(active->vma, rq);
 }
 
+static void
+i915_vma_last_retire(struct i915_gem_active *base, struct i915_request *rq)
+{
+	__i915_vma_retire(container_of(base, struct i915_vma, last_active), rq);
+}
+
 static struct i915_vma *
 vma_create(struct drm_i915_gem_object *obj,
 	   struct i915_address_space *vm,
@@ -136,6 +142,7 @@  vma_create(struct drm_i915_gem_object *obj,
 
 	vma->active = RB_ROOT;
 
+	init_request_active(&vma->last_active, i915_vma_last_retire);
 	init_request_active(&vma->last_fence, NULL);
 	vma->vm = vm;
 	vma->ops = &vm->vma_ops;
@@ -895,6 +902,15 @@  static struct i915_gem_active *lookup_active(struct i915_vma *vma, u64 idx)
 {
 	struct i915_vma_active *active;
 	struct rb_node **p, *parent;
+	struct i915_request *old;
+
+	old = i915_gem_active_raw(&vma->last_active,
+				  &vma->vm->i915->drm.struct_mutex);
+	if (!old || old->fence.context == idx)
+		goto out;
+
+	/* Move the currently active fence into the rbtree */
+	idx = old->fence.context;
 
 	parent = NULL;
 	p = &vma->active.rb_node;
@@ -903,7 +919,7 @@  static struct i915_gem_active *lookup_active(struct i915_vma *vma, u64 idx)
 
 		active = rb_entry(parent, struct i915_vma_active, node);
 		if (active->timeline == idx)
-			return &active->base;
+			goto replace;
 
 		if (active->timeline < idx)
 			p = &parent->rb_right;
@@ -922,7 +938,18 @@  static struct i915_gem_active *lookup_active(struct i915_vma *vma, u64 idx)
 	rb_link_node(&active->node, parent, p);
 	rb_insert_color(&active->node, &vma->active);
 
-	return &active->base;
+replace:
+	if (i915_gem_active_isset(&active->base)) {
+		__list_del_entry(&active->base.link);
+		vma->active_count--;
+		GEM_BUG_ON(!vma->active_count);
+	}
+	GEM_BUG_ON(list_empty(&vma->last_active.link));
+	list_replace_init(&vma->last_active.link, &active->base.link);
+	active->base.request = fetch_and_zero(&vma->last_active.request);
+
+out:
+	return &vma->last_active;
 }
 
 int i915_vma_move_to_active(struct i915_vma *vma,
@@ -1002,6 +1029,11 @@  int i915_vma_unbind(struct i915_vma *vma)
 		 */
 		__i915_vma_pin(vma);
 
+		ret = i915_gem_active_retire(&vma->last_active,
+					     &vma->vm->i915->drm.struct_mutex);
+		if (ret)
+			goto unpin;
+
 		rbtree_postorder_for_each_entry_safe(active, n,
 						     &vma->active, node) {
 			ret = i915_gem_active_retire(&active->base,
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index c297b0a0dc47..f06d66377107 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -97,6 +97,7 @@  struct i915_vma {
 
 	unsigned int active_count;
 	struct rb_root active;
+	struct i915_gem_active last_active;
 	struct i915_gem_active last_fence;
 
 	/**

[6/6] drm/i915: Track the last-active inside the i915_vma

Commit Message

Comments

Patch