[6/6] drm/i915: Track the last-active inside the i915_vma

Message ID	20180706103947.15919-6-chris@chris-wilson.co.uk (mailing list archive)
State	New, archived
Headers	show Return-Path: <intel-gfx-bounces@lists.freedesktop.org> From: Chris Wilson <chris@chris-wilson.co.uk> To: intel-gfx@lists.freedesktop.org Date: Fri, 6 Jul 2018 11:39:47 +0100 Message-Id: <20180706103947.15919-6-chris@chris-wilson.co.uk> In-Reply-To: <20180706103947.15919-1-chris@chris-wilson.co.uk> References: <20180706103947.15919-1-chris@chris-wilson.co.uk> Subject: [Intel-gfx] [PATCH 6/6] drm/i915: Track the last-active inside the i915_vma Precedence: list MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: base64 Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" <intel-gfx-bounces@lists.freedesktop.org>

Message ID

20180706103947.15919-6-chris@chris-wilson.co.uk (mailing list archive)

State

New, archived

Headers

From: Chris Wilson <chris@chris-wilson.co.uk>
To: intel-gfx@lists.freedesktop.org
Date: Fri,  6 Jul 2018 11:39:47 +0100
Message-Id: <20180706103947.15919-6-chris@chris-wilson.co.uk>
In-Reply-To: <20180706103947.15919-1-chris@chris-wilson.co.uk>
References: <20180706103947.15919-1-chris@chris-wilson.co.uk>
Subject: [Intel-gfx] [PATCH 6/6] drm/i915: Track the last-active inside the
	i915_vma
Precedence: list
MIME-Version: 1.0
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: base64
Errors-To: intel-gfx-bounces@lists.freedesktop.org
Sender: "Intel-gfx" <intel-gfx-bounces@lists.freedesktop.org>

Commit Message

Chris Wilson July 6, 2018, 10:39 a.m. UTC

Using a VMA on more than one timeline concurrently is the exception
rather than the rule (using it concurrently on multiple engines). As we
expect to only use one active tracker, store the most recently used
tracker inside the i915_vma itself and only fall back to the rbtree if
we need a second or more concurrent active trackers.

v2: Comments on how we overwrite any existing last_active cache.
v3: __list_del_entry() before list_replace_init() is confusing and, much
more important, entirely redundant.
v4: Note that both last_active and the rbtree may be simultaneously
tracking this timeline, albeit with different requests, and so the vma
may be retired twice for the same timeline.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_vma.c | 56 +++++++++++++++++++++++++++++++--
 drivers/gpu/drm/i915/i915_vma.h |  1 +
 2 files changed, 55 insertions(+), 2 deletions(-)

Comments

Tvrtko Ursulin July 6, 2018, 11:07 a.m. UTC | #1

On 06/07/2018 11:39, Chris Wilson wrote:
> Using a VMA on more than one timeline concurrently is the exception
> rather than the rule (using it concurrently on multiple engines). As we
> expect to only use one active tracker, store the most recently used
> tracker inside the i915_vma itself and only fall back to the rbtree if
> we need a second or more concurrent active trackers.
> 
> v2: Comments on how we overwrite any existing last_active cache.
> v3: __list_del_entry() before list_replace_init() is confusing and, much
> more important, entirely redundant.
> v4: Note that both last_active and the rbtree may be simultaneously
> tracking this timeline, albeit with different requests, and so the vma
> may be retired twice for the same timeline.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_vma.c | 56 +++++++++++++++++++++++++++++++--
>   drivers/gpu/drm/i915/i915_vma.h |  1 +
>   2 files changed, 55 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index b4cc98330225..6fbd09d6af28 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -119,6 +119,12 @@ i915_vma_retire(struct i915_gem_active *base, struct i915_request *rq)
>   	__i915_vma_retire(active->vma, rq);
>   }
>   
> +static void
> +i915_vma_last_retire(struct i915_gem_active *base, struct i915_request *rq)
> +{
> +	__i915_vma_retire(container_of(base, struct i915_vma, last_active), rq);
> +}
> +
>   static struct i915_vma *
>   vma_create(struct drm_i915_gem_object *obj,
>   	   struct i915_address_space *vm,
> @@ -136,6 +142,7 @@ vma_create(struct drm_i915_gem_object *obj,
>   
>   	vma->active = RB_ROOT;
>   
> +	init_request_active(&vma->last_active, i915_vma_last_retire);
>   	init_request_active(&vma->last_fence, NULL);
>   	vma->vm = vm;
>   	vma->ops = &vm->vma_ops;
> @@ -895,6 +902,29 @@ static struct i915_gem_active *active_instance(struct i915_vma *vma, u64 idx)
>   {
>   	struct i915_vma_active *active;
>   	struct rb_node **p, *parent;
> +	struct i915_request *old;
> +
> +	/*
> +	 * We track the most recently used timeline to skip a rbtree search
> +	 * for the common case, under typical loads we never need the rbtree
> +	 * at all. We can reuse the last_active slot if it is empty, that is
> +	 * after the previous activity has been retired, or if the active
> +	 * matches the current timeline.
> +	 *
> +	 * Note that we allow the timeline to be active simultaneously in
> +	 * the rbtree and the last_active cache. We do this to avoid having
> +	 * to search and replace the rbtree element for a new timeline, with
> +	 * the cost being that we must be aware that the vma may be retired
> +	 * twice for the same timeline (as the older rbtree element will be
> +	 * retired before the new request added to last_active).
> +	 */
> +	old = i915_gem_active_raw(&vma->last_active,
> +				  &vma->vm->i915->drm.struct_mutex);
> +	if (!old || old->fence.context == idx)
> +		goto out;
> +
> +	/* Move the currently active fence into the rbtree */
> +	idx = old->fence.context;
>   
>   	parent = NULL;
>   	p = &vma->active.rb_node;
> @@ -903,7 +933,7 @@ static struct i915_gem_active *active_instance(struct i915_vma *vma, u64 idx)
>   
>   		active = rb_entry(parent, struct i915_vma_active, node);
>   		if (active->timeline == idx)
> -			return &active->base;
> +			goto replace;
>   
>   		if (active->timeline < idx)
>   			p = &parent->rb_right;
> @@ -922,7 +952,24 @@ static struct i915_gem_active *active_instance(struct i915_vma *vma, u64 idx)
>   	rb_link_node(&active->node, parent, p);
>   	rb_insert_color(&active->node, &vma->active);
>   
> -	return &active->base;
> +replace:
> +	/*
> +	 * Overwrite the previous active slot in the rbtree with last_active,
> +	 * leaving last_active zeroed. If the previous slot is still active,
> +	 * we must be careful as we now only expect to receive one retire
> +	 * callback not two, and so must undo the active counting for the
> +	 * overwritten slot.
> +	 */
> +	if (i915_gem_active_isset(&active->base)) {
> +		vma->active_count--;
> +		GEM_BUG_ON(!vma->active_count);
> +	}
> +	GEM_BUG_ON(list_empty(&vma->last_active.link));
> +	list_replace_init(&vma->last_active.link, &active->base.link);
> +	active->base.request = fetch_and_zero(&vma->last_active.request);
> +
> +out:
> +	return &vma->last_active;
>   }
>   
>   int i915_vma_move_to_active(struct i915_vma *vma,
> @@ -1002,6 +1049,11 @@ int i915_vma_unbind(struct i915_vma *vma)
>   		 */
>   		__i915_vma_pin(vma);
>   
> +		ret = i915_gem_active_retire(&vma->last_active,
> +					     &vma->vm->i915->drm.struct_mutex);
> +		if (ret)
> +			goto unpin;
> +
>   		rbtree_postorder_for_each_entry_safe(active, n,
>   						     &vma->active, node) {
>   			ret = i915_gem_active_retire(&active->base,
> diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
> index c297b0a0dc47..f06d66377107 100644
> --- a/drivers/gpu/drm/i915/i915_vma.h
> +++ b/drivers/gpu/drm/i915/i915_vma.h
> @@ -97,6 +97,7 @@ struct i915_vma {
>   
>   	unsigned int active_count;
>   	struct rb_root active;
> +	struct i915_gem_active last_active;
>   	struct i915_gem_active last_fence;
>   
>   	/**
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko

diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index b4cc98330225..6fbd09d6af28 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -119,6 +119,12 @@  i915_vma_retire(struct i915_gem_active *base, struct i915_request *rq)
 	__i915_vma_retire(active->vma, rq);
 }
 
+static void
+i915_vma_last_retire(struct i915_gem_active *base, struct i915_request *rq)
+{
+	__i915_vma_retire(container_of(base, struct i915_vma, last_active), rq);
+}
+
 static struct i915_vma *
 vma_create(struct drm_i915_gem_object *obj,
 	   struct i915_address_space *vm,
@@ -136,6 +142,7 @@  vma_create(struct drm_i915_gem_object *obj,
 
 	vma->active = RB_ROOT;
 
+	init_request_active(&vma->last_active, i915_vma_last_retire);
 	init_request_active(&vma->last_fence, NULL);
 	vma->vm = vm;
 	vma->ops = &vm->vma_ops;
@@ -895,6 +902,29 @@  static struct i915_gem_active *active_instance(struct i915_vma *vma, u64 idx)
 {
 	struct i915_vma_active *active;
 	struct rb_node **p, *parent;
+	struct i915_request *old;
+
+	/*
+	 * We track the most recently used timeline to skip a rbtree search
+	 * for the common case, under typical loads we never need the rbtree
+	 * at all. We can reuse the last_active slot if it is empty, that is
+	 * after the previous activity has been retired, or if the active
+	 * matches the current timeline.
+	 *
+	 * Note that we allow the timeline to be active simultaneously in
+	 * the rbtree and the last_active cache. We do this to avoid having
+	 * to search and replace the rbtree element for a new timeline, with
+	 * the cost being that we must be aware that the vma may be retired
+	 * twice for the same timeline (as the older rbtree element will be
+	 * retired before the new request added to last_active).
+	 */
+	old = i915_gem_active_raw(&vma->last_active,
+				  &vma->vm->i915->drm.struct_mutex);
+	if (!old || old->fence.context == idx)
+		goto out;
+
+	/* Move the currently active fence into the rbtree */
+	idx = old->fence.context;
 
 	parent = NULL;
 	p = &vma->active.rb_node;
@@ -903,7 +933,7 @@  static struct i915_gem_active *active_instance(struct i915_vma *vma, u64 idx)
 
 		active = rb_entry(parent, struct i915_vma_active, node);
 		if (active->timeline == idx)
-			return &active->base;
+			goto replace;
 
 		if (active->timeline < idx)
 			p = &parent->rb_right;
@@ -922,7 +952,24 @@  static struct i915_gem_active *active_instance(struct i915_vma *vma, u64 idx)
 	rb_link_node(&active->node, parent, p);
 	rb_insert_color(&active->node, &vma->active);
 
-	return &active->base;
+replace:
+	/*
+	 * Overwrite the previous active slot in the rbtree with last_active,
+	 * leaving last_active zeroed. If the previous slot is still active,
+	 * we must be careful as we now only expect to receive one retire
+	 * callback not two, and so must undo the active counting for the
+	 * overwritten slot.
+	 */
+	if (i915_gem_active_isset(&active->base)) {
+		vma->active_count--;
+		GEM_BUG_ON(!vma->active_count);
+	}
+	GEM_BUG_ON(list_empty(&vma->last_active.link));
+	list_replace_init(&vma->last_active.link, &active->base.link);
+	active->base.request = fetch_and_zero(&vma->last_active.request);
+
+out:
+	return &vma->last_active;
 }
 
 int i915_vma_move_to_active(struct i915_vma *vma,
@@ -1002,6 +1049,11 @@  int i915_vma_unbind(struct i915_vma *vma)
 		 */
 		__i915_vma_pin(vma);
 
+		ret = i915_gem_active_retire(&vma->last_active,
+					     &vma->vm->i915->drm.struct_mutex);
+		if (ret)
+			goto unpin;
+
 		rbtree_postorder_for_each_entry_safe(active, n,
 						     &vma->active, node) {
 			ret = i915_gem_active_retire(&active->base,
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index c297b0a0dc47..f06d66377107 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -97,6 +97,7 @@  struct i915_vma {
 
 	unsigned int active_count;
 	struct rb_root active;
+	struct i915_gem_active last_active;
 	struct i915_gem_active last_fence;
 
 	/**

[6/6] drm/i915: Track the last-active inside the i915_vma

Commit Message

Comments

Patch