
[07/66] drm/i915: Keep the most recently used active-fence upon discard

Message ID 20200715115147.11866-7-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Series [01/66] drm/i915: Reduce i915_request.lock contention for i915_request_wait

Commit Message

Chris Wilson July 15, 2020, 11:50 a.m. UTC
Whenever an i915_active idles, we prune its tree of old fence slots to
prevent a gradual leak should it be used to track many, many timelines.
The downside is that we then have to frequently reallocate the rbtree.
A compromise is that we keep the most recently used fence slot, and
reuse that for the next active reference as that is the most likely
timeline to be reused.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_active.c | 27 ++++++++++++++++++++-------
 drivers/gpu/drm/i915/i915_active.h |  4 ----
 2 files changed, 20 insertions(+), 11 deletions(-)
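
[Editor's note: the compromise described above can be shown in isolation.
Below is a minimal userspace sketch of the idea, in plain C with
hypothetical names (struct tracker, retire()), using a linked list as a
stand-in for the driver's rbtree: on retire, every tracked slot is freed
except the most recently used one, which is kept so the next acquire can
reuse it without reallocating. The ordering differs from the real
__active_retire(), which erases the cached node first and frees the
leftover tree after unlocking; this sketch only demonstrates the net
effect.

#include <assert.h>
#include <stdlib.h>

struct slot {
	int timeline;		/* stand-in for the tracked timeline id */
	struct slot *next;
};

struct tracker {
	struct slot *head;	/* all tracked timeline slots */
	struct slot *cache;	/* most recently used slot, if any */
};

static void retire(struct tracker *t)
{
	struct slot *it = t->head;

	t->head = NULL;

	/* Free everything except the cached MRU slot... */
	while (it) {
		struct slot *next = it->next;

		if (it != t->cache)
			free(it);
		it = next;
	}

	/* ...then rebuild the container with only that slot */
	if (t->cache) {
		t->cache->next = NULL;
		t->head = t->cache;
	}
}

int main(void)
{
	struct tracker t = { NULL, NULL };
	int i;

	for (i = 0; i < 3; i++) {
		struct slot *s = calloc(1, sizeof(*s));

		s->timeline = i;
		s->next = t.head;
		t.head = s;
		t.cache = s;	/* the last-added slot is the MRU one */
	}

	retire(&t);

	/* Only the MRU slot survives, ready for immediate reuse */
	assert(t.head == t.cache && !t.head->next);
	assert(t.head->timeline == 2);
	free(t.head);
	return 0;
}
]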

Comments

Tvrtko Ursulin July 17, 2020, 12:38 p.m. UTC | #1
On 15/07/2020 12:50, Chris Wilson wrote:
> Whenever an i915_active idles, we prune its tree of old fence slots to
> prevent a gradual leak should it be used to track many, many timelines.
> The downside is that we then have to frequently reallocate the rbtree.
> A compromise is that we keep the most recently used fence slot, and
> reuse that for the next active reference as that is the most likely
> timeline to be reused.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_active.c | 27 ++++++++++++++++++++-------
>   drivers/gpu/drm/i915/i915_active.h |  4 ----
>   2 files changed, 20 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
> index 799282fb1bb9..0854b1552bc1 100644
> --- a/drivers/gpu/drm/i915/i915_active.c
> +++ b/drivers/gpu/drm/i915/i915_active.c
> @@ -130,8 +130,8 @@ static inline void debug_active_assert(struct i915_active *ref) { }
>   static void
>   __active_retire(struct i915_active *ref)
>   {
> +	struct rb_root root = RB_ROOT;
>   	struct active_node *it, *n;
> -	struct rb_root root;
>   	unsigned long flags;
>   
>   	GEM_BUG_ON(i915_active_is_idle(ref));
> @@ -143,9 +143,21 @@ __active_retire(struct i915_active *ref)
>   	GEM_BUG_ON(rcu_access_pointer(ref->excl.fence));
>   	debug_active_deactivate(ref);
>   
> -	root = ref->tree;
> -	ref->tree = RB_ROOT;
> -	ref->cache = NULL;
> +	/* Even if we have not used the cache, we may still have a barrier */
> +	if (!ref->cache)
> +		ref->cache = fetch_node(ref->tree.rb_node);
> +
> +	/* Keep the MRU cached node for reuse */
> +	if (ref->cache) {
> +		/* Discard all other nodes in the tree */
> +		rb_erase(&ref->cache->node, &ref->tree);
> +		root = ref->tree;
> +
> +		/* Rebuild the tree with only the cached node */
> +		rb_link_node(&ref->cache->node, NULL, &ref->tree.rb_node);
> +		rb_insert_color(&ref->cache->node, &ref->tree);
> +		GEM_BUG_ON(ref->tree.rb_node != &ref->cache->node);
> +	}
>   
>   	spin_unlock_irqrestore(&ref->tree_lock, flags);
>   
> @@ -156,6 +168,7 @@ __active_retire(struct i915_active *ref)
>   	/* ... except if you wait on it, you must manage your own references! */
>   	wake_up_var(ref);
>   
> +	/* Finally free the discarded timeline tree  */
>   	rbtree_postorder_for_each_entry_safe(it, n, &root, node) {
>   		GEM_BUG_ON(i915_active_fence_isset(&it->base));
>   		kmem_cache_free(global.slab_cache, it);

Here it frees everything... so how does ref->cache, being in the tree, 
survive?

> @@ -750,16 +763,16 @@ int i915_sw_fence_await_active(struct i915_sw_fence *fence,
>   	return await_active(ref, flags, sw_await_fence, fence, fence);
>   }
>   
> -#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
>   void i915_active_fini(struct i915_active *ref)
>   {
>   	debug_active_fini(ref);
>   	GEM_BUG_ON(atomic_read(&ref->count));
>   	GEM_BUG_ON(work_pending(&ref->work));
> -	GEM_BUG_ON(!RB_EMPTY_ROOT(&ref->tree));
>   	mutex_destroy(&ref->mutex);
> +
> +	if (ref->cache)
> +		kmem_cache_free(global.slab_cache, ref->cache);
>   }
> -#endif
>   
>   static inline bool is_idle_barrier(struct active_node *node, u64 idx)
>   {
> diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
> index 73ded3c52a04..b9e0394e2975 100644
> --- a/drivers/gpu/drm/i915/i915_active.h
> +++ b/drivers/gpu/drm/i915/i915_active.h
> @@ -217,11 +217,7 @@ i915_active_is_idle(const struct i915_active *ref)
>   	return !atomic_read(&ref->count);
>   }
>   
> -#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
>   void i915_active_fini(struct i915_active *ref);
> -#else
> -static inline void i915_active_fini(struct i915_active *ref) { }
> -#endif
>   
>   int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
>   					    struct intel_engine_cs *engine);
>
Thomas Hellström (Intel) July 22, 2020, 9:46 a.m. UTC | #2
On 2020-07-15 13:50, Chris Wilson wrote:
> Whenever an i915_active idles, we prune its tree of old fence slots to
> prevent a gradual leak should it be used to track many, many timelines.
> The downside is that we then have to frequently reallocate the rbtree.
> A compromise is that we keep the most recently used fence slot, and
> reuse that for the next active reference as that is the most likely
> timeline to be reused.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_active.c | 27 ++++++++++++++++++++-------
>   drivers/gpu/drm/i915/i915_active.h |  4 ----
>   2 files changed, 20 insertions(+), 11 deletions(-)

LGTM. Reviewed-by: Thomas Hellström <thomas.hellstrom@intel.com>
Chris Wilson July 28, 2020, 2:22 p.m. UTC | #3
Quoting Tvrtko Ursulin (2020-07-17 13:38:01)
> 
> On 15/07/2020 12:50, Chris Wilson wrote:
> > +     /* Even if we have not used the cache, we may still have a barrier */
> > +     if (!ref->cache)
> > +             ref->cache = fetch_node(ref->tree.rb_node);
> > +
> > +     /* Keep the MRU cached node for reuse */
> > +     if (ref->cache) {
> > +             /* Discard all other nodes in the tree */
> > +             rb_erase(&ref->cache->node, &ref->tree);
> > +             root = ref->tree;
> > +
> > +             /* Rebuild the tree with only the cached node */
> > +             rb_link_node(&ref->cache->node, NULL, &ref->tree.rb_node);
> > +             rb_insert_color(&ref->cache->node, &ref->tree);
> > +             GEM_BUG_ON(ref->tree.rb_node != &ref->cache->node);
> > +     }
> >   
> >       spin_unlock_irqrestore(&ref->tree_lock, flags);
> >   
> > @@ -156,6 +168,7 @@ __active_retire(struct i915_active *ref)
> >       /* ... except if you wait on it, you must manage your own references! */
> >       wake_up_var(ref);
> >   
> > +     /* Finally free the discarded timeline tree  */
> >       rbtree_postorder_for_each_entry_safe(it, n, &root, node) {
> >               GEM_BUG_ON(i915_active_fence_isset(&it->base));
> >               kmem_cache_free(global.slab_cache, it);
> 
> Here it frees everything... so how does ref->cache, being in the tree, 
> survive?

This is the old root which does not contain ref->cache, as we moved that
to become the new tree.

             /* Discard all other nodes in the tree */
             rb_erase(&ref->cache->node, &ref->tree);
             root = ref->tree;
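
[Editor's note: to make that ordering concrete, here is a tiny userspace
analogue, in plain C with made-up types rather than the kernel's
rb_root/rb_node API: because the cached node is unlinked before the root
is copied, the snapshot that is later freed can never reach it.

#include <assert.h>
#include <stdlib.h>

/* Minimal stand-ins: a "tree" is just nodes hanging off a root pointer,
 * enough to show the erase -> snapshot -> rebuild sequence. */
struct node { struct node *child; };
struct root { struct node *top; };

int main(void)
{
	struct node *cache = calloc(1, sizeof(*cache));
	struct node *other = calloc(1, sizeof(*other));
	struct root tree = { other };
	struct root old;

	other->child = cache;	/* cache starts out inside the tree */

	other->child = NULL;	/* "rb_erase": unlink the cached node */
	old = tree;		/* "root = ref->tree": old nodes only */
	tree.top = cache;	/* rebuild the tree around the cache */

	/* Freeing the old tree never visits the cached node... */
	assert(old.top == other && !old.top->child);
	free(other);

	/* ...so the cache survives for the next acquire */
	assert(tree.top == cache);
	free(cache);
	return 0;
}
]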

Patch

diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index 799282fb1bb9..0854b1552bc1 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -130,8 +130,8 @@  static inline void debug_active_assert(struct i915_active *ref) { }
 static void
 __active_retire(struct i915_active *ref)
 {
+	struct rb_root root = RB_ROOT;
 	struct active_node *it, *n;
-	struct rb_root root;
 	unsigned long flags;
 
 	GEM_BUG_ON(i915_active_is_idle(ref));
@@ -143,9 +143,21 @@  __active_retire(struct i915_active *ref)
 	GEM_BUG_ON(rcu_access_pointer(ref->excl.fence));
 	debug_active_deactivate(ref);
 
-	root = ref->tree;
-	ref->tree = RB_ROOT;
-	ref->cache = NULL;
+	/* Even if we have not used the cache, we may still have a barrier */
+	if (!ref->cache)
+		ref->cache = fetch_node(ref->tree.rb_node);
+
+	/* Keep the MRU cached node for reuse */
+	if (ref->cache) {
+		/* Discard all other nodes in the tree */
+		rb_erase(&ref->cache->node, &ref->tree);
+		root = ref->tree;
+
+		/* Rebuild the tree with only the cached node */
+		rb_link_node(&ref->cache->node, NULL, &ref->tree.rb_node);
+		rb_insert_color(&ref->cache->node, &ref->tree);
+		GEM_BUG_ON(ref->tree.rb_node != &ref->cache->node);
+	}
 
 	spin_unlock_irqrestore(&ref->tree_lock, flags);
 
@@ -156,6 +168,7 @@  __active_retire(struct i915_active *ref)
 	/* ... except if you wait on it, you must manage your own references! */
 	wake_up_var(ref);
 
+	/* Finally free the discarded timeline tree  */
 	rbtree_postorder_for_each_entry_safe(it, n, &root, node) {
 		GEM_BUG_ON(i915_active_fence_isset(&it->base));
 		kmem_cache_free(global.slab_cache, it);
@@ -750,16 +763,16 @@  int i915_sw_fence_await_active(struct i915_sw_fence *fence,
 	return await_active(ref, flags, sw_await_fence, fence, fence);
 }
 
-#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
 void i915_active_fini(struct i915_active *ref)
 {
 	debug_active_fini(ref);
 	GEM_BUG_ON(atomic_read(&ref->count));
 	GEM_BUG_ON(work_pending(&ref->work));
-	GEM_BUG_ON(!RB_EMPTY_ROOT(&ref->tree));
 	mutex_destroy(&ref->mutex);
+
+	if (ref->cache)
+		kmem_cache_free(global.slab_cache, ref->cache);
 }
-#endif
 
 static inline bool is_idle_barrier(struct active_node *node, u64 idx)
 {
diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
index 73ded3c52a04..b9e0394e2975 100644
--- a/drivers/gpu/drm/i915/i915_active.h
+++ b/drivers/gpu/drm/i915/i915_active.h
@@ -217,11 +217,7 @@  i915_active_is_idle(const struct i915_active *ref)
 	return !atomic_read(&ref->count);
 }
 
-#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
 void i915_active_fini(struct i915_active *ref);
-#else
-static inline void i915_active_fini(struct i915_active *ref) { }
-#endif
 
 int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
 					    struct intel_engine_cs *engine);