Message ID | 20200715115147.11866-7-chris@chris-wilson.co.uk (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [01/66] drm/i915: Reduce i915_request.lock contention for i915_request_wait | expand |
On 15/07/2020 12:50, Chris Wilson wrote: > Whenever an i915_active idles, we prune its tree of old fence slots to > prevent a gradual leak should it be used to track many, many timelines. > The downside is that we then have to frequently reallocate the rbtree. > A compromise is that we keep the most recently used fence slot, and > reuse that for the next active reference as that is the most likely > timeline to be reused. > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > --- > drivers/gpu/drm/i915/i915_active.c | 27 ++++++++++++++++++++------- > drivers/gpu/drm/i915/i915_active.h | 4 ---- > 2 files changed, 20 insertions(+), 11 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c > index 799282fb1bb9..0854b1552bc1 100644 > --- a/drivers/gpu/drm/i915/i915_active.c > +++ b/drivers/gpu/drm/i915/i915_active.c > @@ -130,8 +130,8 @@ static inline void debug_active_assert(struct i915_active *ref) { } > static void > __active_retire(struct i915_active *ref) > { > + struct rb_root root = RB_ROOT; > struct active_node *it, *n; > - struct rb_root root; > unsigned long flags; > > GEM_BUG_ON(i915_active_is_idle(ref)); > @@ -143,9 +143,21 @@ __active_retire(struct i915_active *ref) > GEM_BUG_ON(rcu_access_pointer(ref->excl.fence)); > debug_active_deactivate(ref); > > - root = ref->tree; > - ref->tree = RB_ROOT; > - ref->cache = NULL; > + /* Even if we have not used the cache, we may still have a barrier */ > + if (!ref->cache) > + ref->cache = fetch_node(ref->tree.rb_node); > + > + /* Keep the MRU cached node for reuse */ > + if (ref->cache) { > + /* Discard all other nodes in the tree */ > + rb_erase(&ref->cache->node, &ref->tree); > + root = ref->tree; > + > + /* Rebuild the tree with only the cached node */ > + rb_link_node(&ref->cache->node, NULL, &ref->tree.rb_node); > + rb_insert_color(&ref->cache->node, &ref->tree); > + GEM_BUG_ON(ref->tree.rb_node != &ref->cache->node); > + } > > spin_unlock_irqrestore(&ref->tree_lock, flags); > > @@ -156,6 +168,7 @@ __active_retire(struct i915_active *ref) > /* ... except if you wait on it, you must manage your own references! */ > wake_up_var(ref); > > + /* Finally free the discarded timeline tree */ > rbtree_postorder_for_each_entry_safe(it, n, &root, node) { > GEM_BUG_ON(i915_active_fence_isset(&it->base)); > kmem_cache_free(global.slab_cache, it); Here it frees everything.. so how does ref->cache, being in the tree, survives? > @@ -750,16 +763,16 @@ int i915_sw_fence_await_active(struct i915_sw_fence *fence, > return await_active(ref, flags, sw_await_fence, fence, fence); > } > > -#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) > void i915_active_fini(struct i915_active *ref) > { > debug_active_fini(ref); > GEM_BUG_ON(atomic_read(&ref->count)); > GEM_BUG_ON(work_pending(&ref->work)); > - GEM_BUG_ON(!RB_EMPTY_ROOT(&ref->tree)); > mutex_destroy(&ref->mutex); > + > + if (ref->cache) > + kmem_cache_free(global.slab_cache, ref->cache); > } > -#endif > > static inline bool is_idle_barrier(struct active_node *node, u64 idx) > { > diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h > index 73ded3c52a04..b9e0394e2975 100644 > --- a/drivers/gpu/drm/i915/i915_active.h > +++ b/drivers/gpu/drm/i915/i915_active.h > @@ -217,11 +217,7 @@ i915_active_is_idle(const struct i915_active *ref) > return !atomic_read(&ref->count); > } > > -#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) > void i915_active_fini(struct i915_active *ref); > -#else > -static inline void i915_active_fini(struct i915_active *ref) { } > -#endif > > int i915_active_acquire_preallocate_barrier(struct i915_active *ref, > struct intel_engine_cs *engine); >
On 2020-07-15 13:50, Chris Wilson wrote: > Whenever an i915_active idles, we prune its tree of old fence slots to > prevent a gradual leak should it be used to track many, many timelines. > The downside is that we then have to frequently reallocate the rbtree. > A compromise is that we keep the most recently used fence slot, and > reuse that for the next active reference as that is the most likely > timeline to be reused. > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > --- > drivers/gpu/drm/i915/i915_active.c | 27 ++++++++++++++++++++------- > drivers/gpu/drm/i915/i915_active.h | 4 ---- > 2 files changed, 20 insertions(+), 11 deletions(-) Lgtm. Reviewed-by: Thomas Hellström <thomas.hellstrom@intel.com>
Quoting Tvrtko Ursulin (2020-07-17 13:38:01) > > On 15/07/2020 12:50, Chris Wilson wrote: > > + /* Even if we have not used the cache, we may still have a barrier */ > > + if (!ref->cache) > > + ref->cache = fetch_node(ref->tree.rb_node); > > + > > + /* Keep the MRU cached node for reuse */ > > + if (ref->cache) { > > + /* Discard all other nodes in the tree */ > > + rb_erase(&ref->cache->node, &ref->tree); > > + root = ref->tree; > > + > > + /* Rebuild the tree with only the cached node */ > > + rb_link_node(&ref->cache->node, NULL, &ref->tree.rb_node); > > + rb_insert_color(&ref->cache->node, &ref->tree); > > + GEM_BUG_ON(ref->tree.rb_node != &ref->cache->node); > > + } > > > > spin_unlock_irqrestore(&ref->tree_lock, flags); > > > > @@ -156,6 +168,7 @@ __active_retire(struct i915_active *ref) > > /* ... except if you wait on it, you must manage your own references! */ > > wake_up_var(ref); > > > > + /* Finally free the discarded timeline tree */ > > rbtree_postorder_for_each_entry_safe(it, n, &root, node) { > > GEM_BUG_ON(i915_active_fence_isset(&it->base)); > > kmem_cache_free(global.slab_cache, it); > > Here it frees everything.. so how does ref->cache, being in the tree, > survives? This is the old root which does not contain ref->cache, as we moved that to become the new tree. /* Discard all other nodes in the tree */ rb_erase(&ref->cache->node, &ref->tree); root = ref->tree;
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c index 799282fb1bb9..0854b1552bc1 100644 --- a/drivers/gpu/drm/i915/i915_active.c +++ b/drivers/gpu/drm/i915/i915_active.c @@ -130,8 +130,8 @@ static inline void debug_active_assert(struct i915_active *ref) { } static void __active_retire(struct i915_active *ref) { + struct rb_root root = RB_ROOT; struct active_node *it, *n; - struct rb_root root; unsigned long flags; GEM_BUG_ON(i915_active_is_idle(ref)); @@ -143,9 +143,21 @@ __active_retire(struct i915_active *ref) GEM_BUG_ON(rcu_access_pointer(ref->excl.fence)); debug_active_deactivate(ref); - root = ref->tree; - ref->tree = RB_ROOT; - ref->cache = NULL; + /* Even if we have not used the cache, we may still have a barrier */ + if (!ref->cache) + ref->cache = fetch_node(ref->tree.rb_node); + + /* Keep the MRU cached node for reuse */ + if (ref->cache) { + /* Discard all other nodes in the tree */ + rb_erase(&ref->cache->node, &ref->tree); + root = ref->tree; + + /* Rebuild the tree with only the cached node */ + rb_link_node(&ref->cache->node, NULL, &ref->tree.rb_node); + rb_insert_color(&ref->cache->node, &ref->tree); + GEM_BUG_ON(ref->tree.rb_node != &ref->cache->node); + } spin_unlock_irqrestore(&ref->tree_lock, flags); @@ -156,6 +168,7 @@ __active_retire(struct i915_active *ref) /* ... except if you wait on it, you must manage your own references! */ wake_up_var(ref); + /* Finally free the discarded timeline tree */ rbtree_postorder_for_each_entry_safe(it, n, &root, node) { GEM_BUG_ON(i915_active_fence_isset(&it->base)); kmem_cache_free(global.slab_cache, it); @@ -750,16 +763,16 @@ int i915_sw_fence_await_active(struct i915_sw_fence *fence, return await_active(ref, flags, sw_await_fence, fence, fence); } -#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) void i915_active_fini(struct i915_active *ref) { debug_active_fini(ref); GEM_BUG_ON(atomic_read(&ref->count)); GEM_BUG_ON(work_pending(&ref->work)); - GEM_BUG_ON(!RB_EMPTY_ROOT(&ref->tree)); mutex_destroy(&ref->mutex); + + if (ref->cache) + kmem_cache_free(global.slab_cache, ref->cache); } -#endif static inline bool is_idle_barrier(struct active_node *node, u64 idx) { diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h index 73ded3c52a04..b9e0394e2975 100644 --- a/drivers/gpu/drm/i915/i915_active.h +++ b/drivers/gpu/drm/i915/i915_active.h @@ -217,11 +217,7 @@ i915_active_is_idle(const struct i915_active *ref) return !atomic_read(&ref->count); } -#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) void i915_active_fini(struct i915_active *ref); -#else -static inline void i915_active_fini(struct i915_active *ref) { } -#endif int i915_active_acquire_preallocate_barrier(struct i915_active *ref, struct intel_engine_cs *engine);
Whenever an i915_active idles, we prune its tree of old fence slots to prevent a gradual leak should it be used to track many, many timelines. The downside is that we then have to frequently reallocate the rbtree. A compromise is that we keep the most recently used fence slot, and reuse that for the next active reference as that is the most likely timeline to be reused. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> --- drivers/gpu/drm/i915/i915_active.c | 27 ++++++++++++++++++++------- drivers/gpu/drm/i915/i915_active.h | 4 ---- 2 files changed, 20 insertions(+), 11 deletions(-)