Message ID | 20190130021906.17933-6-chris@chris-wilson.co.uk (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [01/11] drm/i915: Revoke mmaps and prevent access to fence registers across reset | expand |
On 30/01/2019 02:19, Chris Wilson wrote: > Wrap the active tracking for a GPU references in a slabcache for faster > allocations, and keep track of inflight nodes so we can reap the > stale entries upon parking (thereby trimming our memory usage). I suggest a two staged approach. First patch add a slab cache (you can also add kmem_cache_shrink on park as we do for other caches), then add the parking/reaping bit. Under what scenarios we end up not freeing active nodes sufficiently? It would have to be some user which keeps many contexts around, having only used them once? > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > --- > drivers/gpu/drm/i915/i915_active.c | 55 ++++++++++++++++--- > drivers/gpu/drm/i915/i915_active.h | 21 +++++-- > drivers/gpu/drm/i915/i915_active_types.h | 12 +++- > drivers/gpu/drm/i915/i915_drv.h | 2 + > drivers/gpu/drm/i915/i915_gem.c | 16 +++++- > drivers/gpu/drm/i915/i915_gem_gtt.c | 2 +- > drivers/gpu/drm/i915/i915_vma.c | 3 +- > drivers/gpu/drm/i915/selftests/i915_active.c | 3 +- > .../gpu/drm/i915/selftests/mock_gem_device.c | 6 ++ > 9 files changed, 100 insertions(+), 20 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c > index e0182e19cb8b..3c7abbde42ac 100644 > --- a/drivers/gpu/drm/i915/i915_active.c > +++ b/drivers/gpu/drm/i915/i915_active.c > @@ -7,7 +7,9 @@ > #include "i915_drv.h" > #include "i915_active.h" > > -#define BKL(ref) (&(ref)->i915->drm.struct_mutex) > +#define i915_from_gt(x) \ > + container_of(x, struct drm_i915_private, gt.active_refs) > +#define BKL(ref) (&i915_from_gt((ref)->gt)->drm.struct_mutex) > > struct active_node { > struct i915_gem_active base; > @@ -79,11 +81,11 @@ active_instance(struct i915_active *ref, u64 idx) > p = &parent->rb_left; > } > > - node = kmalloc(sizeof(*node), GFP_KERNEL); > + node = kmem_cache_alloc(ref->gt->slab_cache, GFP_KERNEL); > > /* kmalloc may retire the ref->last (thanks shrinker)! 
*/ > if (unlikely(!i915_gem_active_raw(&ref->last, BKL(ref)))) { > - kfree(node); > + kmem_cache_free(ref->gt->slab_cache, node); > goto out; > } > > @@ -94,6 +96,9 @@ active_instance(struct i915_active *ref, u64 idx) > node->ref = ref; > node->timeline = idx; > > + if (RB_EMPTY_ROOT(&ref->tree)) > + list_add(&ref->active_link, &ref->gt->active_refs); > + > rb_link_node(&node->node, parent, p); > rb_insert_color(&node->node, &ref->tree); > > @@ -119,11 +124,11 @@ active_instance(struct i915_active *ref, u64 idx) > return &ref->last; > } > > -void i915_active_init(struct drm_i915_private *i915, > +void i915_active_init(struct i915_gt_active *gt, > struct i915_active *ref, > void (*retire)(struct i915_active *ref)) > { > - ref->i915 = i915; > + ref->gt = gt; > ref->retire = retire; > ref->tree = RB_ROOT; > init_request_active(&ref->last, last_retire); > @@ -161,6 +166,7 @@ void i915_active_release(struct i915_active *ref) > > int i915_active_wait(struct i915_active *ref) > { > + struct kmem_cache *slab = ref->gt->slab_cache; > struct active_node *it, *n; > int ret; > > @@ -168,15 +174,19 @@ int i915_active_wait(struct i915_active *ref) > if (ret) > return ret; > > + if (RB_EMPTY_ROOT(&ref->tree)) > + return 0; > + > rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) { > ret = i915_gem_active_retire(&it->base, BKL(ref)); > if (ret) > return ret; > > GEM_BUG_ON(i915_gem_active_isset(&it->base)); > - kfree(it); > + kmem_cache_free(slab, it); > } > ref->tree = RB_ROOT; > + list_del(&ref->active_link); > > return 0; > } > @@ -210,15 +220,46 @@ int i915_request_await_active(struct i915_request *rq, struct i915_active *ref) > > void i915_active_fini(struct i915_active *ref) > { > + struct kmem_cache *slab = ref->gt->slab_cache; > struct active_node *it, *n; > > + lockdep_assert_held(BKL(ref)); > GEM_BUG_ON(i915_gem_active_isset(&ref->last)); > > + if (RB_EMPTY_ROOT(&ref->tree)) > + return; > + > rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) { > 
GEM_BUG_ON(i915_gem_active_isset(&it->base)); > - kfree(it); > + kmem_cache_free(slab, it); > } > ref->tree = RB_ROOT; > + list_del(&ref->active_link); > +} > + > +int i915_gt_active_init(struct i915_gt_active *gt) > +{ > + gt->slab_cache = KMEM_CACHE(active_node, SLAB_HWCACHE_ALIGN); > + if (!gt->slab_cache) > + return -ENOMEM; > + > + INIT_LIST_HEAD(&gt->active_refs); > + > + return 0; > +} > + > +void i915_gt_active_park(struct i915_gt_active *gt) > +{ > + struct i915_active *it, *n; > + > + list_for_each_entry_safe(it, n, &gt->active_refs, active_link) > + i915_active_fini(it); > +} > + > +void i915_gt_active_fini(struct i915_gt_active *gt) > +{ > + GEM_BUG_ON(!list_empty(&gt->active_refs)); > + kmem_cache_destroy(gt->slab_cache); > } > > #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) > diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h > index c0729a046f98..41c4a5da84c8 100644 > --- a/drivers/gpu/drm/i915/i915_active.h > +++ b/drivers/gpu/drm/i915/i915_active.h > @@ -9,10 +9,6 @@ > > #include "i915_active_types.h" > > -#include <linux/rbtree.h> > - > -#include "i915_request.h" > - > /* > * GPU activity tracking > * > @@ -39,7 +35,7 @@ > * synchronisation. > */ > > -void i915_active_init(struct drm_i915_private *i915, > +void i915_active_init(struct i915_gt_active *gt, > struct i915_active *ref, > void (*retire)(struct i915_active *ref)); > > @@ -63,4 +59,19 @@ i915_active_is_idle(const struct i915_active *ref) > > void i915_active_fini(struct i915_active *ref); > > +/* > + * Active refs memory management > + * > + * To be more economical with memory, we reap all the i915_active trees on > + * parking the GPU (when we know the GPU is inactive) and allocate the nodes > + * from a local slab cache to hopefully reduce the fragmentation as we will > + * then be able to free all pages en masse upon idling. 
> + */ > + > +int i915_gt_active_init(struct i915_gt_active *gt); > +void i915_gt_active_park(struct i915_gt_active *gt); > +void i915_gt_active_fini(struct i915_gt_active *gt); > + > +#define i915_gt_active(i915) (&(i915)->gt.active_refs) > + > #endif /* _I915_ACTIVE_H_ */ > diff --git a/drivers/gpu/drm/i915/i915_active_types.h b/drivers/gpu/drm/i915/i915_active_types.h > index 411e502ed8dd..3d41c33ca78c 100644 > --- a/drivers/gpu/drm/i915/i915_active_types.h > +++ b/drivers/gpu/drm/i915/i915_active_types.h > @@ -7,14 +7,17 @@ > #ifndef _I915_ACTIVE_TYPES_H_ > #define _I915_ACTIVE_TYPES_H_ > > +#include <linux/list.h> > #include <linux/rbtree.h> > > #include "i915_request.h" > > -struct drm_i915_private; > +struct i915_gt_active; > +struct kmem_cache; > > struct i915_active { > - struct drm_i915_private *i915; > + struct i915_gt_active *gt; gt_active would be better - gt is to vague. > + struct list_head active_link; > > struct rb_root tree; > struct i915_gem_active last; > @@ -23,4 +26,9 @@ struct i915_active { > void (*retire)(struct i915_active *ref); > }; > > +struct i915_gt_active { > + struct list_head active_refs; > + struct kmem_cache *slab_cache; > +}; > + > #endif /* _I915_ACTIVE_TYPES_H_ */ > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 8ec28a7f5452..480ab3e00ba8 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -1984,6 +1984,8 @@ struct drm_i915_private { > struct list_head hwsp_free_list; > } timelines; > > + struct i915_gt_active active_refs; > + > struct list_head active_rings; > struct list_head closed_vma; > u32 active_requests; > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index caccff87a2a1..2bc735df408b 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -130,6 +130,7 @@ static u32 __i915_gem_park(struct drm_i915_private *i915) > > intel_engines_park(i915); > i915_timelines_park(i915); 
> + i915_gt_active_park(i915_gt_active(i915)); The i915_gt_active macro is just too horrible IMHO. Why? :) Make i915_gt_active_park take i915, or i915->gt_active. > > i915_pmu_gt_parked(i915); > i915_vma_parked(i915); > @@ -4998,15 +4999,19 @@ int i915_gem_init(struct drm_i915_private *dev_priv) > dev_priv->gt.cleanup_engine = intel_engine_cleanup; > } > > + ret = i915_gt_active_init(i915_gt_active(dev_priv)); > + if (ret) > + return ret; > + > i915_timelines_init(dev_priv); > > ret = i915_gem_init_userptr(dev_priv); > if (ret) > - return ret; > + goto err_timelines; > > ret = intel_uc_init_misc(dev_priv); > if (ret) > - return ret; > + goto err_userptr; > > ret = intel_wopcm_init(&dev_priv->wopcm); > if (ret) > @@ -5122,9 +5127,13 @@ int i915_gem_init(struct drm_i915_private *dev_priv) > err_uc_misc: > intel_uc_fini_misc(dev_priv); > > - if (ret != -EIO) { > +err_userptr: > + if (ret != -EIO) > i915_gem_cleanup_userptr(dev_priv); > +err_timelines: > + if (ret != -EIO) { > i915_timelines_fini(dev_priv); > + i915_gt_active_fini(i915_gt_active(dev_priv)); > } > > if (ret == -EIO) { > @@ -5177,6 +5186,7 @@ void i915_gem_fini(struct drm_i915_private *dev_priv) > intel_uc_fini_misc(dev_priv); > i915_gem_cleanup_userptr(dev_priv); > i915_timelines_fini(dev_priv); > + i915_gt_active_fini(i915_gt_active(dev_priv)); > > i915_gem_drain_freed_objects(dev_priv); > > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c > index e625659c03a2..d8819de0d6ee 100644 > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c > @@ -1917,7 +1917,7 @@ static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size) > if (!vma) > return ERR_PTR(-ENOMEM); > > - i915_active_init(i915, &vma->active, NULL); > + i915_active_init(i915_gt_active(i915), &vma->active, NULL); > init_request_active(&vma->last_fence, NULL); > > vma->vm = &ggtt->vm; > diff --git a/drivers/gpu/drm/i915/i915_vma.c 
b/drivers/gpu/drm/i915/i915_vma.c > index d4772061e642..2456bfb4877b 100644 > --- a/drivers/gpu/drm/i915/i915_vma.c > +++ b/drivers/gpu/drm/i915/i915_vma.c > @@ -119,7 +119,8 @@ vma_create(struct drm_i915_gem_object *obj, > if (vma == NULL) > return ERR_PTR(-ENOMEM); > > - i915_active_init(vm->i915, &vma->active, __i915_vma_retire); > + i915_active_init(i915_gt_active(vm->i915), > + &vma->active, __i915_vma_retire); > init_request_active(&vma->last_fence, NULL); > > vma->vm = vm; > diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c > index 7c5c3068565b..0e923476920e 100644 > --- a/drivers/gpu/drm/i915/selftests/i915_active.c > +++ b/drivers/gpu/drm/i915/selftests/i915_active.c > @@ -30,7 +30,8 @@ static int __live_active_setup(struct drm_i915_private *i915, > unsigned int count = 0; > int err = 0; > > - i915_active_init(i915, &active->base, __live_active_retire); > + i915_active_init(i915_gt_active(i915), > + &active->base, __live_active_retire); > active->retired = false; > > if (!i915_active_acquire(&active->base)) { > diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c > index 074a0d9cbf26..5b88f74c1677 100644 > --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c > +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c > @@ -69,6 +69,7 @@ static void mock_device_release(struct drm_device *dev) > mutex_unlock(&i915->drm.struct_mutex); > > i915_timelines_fini(i915); > + i915_gt_active_fini(i915_gt_active(i915)); > > drain_workqueue(i915->wq); > i915_gem_drain_freed_objects(i915); > @@ -228,6 +229,9 @@ struct drm_i915_private *mock_gem_device(void) > if (!i915->priorities) > goto err_dependencies; > > + if (i915_gt_active_init(i915_gt_active(i915))) > + goto err_priorities; > + > i915_timelines_init(i915); > > INIT_LIST_HEAD(&i915->gt.active_rings); > @@ -257,6 +261,8 @@ struct drm_i915_private *mock_gem_device(void) > err_unlock: > 
mutex_unlock(&i915->drm.struct_mutex); > i915_timelines_fini(i915); > + i915_gt_active_fini(i915_gt_active(i915)); > +err_priorities: > kmem_cache_destroy(i915->priorities); > err_dependencies: > kmem_cache_destroy(i915->dependencies); > Regards, Tvrtko
Quoting Tvrtko Ursulin (2019-01-30 15:57:06) > > On 30/01/2019 02:19, Chris Wilson wrote: > > Wrap the active tracking for a GPU references in a slabcache for faster > > allocations, and keep track of inflight nodes so we can reap the > > stale entries upon parking (thereby trimming our memory usage). > > I suggest a two staged approach. First patch add a slab cache (you can > also add kmem_cache_shrink on park as we do for other caches), then add > the parking/reaping bit. Not really seeing the point, the lesson learnt is that we should be tidying up on parking and that's why having a slab makes sense in the first place. I can see one argument to not do the idle reaping immediately and that's if we apply the reaping on vma idle instead. Then parking is redundant. > Under what scenarios we end up not freeing active nodes sufficiently? It > would have to be some user which keeps many contexts around, having only > used them once? Never in a typical scenario :) Almost every allocation is served from the last_request slot, you need to have a pair of concurrent references to a vma for it to activate, and that takes multiple engines reading the same object within a context/ppgtt. > > struct i915_active { > > - struct drm_i915_private *i915; > > + struct i915_gt_active *gt; > > gt_active would be better - gt is to vague. As a backpointer though it's defined by its owner. Give me a better name for everything. > > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > > index caccff87a2a1..2bc735df408b 100644 > > --- a/drivers/gpu/drm/i915/i915_gem.c > > +++ b/drivers/gpu/drm/i915/i915_gem.c > > @@ -130,6 +130,7 @@ static u32 __i915_gem_park(struct drm_i915_private *i915) > > > > intel_engines_park(i915); > > i915_timelines_park(i915); > > + i915_gt_active_park(i915_gt_active(i915)); > > The i915_gt_active macro is just too horrible IMHO. Why? :) Because it was called gt.active_refs and that was irking me. I really haven't settled on the name. 
> Make i915_gt_active_park take i915, or i915->gt_active. The pretense at encapsulation was nice, and I'd like to push harder on that front. -Chris
Quoting Chris Wilson (2019-01-30 16:08:59) > Quoting Tvrtko Ursulin (2019-01-30 15:57:06) > > Under what scenarios we end up not freeing active nodes sufficiently? It > > would have to be some user which keeps many contexts around, having only > > used them once? > > Never in a typical scenario :) Almost every allocation is served from > the last_request slot, you need to have a pair of concurrent references > to a vma for it to activate, and that takes multiple engines reading the > same object within a context/ppgtt. That being said, that applies to vma. HW semaphores is an example where the cacheline could be shared far and wide with uncertain lifetimes. Context barriers in contrast just have a few different timelines and idle almost immediately. -Chris
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c index e0182e19cb8b..3c7abbde42ac 100644 --- a/drivers/gpu/drm/i915/i915_active.c +++ b/drivers/gpu/drm/i915/i915_active.c @@ -7,7 +7,9 @@ #include "i915_drv.h" #include "i915_active.h" -#define BKL(ref) (&(ref)->i915->drm.struct_mutex) +#define i915_from_gt(x) \ + container_of(x, struct drm_i915_private, gt.active_refs) +#define BKL(ref) (&i915_from_gt((ref)->gt)->drm.struct_mutex) struct active_node { struct i915_gem_active base; @@ -79,11 +81,11 @@ active_instance(struct i915_active *ref, u64 idx) p = &parent->rb_left; } - node = kmalloc(sizeof(*node), GFP_KERNEL); + node = kmem_cache_alloc(ref->gt->slab_cache, GFP_KERNEL); /* kmalloc may retire the ref->last (thanks shrinker)! */ if (unlikely(!i915_gem_active_raw(&ref->last, BKL(ref)))) { - kfree(node); + kmem_cache_free(ref->gt->slab_cache, node); goto out; } @@ -94,6 +96,9 @@ active_instance(struct i915_active *ref, u64 idx) node->ref = ref; node->timeline = idx; + if (RB_EMPTY_ROOT(&ref->tree)) + list_add(&ref->active_link, &ref->gt->active_refs); + rb_link_node(&node->node, parent, p); rb_insert_color(&node->node, &ref->tree); @@ -119,11 +124,11 @@ active_instance(struct i915_active *ref, u64 idx) return &ref->last; } -void i915_active_init(struct drm_i915_private *i915, +void i915_active_init(struct i915_gt_active *gt, struct i915_active *ref, void (*retire)(struct i915_active *ref)) { - ref->i915 = i915; + ref->gt = gt; ref->retire = retire; ref->tree = RB_ROOT; init_request_active(&ref->last, last_retire); @@ -161,6 +166,7 @@ void i915_active_release(struct i915_active *ref) int i915_active_wait(struct i915_active *ref) { + struct kmem_cache *slab = ref->gt->slab_cache; struct active_node *it, *n; int ret; @@ -168,15 +174,19 @@ int i915_active_wait(struct i915_active *ref) if (ret) return ret; + if (RB_EMPTY_ROOT(&ref->tree)) + return 0; + rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) { ret = 
i915_gem_active_retire(&it->base, BKL(ref)); if (ret) return ret; GEM_BUG_ON(i915_gem_active_isset(&it->base)); - kfree(it); + kmem_cache_free(slab, it); } ref->tree = RB_ROOT; + list_del(&ref->active_link); return 0; } @@ -210,15 +220,46 @@ int i915_request_await_active(struct i915_request *rq, struct i915_active *ref) void i915_active_fini(struct i915_active *ref) { + struct kmem_cache *slab = ref->gt->slab_cache; struct active_node *it, *n; + lockdep_assert_held(BKL(ref)); GEM_BUG_ON(i915_gem_active_isset(&ref->last)); + if (RB_EMPTY_ROOT(&ref->tree)) + return; + rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) { GEM_BUG_ON(i915_gem_active_isset(&it->base)); - kfree(it); + kmem_cache_free(slab, it); } ref->tree = RB_ROOT; + list_del(&ref->active_link); +} + +int i915_gt_active_init(struct i915_gt_active *gt) +{ + gt->slab_cache = KMEM_CACHE(active_node, SLAB_HWCACHE_ALIGN); + if (!gt->slab_cache) + return -ENOMEM; + + INIT_LIST_HEAD(&gt->active_refs); + + return 0; +} + +void i915_gt_active_park(struct i915_gt_active *gt) +{ + struct i915_active *it, *n; + + list_for_each_entry_safe(it, n, &gt->active_refs, active_link) + i915_active_fini(it); +} + +void i915_gt_active_fini(struct i915_gt_active *gt) +{ + GEM_BUG_ON(!list_empty(&gt->active_refs)); + kmem_cache_destroy(gt->slab_cache); } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h index c0729a046f98..41c4a5da84c8 100644 --- a/drivers/gpu/drm/i915/i915_active.h +++ b/drivers/gpu/drm/i915/i915_active.h @@ -9,10 +9,6 @@ #include "i915_active_types.h" -#include <linux/rbtree.h> - -#include "i915_request.h" - /* * GPU activity tracking * @@ -39,7 +35,7 @@ * synchronisation. 
*/ -void i915_active_init(struct drm_i915_private *i915, +void i915_active_init(struct i915_gt_active *gt, struct i915_active *ref, void (*retire)(struct i915_active *ref)); @@ -63,4 +59,19 @@ i915_active_is_idle(const struct i915_active *ref) void i915_active_fini(struct i915_active *ref); +/* + * Active refs memory management + * + * To be more economical with memory, we reap all the i915_active trees on + * parking the GPU (when we know the GPU is inactive) and allocate the nodes + * from a local slab cache to hopefully reduce the fragmentation as we will + * then be able to free all pages en masse upon idling. + */ + +int i915_gt_active_init(struct i915_gt_active *gt); +void i915_gt_active_park(struct i915_gt_active *gt); +void i915_gt_active_fini(struct i915_gt_active *gt); + +#define i915_gt_active(i915) (&(i915)->gt.active_refs) + #endif /* _I915_ACTIVE_H_ */ diff --git a/drivers/gpu/drm/i915/i915_active_types.h b/drivers/gpu/drm/i915/i915_active_types.h index 411e502ed8dd..3d41c33ca78c 100644 --- a/drivers/gpu/drm/i915/i915_active_types.h +++ b/drivers/gpu/drm/i915/i915_active_types.h @@ -7,14 +7,17 @@ #ifndef _I915_ACTIVE_TYPES_H_ #define _I915_ACTIVE_TYPES_H_ +#include <linux/list.h> #include <linux/rbtree.h> #include "i915_request.h" -struct drm_i915_private; +struct i915_gt_active; +struct kmem_cache; struct i915_active { - struct drm_i915_private *i915; + struct i915_gt_active *gt; + struct list_head active_link; struct rb_root tree; struct i915_gem_active last; @@ -23,4 +26,9 @@ struct i915_active { void (*retire)(struct i915_active *ref); }; +struct i915_gt_active { + struct list_head active_refs; + struct kmem_cache *slab_cache; +}; + #endif /* _I915_ACTIVE_TYPES_H_ */ diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 8ec28a7f5452..480ab3e00ba8 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1984,6 +1984,8 @@ struct drm_i915_private { struct list_head hwsp_free_list; } 
timelines; + struct i915_gt_active active_refs; + struct list_head active_rings; struct list_head closed_vma; u32 active_requests; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index caccff87a2a1..2bc735df408b 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -130,6 +130,7 @@ static u32 __i915_gem_park(struct drm_i915_private *i915) intel_engines_park(i915); i915_timelines_park(i915); + i915_gt_active_park(i915_gt_active(i915)); i915_pmu_gt_parked(i915); i915_vma_parked(i915); @@ -4998,15 +4999,19 @@ int i915_gem_init(struct drm_i915_private *dev_priv) dev_priv->gt.cleanup_engine = intel_engine_cleanup; } + ret = i915_gt_active_init(i915_gt_active(dev_priv)); + if (ret) + return ret; + i915_timelines_init(dev_priv); ret = i915_gem_init_userptr(dev_priv); if (ret) - return ret; + goto err_timelines; ret = intel_uc_init_misc(dev_priv); if (ret) - return ret; + goto err_userptr; ret = intel_wopcm_init(&dev_priv->wopcm); if (ret) @@ -5122,9 +5127,13 @@ int i915_gem_init(struct drm_i915_private *dev_priv) err_uc_misc: intel_uc_fini_misc(dev_priv); - if (ret != -EIO) { +err_userptr: + if (ret != -EIO) i915_gem_cleanup_userptr(dev_priv); +err_timelines: + if (ret != -EIO) { i915_timelines_fini(dev_priv); + i915_gt_active_fini(i915_gt_active(dev_priv)); } if (ret == -EIO) { @@ -5177,6 +5186,7 @@ void i915_gem_fini(struct drm_i915_private *dev_priv) intel_uc_fini_misc(dev_priv); i915_gem_cleanup_userptr(dev_priv); i915_timelines_fini(dev_priv); + i915_gt_active_fini(i915_gt_active(dev_priv)); i915_gem_drain_freed_objects(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index e625659c03a2..d8819de0d6ee 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1917,7 +1917,7 @@ static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size) if (!vma) return ERR_PTR(-ENOMEM); - i915_active_init(i915, 
&vma->active, NULL); + i915_active_init(i915_gt_active(i915), &vma->active, NULL); init_request_active(&vma->last_fence, NULL); vma->vm = &ggtt->vm; diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index d4772061e642..2456bfb4877b 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -119,7 +119,8 @@ vma_create(struct drm_i915_gem_object *obj, if (vma == NULL) return ERR_PTR(-ENOMEM); - i915_active_init(vm->i915, &vma->active, __i915_vma_retire); + i915_active_init(i915_gt_active(vm->i915), + &vma->active, __i915_vma_retire); init_request_active(&vma->last_fence, NULL); vma->vm = vm; diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c index 7c5c3068565b..0e923476920e 100644 --- a/drivers/gpu/drm/i915/selftests/i915_active.c +++ b/drivers/gpu/drm/i915/selftests/i915_active.c @@ -30,7 +30,8 @@ static int __live_active_setup(struct drm_i915_private *i915, unsigned int count = 0; int err = 0; - i915_active_init(i915, &active->base, __live_active_retire); + i915_active_init(i915_gt_active(i915), + &active->base, __live_active_retire); active->retired = false; if (!i915_active_acquire(&active->base)) { diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index 074a0d9cbf26..5b88f74c1677 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -69,6 +69,7 @@ static void mock_device_release(struct drm_device *dev) mutex_unlock(&i915->drm.struct_mutex); i915_timelines_fini(i915); + i915_gt_active_fini(i915_gt_active(i915)); drain_workqueue(i915->wq); i915_gem_drain_freed_objects(i915); @@ -228,6 +229,9 @@ struct drm_i915_private *mock_gem_device(void) if (!i915->priorities) goto err_dependencies; + if (i915_gt_active_init(i915_gt_active(i915))) + goto err_priorities; + i915_timelines_init(i915); INIT_LIST_HEAD(&i915->gt.active_rings); @@ 
-257,6 +261,8 @@ struct drm_i915_private *mock_gem_device(void) err_unlock: mutex_unlock(&i915->drm.struct_mutex); i915_timelines_fini(i915); + i915_gt_active_fini(i915_gt_active(i915)); +err_priorities: kmem_cache_destroy(i915->priorities); err_dependencies: kmem_cache_destroy(i915->dependencies);
Wrap the active tracking for GPU references in a slabcache for faster allocations, and keep track of inflight nodes so we can reap the stale entries upon parking (thereby trimming our memory usage). Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> --- drivers/gpu/drm/i915/i915_active.c | 55 ++++++++++++++++--- drivers/gpu/drm/i915/i915_active.h | 21 +++++-- drivers/gpu/drm/i915/i915_active_types.h | 12 +++- drivers/gpu/drm/i915/i915_drv.h | 2 + drivers/gpu/drm/i915/i915_gem.c | 16 +++++- drivers/gpu/drm/i915/i915_gem_gtt.c | 2 +- drivers/gpu/drm/i915/i915_vma.c | 3 +- drivers/gpu/drm/i915/selftests/i915_active.c | 3 +- .../gpu/drm/i915/selftests/mock_gem_device.c | 6 ++ 9 files changed, 100 insertions(+), 20 deletions(-)