@@ -7,7 +7,9 @@
#include "i915_drv.h"
#include "i915_active.h"
-#define BKL(ref) (&(ref)->i915->drm.struct_mutex)
+#define i915_from_gt(x) \
+ container_of(x, struct drm_i915_private, gt.active_refs)
+#define BKL(ref) (&i915_from_gt((ref)->gt)->drm.struct_mutex)
struct active_node {
struct i915_gem_active base;
@@ -79,11 +81,11 @@ active_instance(struct i915_active *ref, u64 idx)
p = &parent->rb_left;
}
- node = kmalloc(sizeof(*node), GFP_KERNEL);
+ node = kmem_cache_alloc(ref->gt->slab_cache, GFP_KERNEL);
/* kmalloc may retire the ref->last (thanks shrinker)! */
if (unlikely(!i915_gem_active_raw(&ref->last, BKL(ref)))) {
- kfree(node);
+ kmem_cache_free(ref->gt->slab_cache, node);
goto out;
}
@@ -94,6 +96,9 @@ active_instance(struct i915_active *ref, u64 idx)
node->ref = ref;
node->timeline = idx;
+ if (RB_EMPTY_ROOT(&ref->tree))
+ list_add(&ref->active_link, &ref->gt->active_refs);
+
rb_link_node(&node->node, parent, p);
rb_insert_color(&node->node, &ref->tree);
@@ -119,11 +124,11 @@ active_instance(struct i915_active *ref, u64 idx)
return &ref->last;
}
-void i915_active_init(struct drm_i915_private *i915,
+void i915_active_init(struct i915_gt_active *gt,
struct i915_active *ref,
void (*retire)(struct i915_active *ref))
{
- ref->i915 = i915;
+ ref->gt = gt;
ref->retire = retire;
ref->tree = RB_ROOT;
init_request_active(&ref->last, last_retire);
@@ -161,6 +166,7 @@ void i915_active_release(struct i915_active *ref)
int i915_active_wait(struct i915_active *ref)
{
+ struct kmem_cache *slab = ref->gt->slab_cache;
struct active_node *it, *n;
int ret;
@@ -168,15 +174,19 @@ int i915_active_wait(struct i915_active *ref)
if (ret)
return ret;
+ if (RB_EMPTY_ROOT(&ref->tree))
+ return 0;
+
rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
ret = i915_gem_active_retire(&it->base, BKL(ref));
if (ret)
return ret;
GEM_BUG_ON(i915_gem_active_isset(&it->base));
- kfree(it);
+ kmem_cache_free(slab, it);
}
ref->tree = RB_ROOT;
+ list_del(&ref->active_link);
return 0;
}
@@ -210,15 +220,46 @@ int i915_request_await_active(struct i915_request *rq, struct i915_active *ref)
void i915_active_fini(struct i915_active *ref)
{
+ struct kmem_cache *slab = ref->gt->slab_cache;
struct active_node *it, *n;
+ lockdep_assert_held(BKL(ref));
GEM_BUG_ON(i915_gem_active_isset(&ref->last));
+ if (RB_EMPTY_ROOT(&ref->tree))
+ return;
+
rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
GEM_BUG_ON(i915_gem_active_isset(&it->base));
- kfree(it);
+ kmem_cache_free(slab, it);
}
ref->tree = RB_ROOT;
+ list_del(&ref->active_link);
+}
+
+int i915_gt_active_init(struct i915_gt_active *gt)
+{
+ gt->slab_cache = KMEM_CACHE(active_node, SLAB_HWCACHE_ALIGN);
+ if (!gt->slab_cache)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(>->active_refs);
+
+ return 0;
+}
+
+void i915_gt_active_park(struct i915_gt_active *gt)
+{
+ struct i915_active *it, *n;
+
+ list_for_each_entry_safe(it, n, >->active_refs, active_link)
+ i915_active_fini(it);
+}
+
+void i915_gt_active_fini(struct i915_gt_active *gt)
+{
+ GEM_BUG_ON(!list_empty(>->active_refs));
+ kmem_cache_destroy(gt->slab_cache);
}
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
@@ -7,11 +7,13 @@
#ifndef _I915_ACTIVE_H_
#define _I915_ACTIVE_H_
+#include <linux/list.h>
#include <linux/rbtree.h>
#include "i915_request.h"
-struct drm_i915_private;
+struct i915_gt_active;
+struct kmem_cache;
/*
* GPU activity tracking
@@ -40,7 +42,8 @@ struct drm_i915_private;
*/
struct i915_active {
- struct drm_i915_private *i915;
+ struct i915_gt_active *gt;
+ struct list_head active_link;
struct rb_root tree;
struct i915_gem_active last;
@@ -49,7 +52,7 @@ struct i915_active {
void (*retire)(struct i915_active *ref);
};
-void i915_active_init(struct drm_i915_private *i915,
+void i915_active_init(struct i915_gt_active *gt,
struct i915_active *ref,
void (*retire)(struct i915_active *ref));
@@ -73,4 +76,24 @@ i915_active_is_idle(const struct i915_active *ref)
void i915_active_fini(struct i915_active *ref);
+/*
+ * Active refs memory management
+ *
+ * To be more economical with memory, we reap all the i915_active trees on
+ * parking the GPU (when we know the GPU is inactive) and allocate the nodes
+ * from a local slab cache to hopefully reduce the fragmentation as we will
+ * then be able to free all pages en masse upon idling.
+ */
+
+struct i915_gt_active {
+ struct list_head active_refs;
+ struct kmem_cache *slab_cache;
+};
+
+int i915_gt_active_init(struct i915_gt_active *gt);
+void i915_gt_active_park(struct i915_gt_active *gt);
+void i915_gt_active_fini(struct i915_gt_active *gt);
+
+#define i915_gt_active(i915) (&(i915)->gt.active_refs)
+
#endif /* _I915_ACTIVE_H_ */
@@ -1984,6 +1984,8 @@ struct drm_i915_private {
struct list_head hwsp_free_list;
} timelines;
+ struct i915_gt_active active_refs;
+
struct list_head active_rings;
struct list_head closed_vma;
u32 active_requests;
@@ -171,6 +171,7 @@ static u32 __i915_gem_park(struct drm_i915_private *i915)
intel_engines_park(i915);
i915_timelines_park(i915);
+ i915_gt_active_park(i915_gt_active(i915));
i915_pmu_gt_parked(i915);
i915_vma_parked(i915);
@@ -5031,15 +5032,19 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
dev_priv->gt.cleanup_engine = intel_engine_cleanup;
}
+ ret = i915_gt_active_init(i915_gt_active(dev_priv));
+ if (ret)
+ return ret;
+
i915_timelines_init(dev_priv);
ret = i915_gem_init_userptr(dev_priv);
if (ret)
- return ret;
+ goto err_timelines;
ret = intel_uc_init_misc(dev_priv);
if (ret)
- return ret;
+ goto err_userptr;
ret = intel_wopcm_init(&dev_priv->wopcm);
if (ret)
@@ -5155,9 +5160,13 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
err_uc_misc:
intel_uc_fini_misc(dev_priv);
- if (ret != -EIO) {
+err_userptr:
+ if (ret != -EIO)
i915_gem_cleanup_userptr(dev_priv);
+err_timelines:
+ if (ret != -EIO) {
i915_timelines_fini(dev_priv);
+ i915_gt_active_fini(i915_gt_active(dev_priv));
}
if (ret == -EIO) {
@@ -5210,6 +5219,7 @@ void i915_gem_fini(struct drm_i915_private *dev_priv)
intel_uc_fini_misc(dev_priv);
i915_gem_cleanup_userptr(dev_priv);
i915_timelines_fini(dev_priv);
+ i915_gt_active_fini(i915_gt_active(dev_priv));
i915_gem_drain_freed_objects(dev_priv);
@@ -1917,7 +1917,7 @@ static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size)
if (!vma)
return ERR_PTR(-ENOMEM);
- i915_active_init(i915, &vma->active, NULL);
+ i915_active_init(i915_gt_active(i915), &vma->active, NULL);
init_request_active(&vma->last_fence, NULL);
vma->vm = &ggtt->vm;
@@ -119,7 +119,8 @@ vma_create(struct drm_i915_gem_object *obj,
if (vma == NULL)
return ERR_PTR(-ENOMEM);
- i915_active_init(vm->i915, &vma->active, __i915_vma_retire);
+ i915_active_init(i915_gt_active(vm->i915),
+ &vma->active, __i915_vma_retire);
init_request_active(&vma->last_fence, NULL);
vma->vm = vm;
@@ -30,7 +30,8 @@ static int __live_active_setup(struct drm_i915_private *i915,
unsigned int count = 0;
int err = 0;
- i915_active_init(i915, &active->base, __live_active_retire);
+ i915_active_init(i915_gt_active(i915),
+ &active->base, __live_active_retire);
active->retired = false;
if (!i915_active_acquire(&active->base)) {
@@ -66,9 +66,9 @@ static void mock_device_release(struct drm_device *dev)
for_each_engine(engine, i915, id)
mock_engine_free(engine);
i915_gem_contexts_fini(i915);
- mutex_unlock(&i915->drm.struct_mutex);
-
i915_timelines_fini(i915);
+ i915_gt_active_fini(i915_gt_active(i915));
+ mutex_unlock(&i915->drm.struct_mutex);
drain_workqueue(i915->wq);
i915_gem_drain_freed_objects(i915);
@@ -227,6 +227,9 @@ struct drm_i915_private *mock_gem_device(void)
if (!i915->priorities)
goto err_dependencies;
+ if (i915_gt_active_init(i915_gt_active(i915)))
+ goto err_priorities;
+
i915_timelines_init(i915);
INIT_LIST_HEAD(&i915->gt.active_rings);
@@ -256,6 +259,8 @@ struct drm_i915_private *mock_gem_device(void)
err_unlock:
mutex_unlock(&i915->drm.struct_mutex);
i915_timelines_fini(i915);
+ i915_gt_active_fini(i915_gt_active(i915));
+err_priorities:
kmem_cache_destroy(i915->priorities);
err_dependencies:
kmem_cache_destroy(i915->dependencies);
Wrap the active tracking for GPU references in a slabcache for faster allocations, and keep track of inflight nodes so we can reap the stale entries upon parking (thereby trimming our memory usage). Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> --- drivers/gpu/drm/i915/i915_active.c | 55 ++++++++++++++++--- drivers/gpu/drm/i915/i915_active.h | 29 +++++++++- drivers/gpu/drm/i915/i915_drv.h | 2 + drivers/gpu/drm/i915/i915_gem.c | 16 +++++- drivers/gpu/drm/i915/i915_gem_gtt.c | 2 +- drivers/gpu/drm/i915/i915_vma.c | 3 +- drivers/gpu/drm/i915/selftests/i915_active.c | 3 +- .../gpu/drm/i915/selftests/mock_gem_device.c | 9 ++- 8 files changed, 101 insertions(+), 18 deletions(-)