diff mbox

[RFC] drm/i915/sw_fence: Allocate callbacks from dedicated slab caches

Message ID 1478108647-11267-1-git-send-email-tvrtko.ursulin@linux.intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Tvrtko Ursulin Nov. 2, 2016, 5:44 p.m. UTC
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Not sure if it matters for performance at all but it should
save some wastage and enable a better insight into the usage.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c      | 10 +++++-
 drivers/gpu/drm/i915/i915_sw_fence.c | 67 +++++++++++++++++++++++++++++++++---
 drivers/gpu/drm/i915/i915_sw_fence.h |  3 ++
 3 files changed, 75 insertions(+), 5 deletions(-)

Comments

Chris Wilson Nov. 2, 2016, 5:58 p.m. UTC | #1
On Wed, Nov 02, 2016 at 05:44:07PM +0000, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> Not sure if it matters for performance at all but it should
> save some wastage and enable a better insight into the usage.

With the caveat of avoiding the plug into the i915.ko...

This should wait until after kernel/fence.c, as the split will make this
a little more interesting.

> +int i915_init_sw_fences(void)
> +{
> +	struct kmem_cache *wq_cache, *cb_cache;
> +
> +	mutex_lock(&i915_sw_fence_mutex);

If switched over to init ctor, the mutex is then redundant.

> +	if (i915_sw_fence_usecnt == 0) {
> +		wq_cache = kmem_cache_create("i915_sw_fence_wq",
> +					     sizeof(wait_queue_t),
> +					     __alignof__(wait_queue_head_t),
> +					     0, NULL);
> +		if (!wq_cache)
> +			goto err;
> +
> +		cb_cache = kmem_cache_create("i915_sw_fence_cb",
> +					     sizeof(struct i915_sw_dma_fence_cb),
> +					     __alignof__(struct i915_sw_dma_fence_cb),
> +					     0, NULL);

For example, this cache will need to end up in
drivers/dma-buf/dma-fence.c
-Chris
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 5839bebba64a..57e550449992 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4748,6 +4748,10 @@  i915_gem_load_init(struct drm_device *dev)
 		goto err_vmas;
 	}
 
+	err = i915_init_sw_fences();
+	if (err)
+		goto err_requests;
+
 	mutex_lock(&dev_priv->drm.struct_mutex);
 	INIT_LIST_HEAD(&dev_priv->gt.timelines);
 	err = i915_gem_timeline_init(dev_priv,
@@ -4755,7 +4759,7 @@  i915_gem_load_init(struct drm_device *dev)
 				     "[execution]");
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 	if (err)
-		goto err_requests;
+		goto err_sw_fences;
 
 	INIT_LIST_HEAD(&dev_priv->context_list);
 	INIT_WORK(&dev_priv->mm.free_work, __i915_gem_free_work);
@@ -4783,6 +4787,8 @@  i915_gem_load_init(struct drm_device *dev)
 
 	return 0;
 
+err_sw_fences:
+	i915_fini_sw_fences();
 err_requests:
 	kmem_cache_destroy(dev_priv->requests);
 err_vmas:
@@ -4799,6 +4805,8 @@  void i915_gem_load_cleanup(struct drm_device *dev)
 
 	WARN_ON(!llist_empty(&dev_priv->mm.free_list));
 
+	i915_fini_sw_fences();
+
 	kmem_cache_destroy(dev_priv->requests);
 	kmem_cache_destroy(dev_priv->vmas);
 	kmem_cache_destroy(dev_priv->objects);
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c
index 95f2f12e0917..5f814b60e2c0 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence.c
+++ b/drivers/gpu/drm/i915/i915_sw_fence.c
@@ -17,6 +17,11 @@ 
 
 static DEFINE_SPINLOCK(i915_sw_fence_lock);
 
+static DEFINE_MUTEX(i915_sw_fence_mutex);
+static unsigned int i915_sw_fence_usecnt;
+static struct kmem_cache *i915_sw_fence_wq_cache;
+static struct kmem_cache *i915_sw_fence_cb_cache;
+
 static int __i915_sw_fence_notify(struct i915_sw_fence *fence,
 				  enum i915_sw_fence_notify state)
 {
@@ -138,7 +143,7 @@  static int i915_sw_fence_wake(wait_queue_t *wq, unsigned mode, int flags, void *
 	__i915_sw_fence_complete(wq->private, key);
 	i915_sw_fence_put(wq->private);
 	if (wq->flags & I915_SW_FENCE_FLAG_ALLOC)
-		kfree(wq);
+		kmem_cache_free(i915_sw_fence_wq_cache, wq);
 	return 0;
 }
 
@@ -212,7 +217,7 @@  static int __i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence,
 
 	pending = 0;
 	if (!wq) {
-		wq = kmalloc(sizeof(*wq), gfp);
+		wq = kmem_cache_alloc(i915_sw_fence_wq_cache, gfp);
 		if (!wq) {
 			if (!gfpflags_allow_blocking(gfp))
 				return -ENOMEM;
@@ -290,7 +295,7 @@  static void dma_i915_sw_fence_wake(struct dma_fence *dma,
 		i915_sw_fence_commit(cb->fence);
 	dma_fence_put(cb->dma);
 
-	kfree(cb);
+	kmem_cache_free(i915_sw_fence_cb_cache, cb);
 }
 
 int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
@@ -304,7 +309,7 @@  int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
 	if (dma_fence_is_signaled(dma))
 		return 0;
 
-	cb = kmalloc(sizeof(*cb), gfp);
+	cb = kmem_cache_alloc(i915_sw_fence_cb_cache, gfp);
 	if (!cb) {
 		if (!gfpflags_allow_blocking(gfp))
 			return -ENOMEM;
@@ -393,3 +398,57 @@  int i915_sw_fence_await_reservation(struct i915_sw_fence *fence,
 
 	return ret;
 }
+
+/*
+ * i915_init_sw_fences - take a reference on the shared sw_fence slab caches
+ *
+ * The first caller creates the "i915_sw_fence_wq" and "i915_sw_fence_cb"
+ * slab caches and publishes them in the file-scope cache pointers. Every
+ * successful call, including the first, takes one reference in
+ * i915_sw_fence_usecnt; the count and the cache pointers are only modified
+ * under i915_sw_fence_mutex.
+ *
+ * Returns 0 on success, or -ENOMEM if either cache could not be created.
+ * On failure no reference is taken and no cache is left allocated.
+ */
+int i915_init_sw_fences(void)
+{
+	struct kmem_cache *wq_cache, *cb_cache;
+
+	mutex_lock(&i915_sw_fence_mutex);
+
+	if (i915_sw_fence_usecnt == 0) {
+		/* Objects in this cache are wait_queue_t, so align for that type. */
+		wq_cache = kmem_cache_create("i915_sw_fence_wq",
+					     sizeof(wait_queue_t),
+					     __alignof__(wait_queue_t),
+					     0, NULL);
+		if (!wq_cache)
+			goto err;
+
+		cb_cache = kmem_cache_create("i915_sw_fence_cb",
+					     sizeof(struct i915_sw_dma_fence_cb),
+					     __alignof__(struct i915_sw_dma_fence_cb),
+					     0, NULL);
+		if (!cb_cache) {
+			/* Do not leak the first cache when the second one fails. */
+			kmem_cache_destroy(wq_cache);
+			goto err;
+		}
+
+		/* Publish only once both caches exist. */
+		i915_sw_fence_wq_cache = wq_cache;
+		i915_sw_fence_cb_cache = cb_cache;
+	}
+
+	/*
+	 * Count every successful caller, not just the one that created the
+	 * caches: i915_fini_sw_fences() decrements unconditionally and
+	 * destroys the caches once the count drops to zero.
+	 */
+	i915_sw_fence_usecnt++;
+
+	mutex_unlock(&i915_sw_fence_mutex);
+
+	return 0;
+
+err:
+	mutex_unlock(&i915_sw_fence_mutex);
+	return -ENOMEM;
+}
+
+/*
+ * i915_fini_sw_fences - drop a reference on the shared sw_fence slab caches
+ *
+ * Decrements i915_sw_fence_usecnt under i915_sw_fence_mutex. When the count
+ * reaches zero, both slab caches are destroyed and their file-scope pointers
+ * are reset to NULL.
+ */
+void i915_fini_sw_fences(void)
+{
+	mutex_lock(&i915_sw_fence_mutex);
+
+	/* Other users remain: leave the caches alone. */
+	if (--i915_sw_fence_usecnt != 0)
+		goto unlock;
+
+	kmem_cache_destroy(i915_sw_fence_cb_cache);
+	kmem_cache_destroy(i915_sw_fence_wq_cache);
+
+	i915_sw_fence_wq_cache = NULL;
+	i915_sw_fence_cb_cache = NULL;
+
+unlock:
+	mutex_unlock(&i915_sw_fence_mutex);
+}
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.h b/drivers/gpu/drm/i915/i915_sw_fence.h
index 707dfc4f0da5..33153a9c6f2a 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence.h
+++ b/drivers/gpu/drm/i915/i915_sw_fence.h
@@ -40,6 +40,9 @@  typedef int (*i915_sw_fence_notify_t)(struct i915_sw_fence *,
 				      enum i915_sw_fence_notify state);
 #define __i915_sw_fence_call __aligned(4)
 
+int i915_init_sw_fences(void);
+void i915_fini_sw_fences(void);
+
 void i915_sw_fence_init(struct i915_sw_fence *fence, i915_sw_fence_notify_t fn);
 void i915_sw_fence_commit(struct i915_sw_fence *fence);