diff mbox

[3/4] drm/ttm, drm/vmwgfx: Use RCU locking for object lookups v3

Message ID 1352201511-6383-4-git-send-email-thellstrom@vmware.com (mailing list archive)
State New, archived
Headers show

Commit Message

Thomas Hellstrom Nov. 6, 2012, 11:31 a.m. UTC
The mostly used lookup+get put+potential_destroy path of TTM objects
is converted to use RCU locks. This will substantially decrease the amount
of locked bus cycles during normal operation.
Since we use kfree_rcu to free the objects, no rcu synchronization is needed
at module unload time.

v2: Don't touch include/linux/kref.h
v3: Adapt to kref_get_unless_zero return value change

Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
---
 drivers/gpu/drm/ttm/ttm_object.c         |   30 +++++++++++-------------------
 drivers/gpu/drm/vmwgfx/vmwgfx_resource.c |    8 ++++----
 include/drm/ttm/ttm_object.h             |    4 ++++
 3 files changed, 19 insertions(+), 23 deletions(-)

Comments

Dave Airlie Nov. 20, 2012, 6:19 a.m. UTC | #1
On Tue, Nov 6, 2012 at 9:31 PM, Thomas Hellstrom <thellstrom@vmware.com> wrote:
> The mostly used lookup+get put+potential_destroy path of TTM objects
> is converted to use RCU locks. This will substantially decrease the amount
> of locked bus cycles during normal operation.
> Since we use kfree_rcu to free the objects, no rcu synchronization is needed
> at module unload time.

As this is the first use of RCU in a drm driver from what I can see,
let me remind that the
RCU patent agreement AFAIK only covers GPL works.

So non-GPL or other OSes porting this code should take not of this.

Dave.
Thomas Hellstrom Nov. 20, 2012, 6:44 a.m. UTC | #2
On 11/20/2012 07:19 AM, Dave Airlie wrote:
> On Tue, Nov 6, 2012 at 9:31 PM, Thomas Hellstrom <thellstrom@vmware.com> wrote:
>> The mostly used lookup+get put+potential_destroy path of TTM objects
>> is converted to use RCU locks. This will substantially decrease the amount
>> of locked bus cycles during normal operation.
>> Since we use kfree_rcu to free the objects, no rcu synchronization is needed
>> at module unload time.
> As this is the first use of RCU in a drm driver from what I can see,
> let me remind that the
> RCU patent agreement AFAIK only covers GPL works.
>
> So non-GPL or other OSes porting this code should take not of this.
>
> Dave.

 From VMware's side this won't be a problem, since other VMware kernel 
modules (VMCI IIRC) use RCU.

In any case I have a new version of the "vmwgfx optimization" patch 
series that mostly add documentation and
annotation (by  using a drm_ht_xxx_rcu) interface for hashtab, after an 
internal review by Dmitry Torkov. I see you've already
applied the original patch series. Do you want me to send out the new 
one or rebase it against current drm-next?

Thanks,
Thomas
Dave Airlie Nov. 20, 2012, 7:59 a.m. UTC | #3
On Tue, Nov 20, 2012 at 4:44 PM, Thomas Hellstrom <thellstrom@vmware.com> wrote:
> On 11/20/2012 07:19 AM, Dave Airlie wrote:
>>
>> On Tue, Nov 6, 2012 at 9:31 PM, Thomas Hellstrom <thellstrom@vmware.com>
>> wrote:
>>>
>>> The mostly used lookup+get put+potential_destroy path of TTM objects
>>> is converted to use RCU locks. This will substantially decrease the
>>> amount
>>> of locked bus cycles during normal operation.
>>> Since we use kfree_rcu to free the objects, no rcu synchronization is
>>> needed
>>> at module unload time.
>>
>> As this is the first use of RCU in a drm driver from what I can see,
>> let me remind that the
>> RCU patent agreement AFAIK only covers GPL works.
>>
>> So non-GPL or other OSes porting this code should take not of this.
>>
>> Dave.
>
>
> From VMware's side this won't be a problem, since other VMware kernel
> modules (VMCI IIRC) use RCU.
>
> In any case I have a new version of the "vmwgfx optimization" patch series
> that mostly add documentation and
> annotation (by  using a drm_ht_xxx_rcu) interface for hashtab, after an
> internal review by Dmitry Torkov. I see you've already
> applied the original patch series. Do you want me to send out the new one or
> rebase it against current drm-next?

Can you rebase it on top of -next?

Dave.
diff mbox

Patch

diff --git a/drivers/gpu/drm/ttm/ttm_object.c b/drivers/gpu/drm/ttm/ttm_object.c
index c785787..f18eeb4 100644
--- a/drivers/gpu/drm/ttm/ttm_object.c
+++ b/drivers/gpu/drm/ttm/ttm_object.c
@@ -80,7 +80,7 @@  struct ttm_object_file {
  */
 
 struct ttm_object_device {
-	rwlock_t object_lock;
+	spinlock_t object_lock;
 	struct drm_open_hash object_hash;
 	atomic_t object_count;
 	struct ttm_mem_global *mem_glob;
@@ -157,12 +157,12 @@  int ttm_base_object_init(struct ttm_object_file *tfile,
 	base->refcount_release = refcount_release;
 	base->ref_obj_release = ref_obj_release;
 	base->object_type = object_type;
-	write_lock(&tdev->object_lock);
+	spin_lock(&tdev->object_lock);
 	kref_init(&base->refcount);
 	ret = drm_ht_just_insert_please(&tdev->object_hash,
 					&base->hash,
 					(unsigned long)base, 31, 0, 0);
-	write_unlock(&tdev->object_lock);
+	spin_unlock(&tdev->object_lock);
 	if (unlikely(ret != 0))
 		goto out_err0;
 
@@ -186,30 +186,22 @@  static void ttm_release_base(struct kref *kref)
 	    container_of(kref, struct ttm_base_object, refcount);
 	struct ttm_object_device *tdev = base->tfile->tdev;
 
+	spin_lock(&tdev->object_lock);
 	(void)drm_ht_remove_item(&tdev->object_hash, &base->hash);
-	write_unlock(&tdev->object_lock);
+	spin_unlock(&tdev->object_lock);
 	if (base->refcount_release) {
 		ttm_object_file_unref(&base->tfile);
 		base->refcount_release(&base);
 	}
-	write_lock(&tdev->object_lock);
 }
 
 void ttm_base_object_unref(struct ttm_base_object **p_base)
 {
 	struct ttm_base_object *base = *p_base;
-	struct ttm_object_device *tdev = base->tfile->tdev;
 
 	*p_base = NULL;
 
-	/*
-	 * Need to take the lock here to avoid racing with
-	 * users trying to look up the object.
-	 */
-
-	write_lock(&tdev->object_lock);
 	kref_put(&base->refcount, ttm_release_base);
-	write_unlock(&tdev->object_lock);
 }
 EXPORT_SYMBOL(ttm_base_object_unref);
 
@@ -221,14 +213,14 @@  struct ttm_base_object *ttm_base_object_lookup(struct ttm_object_file *tfile,
 	struct drm_hash_item *hash;
 	int ret;
 
-	read_lock(&tdev->object_lock);
+	rcu_read_lock();
 	ret = drm_ht_find_item(&tdev->object_hash, key, &hash);
 
 	if (likely(ret == 0)) {
 		base = drm_hash_entry(hash, struct ttm_base_object, hash);
-		kref_get(&base->refcount);
+		ret = kref_get_unless_zero(&base->refcount) ? 0 : -EINVAL;
 	}
-	read_unlock(&tdev->object_lock);
+	rcu_read_unlock();
 
 	if (unlikely(ret != 0))
 		return NULL;
@@ -426,7 +418,7 @@  struct ttm_object_device *ttm_object_device_init(struct ttm_mem_global
 		return NULL;
 
 	tdev->mem_glob = mem_glob;
-	rwlock_init(&tdev->object_lock);
+	spin_lock_init(&tdev->object_lock);
 	atomic_set(&tdev->object_count, 0);
 	ret = drm_ht_create(&tdev->object_hash, hash_order);
 
@@ -444,9 +436,9 @@  void ttm_object_device_release(struct ttm_object_device **p_tdev)
 
 	*p_tdev = NULL;
 
-	write_lock(&tdev->object_lock);
+	spin_lock(&tdev->object_lock);
 	drm_ht_remove(&tdev->object_hash);
-	write_unlock(&tdev->object_lock);
+	spin_unlock(&tdev->object_lock);
 
 	kfree(tdev);
 }
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
index da3c6b5..ae675c6 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
@@ -351,7 +351,7 @@  static void vmw_user_context_free(struct vmw_resource *res)
 	    container_of(res, struct vmw_user_context, res);
 	struct vmw_private *dev_priv = res->dev_priv;
 
-	kfree(ctx);
+	ttm_base_object_kfree(ctx, base);
 	ttm_mem_global_free(vmw_mem_glob(dev_priv),
 			    vmw_user_context_size);
 }
@@ -1147,7 +1147,7 @@  static void vmw_user_surface_free(struct vmw_resource *res)
 	kfree(srf->offsets);
 	kfree(srf->sizes);
 	kfree(srf->snooper.image);
-	kfree(user_srf);
+	ttm_base_object_kfree(user_srf, base);
 	ttm_mem_global_free(vmw_mem_glob(dev_priv), size);
 }
 
@@ -1575,7 +1575,7 @@  static void vmw_user_dmabuf_destroy(struct ttm_buffer_object *bo)
 {
 	struct vmw_user_dma_buffer *vmw_user_bo = vmw_user_dma_buffer(bo);
 
-	kfree(vmw_user_bo);
+	ttm_base_object_kfree(vmw_user_bo, base);
 }
 
 static void vmw_user_dmabuf_release(struct ttm_base_object **p_base)
@@ -1763,7 +1763,7 @@  static void vmw_user_stream_free(struct vmw_resource *res)
 	    container_of(res, struct vmw_user_stream, stream.res);
 	struct vmw_private *dev_priv = res->dev_priv;
 
-	kfree(stream);
+	ttm_base_object_kfree(stream, base);
 	ttm_mem_global_free(vmw_mem_glob(dev_priv),
 			    vmw_user_stream_size);
 }
diff --git a/include/drm/ttm/ttm_object.h b/include/drm/ttm/ttm_object.h
index b01c563..fc0cf06 100644
--- a/include/drm/ttm/ttm_object.h
+++ b/include/drm/ttm/ttm_object.h
@@ -40,6 +40,7 @@ 
 #include <linux/list.h>
 #include <drm/drm_hashtab.h>
 #include <linux/kref.h>
+#include <linux/rcupdate.h>
 #include <ttm/ttm_memory.h>
 
 /**
@@ -120,6 +121,7 @@  struct ttm_object_device;
  */
 
 struct ttm_base_object {
+	struct rcu_head rhead;
 	struct drm_hash_item hash;
 	enum ttm_object_type object_type;
 	bool shareable;
@@ -268,4 +270,6 @@  extern struct ttm_object_device *ttm_object_device_init
 
 extern void ttm_object_device_release(struct ttm_object_device **p_tdev);
 
+#define ttm_base_object_kfree(__object, __base)\
+	kfree_rcu(__object, __base.rhead)
 #endif