diff mbox series

drm: fix deadlock of syncobj v4

Message ID 20181023015008.6511-1-david1.zhou@amd.com (mailing list archive)
State New, archived
Headers show
Series drm: fix deadlock of syncobj v4 | expand

Commit Message

Chunming Zhou Oct. 23, 2018, 1:50 a.m. UTC
v2:
add a mutex between sync_cb execution and free.
v3:
clearly separating the roles for pt_lock and cb_mutex (Chris)
v4:
the cb_mutex should be taken outside of the pt_lock around this if() block. (Chris)

Tested by syncobj_basic and syncobj_wait of igt.

Signed-off-by: Chunming Zhou <david1.zhou@amd.com>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/drm_syncobj.c | 51 ++++++++++++++++++-----------------
 include/drm/drm_syncobj.h     |  8 ++++--
 2 files changed, 33 insertions(+), 26 deletions(-)

Comments

Chris Wilson Oct. 23, 2018, 7:51 a.m. UTC | #1
Quoting Chunming Zhou (2018-10-23 02:50:08)
> v2:
> add a mutex between sync_cb execution and free.
> v3:
> clearly separating the roles for pt_lock and cb_mutex (Chris)
> v4:
> the cb_mutex should be taken outside of the pt_lock around this if() block. (Chris)
> 
> Tested by syncobj_basic and syncobj_wait of igt.
> 
> Signed-off-by: Chunming Zhou <david1.zhou@amd.com>
> Cc: Daniel Vetter <daniel@ffwll.ch>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/drm_syncobj.c | 51 ++++++++++++++++++-----------------
>  include/drm/drm_syncobj.h     |  8 ++++--
>  2 files changed, 33 insertions(+), 26 deletions(-)
> 
> diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
> index 57bf6006394d..315f08132f6d 100644
> --- a/drivers/gpu/drm/drm_syncobj.c
> +++ b/drivers/gpu/drm/drm_syncobj.c
> @@ -125,23 +125,24 @@ static int drm_syncobj_fence_get_or_add_callback(struct drm_syncobj *syncobj,
>         if (!ret)
>                 return 1;
>  
> -       spin_lock(&syncobj->lock);
> +       mutex_lock(&syncobj->cb_mutex);
>         /* We've already tried once to get a fence and failed.  Now that we
>          * have the lock, try one more time just to be sure we don't add a
>          * callback when a fence has already been set.
>          */
> +       spin_lock(&syncobj->pt_lock);
>         if (!list_empty(&syncobj->signal_pt_list)) {
> -               spin_unlock(&syncobj->lock);
> +               spin_unlock(&syncobj->pt_lock);
>                 drm_syncobj_search_fence(syncobj, 0, 0, fence);
>                 if (*fence)

mutex_unlock(&syncobj->cb_mutex);

With that,
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>

Can you please resend with Cc: intel-gfx@lists.freedesktop.org so we can
double check the fix.
-Chris
Chunming Zhou Oct. 23, 2018, 8 a.m. UTC | #2
On 2018年10月23日 15:51, Chris Wilson wrote:
> Quoting Chunming Zhou (2018-10-23 02:50:08)
>> v2:
>> add a mutex between sync_cb execution and free.
>> v3:
>> clearly separating the roles for pt_lock and cb_mutex (Chris)
>> v4:
>> the cb_mutex should be taken outside of the pt_lock around this if() block. (Chris)
>>
>> Tested by syncobj_basic and syncobj_wait of igt.
>>
>> Signed-off-by: Chunming Zhou <david1.zhou@amd.com>
>> Cc: Daniel Vetter <daniel@ffwll.ch>
>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>> Cc: Christian König <christian.koenig@amd.com>
>> ---
>>   drivers/gpu/drm/drm_syncobj.c | 51 ++++++++++++++++++-----------------
>>   include/drm/drm_syncobj.h     |  8 ++++--
>>   2 files changed, 33 insertions(+), 26 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
>> index 57bf6006394d..315f08132f6d 100644
>> --- a/drivers/gpu/drm/drm_syncobj.c
>> +++ b/drivers/gpu/drm/drm_syncobj.c
>> @@ -125,23 +125,24 @@ static int drm_syncobj_fence_get_or_add_callback(struct drm_syncobj *syncobj,
>>          if (!ret)
>>                  return 1;
>>   
>> -       spin_lock(&syncobj->lock);
>> +       mutex_lock(&syncobj->cb_mutex);
>>          /* We've already tried once to get a fence and failed.  Now that we
>>           * have the lock, try one more time just to be sure we don't add a
>>           * callback when a fence has already been set.
>>           */
>> +       spin_lock(&syncobj->pt_lock);
>>          if (!list_empty(&syncobj->signal_pt_list)) {
>> -               spin_unlock(&syncobj->lock);
>> +               spin_unlock(&syncobj->pt_lock);
>>                  drm_syncobj_search_fence(syncobj, 0, 0, fence);
>>                  if (*fence)
> mutex_unlock(&syncobj->cb_mutex);
fixed.

>
> With that,
> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Thanks

>
> Can you please resend with Cc: intel-gfx@lists.freedesktop.org so we can
> double check the fix.
resent, Could you help to submit patch to drm-misc?

Thanks,
David Zhou
> -Chris
Chris Wilson Oct. 23, 2018, 8:47 a.m. UTC | #3
Quoting zhoucm1 (2018-10-23 09:00:26)
> 
> 
> On 2018年10月23日 15:51, Chris Wilson wrote:
> > Quoting Chunming Zhou (2018-10-23 02:50:08)
> >> v2:
> >> add a mutex between sync_cb execution and free.
> >> v3:
> >> clearly separating the roles for pt_lock and cb_mutex (Chris)
> >> v4:
> >> the cb_mutex should be taken outside of the pt_lock around this if() block. (Chris)
> >>
> >> Tested by syncobj_basic and syncobj_wait of igt.
> >>
> >> Signed-off-by: Chunming Zhou <david1.zhou@amd.com>
> >> Cc: Daniel Vetter <daniel@ffwll.ch>
> >> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> >> Cc: Christian König <christian.koenig@amd.com>
> >> ---
> >>   drivers/gpu/drm/drm_syncobj.c | 51 ++++++++++++++++++-----------------
> >>   include/drm/drm_syncobj.h     |  8 ++++--
> >>   2 files changed, 33 insertions(+), 26 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
> >> index 57bf6006394d..315f08132f6d 100644
> >> --- a/drivers/gpu/drm/drm_syncobj.c
> >> +++ b/drivers/gpu/drm/drm_syncobj.c
> >> @@ -125,23 +125,24 @@ static int drm_syncobj_fence_get_or_add_callback(struct drm_syncobj *syncobj,
> >>          if (!ret)
> >>                  return 1;
> >>   
> >> -       spin_lock(&syncobj->lock);
> >> +       mutex_lock(&syncobj->cb_mutex);
> >>          /* We've already tried once to get a fence and failed.  Now that we
> >>           * have the lock, try one more time just to be sure we don't add a
> >>           * callback when a fence has already been set.
> >>           */
> >> +       spin_lock(&syncobj->pt_lock);
> >>          if (!list_empty(&syncobj->signal_pt_list)) {
> >> -               spin_unlock(&syncobj->lock);
> >> +               spin_unlock(&syncobj->pt_lock);
> >>                  drm_syncobj_search_fence(syncobj, 0, 0, fence);
> >>                  if (*fence)
> > mutex_unlock(&syncobj->cb_mutex);
> fixed.
> 
> >
> > With that,
> > Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
> Thanks
> 
> >
> > Can you please resend with Cc: intel-gfx@lists.freedesktop.org so we can
> > double check the fix.
> resent, Could you help to submit patch to drm-misc?

It should have time on the shards this afternoon, so all going to plan,
yup.
-Chris
diff mbox series

Patch

diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index 57bf6006394d..315f08132f6d 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -125,23 +125,24 @@  static int drm_syncobj_fence_get_or_add_callback(struct drm_syncobj *syncobj,
 	if (!ret)
 		return 1;
 
-	spin_lock(&syncobj->lock);
+	mutex_lock(&syncobj->cb_mutex);
 	/* We've already tried once to get a fence and failed.  Now that we
 	 * have the lock, try one more time just to be sure we don't add a
 	 * callback when a fence has already been set.
 	 */
+	spin_lock(&syncobj->pt_lock);
 	if (!list_empty(&syncobj->signal_pt_list)) {
-		spin_unlock(&syncobj->lock);
+		spin_unlock(&syncobj->pt_lock);
 		drm_syncobj_search_fence(syncobj, 0, 0, fence);
 		if (*fence)
 			return 1;
-		spin_lock(&syncobj->lock);
 	} else {
+		spin_unlock(&syncobj->pt_lock);
 		*fence = NULL;
 		drm_syncobj_add_callback_locked(syncobj, cb, func);
 		ret = 0;
 	}
-	spin_unlock(&syncobj->lock);
+	mutex_unlock(&syncobj->cb_mutex);
 
 	return ret;
 }
@@ -150,43 +151,43 @@  void drm_syncobj_add_callback(struct drm_syncobj *syncobj,
 			      struct drm_syncobj_cb *cb,
 			      drm_syncobj_func_t func)
 {
-	spin_lock(&syncobj->lock);
+	mutex_lock(&syncobj->cb_mutex);
 	drm_syncobj_add_callback_locked(syncobj, cb, func);
-	spin_unlock(&syncobj->lock);
+	mutex_unlock(&syncobj->cb_mutex);
 }
 
 void drm_syncobj_remove_callback(struct drm_syncobj *syncobj,
 				 struct drm_syncobj_cb *cb)
 {
-	spin_lock(&syncobj->lock);
+	mutex_lock(&syncobj->cb_mutex);
 	list_del_init(&cb->node);
-	spin_unlock(&syncobj->lock);
+	mutex_unlock(&syncobj->cb_mutex);
 }
 
 static void drm_syncobj_init(struct drm_syncobj *syncobj)
 {
-	spin_lock(&syncobj->lock);
+	spin_lock(&syncobj->pt_lock);
 	syncobj->timeline_context = dma_fence_context_alloc(1);
 	syncobj->timeline = 0;
 	syncobj->signal_point = 0;
 	init_waitqueue_head(&syncobj->wq);
 
 	INIT_LIST_HEAD(&syncobj->signal_pt_list);
-	spin_unlock(&syncobj->lock);
+	spin_unlock(&syncobj->pt_lock);
 }
 
 static void drm_syncobj_fini(struct drm_syncobj *syncobj)
 {
 	struct drm_syncobj_signal_pt *signal_pt = NULL, *tmp;
 
-	spin_lock(&syncobj->lock);
+	spin_lock(&syncobj->pt_lock);
 	list_for_each_entry_safe(signal_pt, tmp,
 				 &syncobj->signal_pt_list, list) {
 		list_del(&signal_pt->list);
 		dma_fence_put(&signal_pt->fence_array->base);
 		kfree(signal_pt);
 	}
-	spin_unlock(&syncobj->lock);
+	spin_unlock(&syncobj->pt_lock);
 }
 
 static struct dma_fence
@@ -249,14 +250,14 @@  static int drm_syncobj_create_signal_pt(struct drm_syncobj *syncobj,
 	fences[num_fences++] = dma_fence_get(fence);
 	/* timeline syncobj must take this dependency */
 	if (syncobj->type == DRM_SYNCOBJ_TYPE_TIMELINE) {
-		spin_lock(&syncobj->lock);
+		spin_lock(&syncobj->pt_lock);
 		if (!list_empty(&syncobj->signal_pt_list)) {
 			tail_pt = list_last_entry(&syncobj->signal_pt_list,
 						  struct drm_syncobj_signal_pt, list);
 			fences[num_fences++] =
 				dma_fence_get(&tail_pt->fence_array->base);
 		}
-		spin_unlock(&syncobj->lock);
+		spin_unlock(&syncobj->pt_lock);
 	}
 	signal_pt->fence_array = dma_fence_array_create(num_fences, fences,
 							syncobj->timeline_context,
@@ -266,16 +267,16 @@  static int drm_syncobj_create_signal_pt(struct drm_syncobj *syncobj,
 		goto fail;
 	}
 
-	spin_lock(&syncobj->lock);
+	spin_lock(&syncobj->pt_lock);
 	if (syncobj->signal_point >= point) {
 		DRM_WARN("A later signal is ready!");
-		spin_unlock(&syncobj->lock);
+		spin_unlock(&syncobj->pt_lock);
 		goto exist;
 	}
 	signal_pt->value = point;
 	list_add_tail(&signal_pt->list, &syncobj->signal_pt_list);
 	syncobj->signal_point = point;
-	spin_unlock(&syncobj->lock);
+	spin_unlock(&syncobj->pt_lock);
 	wake_up_all(&syncobj->wq);
 
 	return 0;
@@ -294,7 +295,7 @@  static void drm_syncobj_garbage_collection(struct drm_syncobj *syncobj)
 {
 	struct drm_syncobj_signal_pt *signal_pt, *tmp, *tail_pt;
 
-	spin_lock(&syncobj->lock);
+	spin_lock(&syncobj->pt_lock);
 	tail_pt = list_last_entry(&syncobj->signal_pt_list,
 				  struct drm_syncobj_signal_pt,
 				  list);
@@ -315,7 +316,7 @@  static void drm_syncobj_garbage_collection(struct drm_syncobj *syncobj)
 		}
 	}
 
-	spin_unlock(&syncobj->lock);
+	spin_unlock(&syncobj->pt_lock);
 }
 /**
  * drm_syncobj_replace_fence - replace fence in a sync object.
@@ -344,13 +345,14 @@  void drm_syncobj_replace_fence(struct drm_syncobj *syncobj,
 	drm_syncobj_create_signal_pt(syncobj, fence, pt_value);
 	if (fence) {
 		struct drm_syncobj_cb *cur, *tmp;
+		LIST_HEAD(cb_list);
 
-		spin_lock(&syncobj->lock);
+		mutex_lock(&syncobj->cb_mutex);
 		list_for_each_entry_safe(cur, tmp, &syncobj->cb_list, node) {
 			list_del_init(&cur->node);
 			cur->func(syncobj, cur);
 		}
-		spin_unlock(&syncobj->lock);
+		mutex_unlock(&syncobj->cb_mutex);
 	}
 }
 EXPORT_SYMBOL(drm_syncobj_replace_fence);
@@ -386,11 +388,11 @@  drm_syncobj_point_get(struct drm_syncobj *syncobj, u64 point, u64 flags,
 		if (ret < 0)
 			return ret;
 	}
-	spin_lock(&syncobj->lock);
+	spin_lock(&syncobj->pt_lock);
 	*fence = drm_syncobj_find_signal_pt_for_point(syncobj, point);
 	if (!*fence)
 		ret = -EINVAL;
-	spin_unlock(&syncobj->lock);
+	spin_unlock(&syncobj->pt_lock);
 	return ret;
 }
 
@@ -500,7 +502,8 @@  int drm_syncobj_create(struct drm_syncobj **out_syncobj, uint32_t flags,
 
 	kref_init(&syncobj->refcount);
 	INIT_LIST_HEAD(&syncobj->cb_list);
-	spin_lock_init(&syncobj->lock);
+	spin_lock_init(&syncobj->pt_lock);
+	mutex_init(&syncobj->cb_mutex);
 	if (flags & DRM_SYNCOBJ_CREATE_TYPE_TIMELINE)
 		syncobj->type = DRM_SYNCOBJ_TYPE_TIMELINE;
 	else
diff --git a/include/drm/drm_syncobj.h b/include/drm/drm_syncobj.h
index 5e8c5c027e09..29244cbcd05e 100644
--- a/include/drm/drm_syncobj.h
+++ b/include/drm/drm_syncobj.h
@@ -75,9 +75,13 @@  struct drm_syncobj {
          */
 	struct list_head cb_list;
 	/**
-	 * @lock: Protects syncobj list and write-locks &fence.
+	 * @pt_lock: Protects pt list.
 	 */
-	spinlock_t lock;
+	spinlock_t pt_lock;
+	/**
+	 * @cb_mutex: Protects syncobj cb list.
+	 */
+	struct mutex cb_mutex;
 	/**
 	 * @file: A file backing for this syncobj.
 	 */