
[2/2] drm: Revert syncobj timeline changes.

Message ID 20181108160422.17743-3-eric@anholt.net (mailing list archive)
State New, archived
Series reverts to un-regress v3d

Commit Message

Eric Anholt Nov. 8, 2018, 4:04 p.m. UTC
Daniel suggested I submit this, since we're still seeing regressions
from it.  This is a revert to before 48197bc564c7 ("drm: add syncobj
timeline support v9") and its followon fixes.

Fixes this on first V3D testcase execution:

[   48.767088] ============================================
[   48.772410] WARNING: possible recursive locking detected
[   48.777739] 4.19.0-rc6+ #489 Not tainted
[   48.781668] --------------------------------------------
[   48.786993] shader_runner/3284 is trying to acquire lock:
[   48.792408] ce309d7f (&(&array->lock)->rlock){....}, at: dma_fence_add_callback+0x30/0x23c
[   48.800714]
[   48.800714] but task is already holding lock:
[   48.806559] c5952bd3 (&(&array->lock)->rlock){....}, at: dma_fence_add_callback+0x30/0x23c
[   48.814862]
[   48.814862] other info that might help us debug this:
[   48.821410]  Possible unsafe locking scenario:
[   48.821410]
[   48.827338]        CPU0
[   48.829788]        ----
[   48.832239]   lock(&(&array->lock)->rlock);
[   48.836434]   lock(&(&array->lock)->rlock);
[   48.840640]
[   48.840640]  *** DEADLOCK ***
[   48.840640]
[   48.846582]  May be due to missing lock nesting notation
[  130.763560] 1 lock held by cts-runner/3270:
[  130.767745]  #0: 7834b793 (&(&array->lock)->rlock){-...}, at: dma_fence_add_callback+0x30/0x23c
[  130.776461]
               stack backtrace:
[  130.780825] CPU: 1 PID: 3270 Comm: cts-runner Not tainted 4.19.0-rc6+ #486
[  130.787706] Hardware name: Broadcom STB (Flattened Device Tree)
[  130.793645] [<c021269c>] (unwind_backtrace) from [<c020db1c>] (show_stack+0x10/0x14)
[  130.801404] [<c020db1c>] (show_stack) from [<c0c2c4b0>] (dump_stack+0xa8/0xd4)
[  130.808642] [<c0c2c4b0>] (dump_stack) from [<c0281a84>] (__lock_acquire+0x848/0x1a68)
[  130.816483] [<c0281a84>] (__lock_acquire) from [<c02835d8>] (lock_acquire+0xd8/0x22c)
[  130.824326] [<c02835d8>] (lock_acquire) from [<c0c49948>] (_raw_spin_lock_irqsave+0x54/0x68)
[  130.832777] [<c0c49948>] (_raw_spin_lock_irqsave) from [<c086bf54>] (dma_fence_add_callback+0x30/0x23c)
[  130.842183] [<c086bf54>] (dma_fence_add_callback) from [<c086d4c8>] (dma_fence_array_enable_signaling+0x58/0xec)
[  130.852371] [<c086d4c8>] (dma_fence_array_enable_signaling) from [<c086c00c>] (dma_fence_add_callback+0xe8/0x23c)
[  130.862647] [<c086c00c>] (dma_fence_add_callback) from [<c06d8774>] (drm_syncobj_wait_ioctl+0x518/0x614)
[  130.872143] [<c06d8774>] (drm_syncobj_wait_ioctl) from [<c06b8458>] (drm_ioctl_kernel+0xb0/0xf0)
[  130.880940] [<c06b8458>] (drm_ioctl_kernel) from [<c06b8818>] (drm_ioctl+0x1d8/0x390)
[  130.888782] [<c06b8818>] (drm_ioctl) from [<c03a4510>] (do_vfs_ioctl+0xb0/0x8ac)
[  130.896187] [<c03a4510>] (do_vfs_ioctl) from [<c03a4d40>] (ksys_ioctl+0x34/0x60)
[  130.903593] [<c03a4d40>] (ksys_ioctl) from [<c0201000>] (ret_fast_syscall+0x0/0x28)
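
The recursion that lockdep reports here comes from every dma_fence_array
spinlock sharing a single lock class: dma_fence_add_callback() takes the
array's lock and, still holding it, dma_fence_array_enable_signaling() calls
dma_fence_add_callback() on each member fence.  Because the timeline code
chains each new signal point to the previous point's fence array, a member can
itself be a dma_fence_array, so a second lock of the same class is acquired.
A minimal sketch of that shape (hw_fence_a/hw_fence_b, wait_cb and wait_func
are hypothetical, error handling omitted; illustration only, not code from
this patch):

    u64 ctx = dma_fence_context_alloc(1);
    struct dma_fence **fences;
    struct dma_fence_array *inner, *outer;

    /* First signal point: wraps a single hardware fence. */
    fences = kmalloc_array(1, sizeof(*fences), GFP_KERNEL);
    fences[0] = dma_fence_get(hw_fence_a);
    inner = dma_fence_array_create(1, fences, ctx, 1, false);

    /* Second signal point: wraps another fence *and* chains the previous
     * point, as drm_syncobj_create_signal_pt() does for timeline syncobjs. */
    fences = kmalloc_array(2, sizeof(*fences), GFP_KERNEL);
    fences[0] = dma_fence_get(hw_fence_b);
    fences[1] = dma_fence_get(&inner->base);
    outer = dma_fence_array_create(2, fences, ctx, 2, false);

    /* Takes outer's lock, then enable_signaling() calls
     * dma_fence_add_callback(&inner->base, ...) which takes inner's lock:
     * two locks of the same class, hence the report above, even though the
     * lock instances differ. */
    dma_fence_add_callback(&outer->base, &wait_cb, wait_func);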

Cc: Chunming Zhou <david1.zhou@amd.com>
Cc: Christian König <christian.koenig@amd.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Eric Anholt <eric@anholt.net>
---
 drivers/gpu/drm/drm_syncobj.c | 359 +++++++---------------------------
 include/drm/drm_syncobj.h     |  73 ++++---
 2 files changed, 105 insertions(+), 327 deletions(-)

Comments

Christian König Nov. 8, 2018, 4:07 p.m. UTC | #1
Am 08.11.18 um 17:04 schrieb Eric Anholt:
> Daniel suggested I submit this, since we're still seeing regressions
> from it.  This is a revert to before 48197bc564c7 ("drm: add syncobj
> timeline support v9") and its followon fixes.

This is a harmless false positive from lockdep; Chunming and I are
already working on a fix.

Christian.

>
> Fixes this on first V3D testcase execution:
>
> [   48.767088] ============================================
> [   48.772410] WARNING: possible recursive locking detected
> [   48.777739] 4.19.0-rc6+ #489 Not tainted
> [   48.781668] --------------------------------------------
> [   48.786993] shader_runner/3284 is trying to acquire lock:
> [   48.792408] ce309d7f (&(&array->lock)->rlock){....}, at: dma_fence_add_callback+0x30/0x23c
> [   48.800714]
> [   48.800714] but task is already holding lock:
> [   48.806559] c5952bd3 (&(&array->lock)->rlock){....}, at: dma_fence_add_callback+0x30/0x23c
> [   48.814862]
> [   48.814862] other info that might help us debug this:
> [   48.821410]  Possible unsafe locking scenario:
> [   48.821410]
> [   48.827338]        CPU0
> [   48.829788]        ----
> [   48.832239]   lock(&(&array->lock)->rlock);
> [   48.836434]   lock(&(&array->lock)->rlock);
> [   48.840640]
> [   48.840640]  *** DEADLOCK ***
> [   48.840640]
> [   48.846582]  May be due to missing lock nesting notation
> [  130.763560] 1 lock held by cts-runner/3270:
> [  130.767745]  #0: 7834b793 (&(&array->lock)->rlock){-...}, at: dma_fence_add_callback+0x30/0x23c
> [  130.776461]
>                 stack backtrace:
> [  130.780825] CPU: 1 PID: 3270 Comm: cts-runner Not tainted 4.19.0-rc6+ #486
> [  130.787706] Hardware name: Broadcom STB (Flattened Device Tree)
> [  130.793645] [<c021269c>] (unwind_backtrace) from [<c020db1c>] (show_stack+0x10/0x14)
> [  130.801404] [<c020db1c>] (show_stack) from [<c0c2c4b0>] (dump_stack+0xa8/0xd4)
> [  130.808642] [<c0c2c4b0>] (dump_stack) from [<c0281a84>] (__lock_acquire+0x848/0x1a68)
> [  130.816483] [<c0281a84>] (__lock_acquire) from [<c02835d8>] (lock_acquire+0xd8/0x22c)
> [  130.824326] [<c02835d8>] (lock_acquire) from [<c0c49948>] (_raw_spin_lock_irqsave+0x54/0x68)
> [  130.832777] [<c0c49948>] (_raw_spin_lock_irqsave) from [<c086bf54>] (dma_fence_add_callback+0x30/0x23c)
> [  130.842183] [<c086bf54>] (dma_fence_add_callback) from [<c086d4c8>] (dma_fence_array_enable_signaling+0x58/0xec)
> [  130.852371] [<c086d4c8>] (dma_fence_array_enable_signaling) from [<c086c00c>] (dma_fence_add_callback+0xe8/0x23c)
> [  130.862647] [<c086c00c>] (dma_fence_add_callback) from [<c06d8774>] (drm_syncobj_wait_ioctl+0x518/0x614)
> [  130.872143] [<c06d8774>] (drm_syncobj_wait_ioctl) from [<c06b8458>] (drm_ioctl_kernel+0xb0/0xf0)
> [  130.880940] [<c06b8458>] (drm_ioctl_kernel) from [<c06b8818>] (drm_ioctl+0x1d8/0x390)
> [  130.888782] [<c06b8818>] (drm_ioctl) from [<c03a4510>] (do_vfs_ioctl+0xb0/0x8ac)
> [  130.896187] [<c03a4510>] (do_vfs_ioctl) from [<c03a4d40>] (ksys_ioctl+0x34/0x60)
> [  130.903593] [<c03a4d40>] (ksys_ioctl) from [<c0201000>] (ret_fast_syscall+0x0/0x28)
>
> Cc: Chunming Zhou <david1.zhou@amd.com>
> Cc: Christian König <christian.koenig@amd.com>
> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> Signed-off-by: Eric Anholt <eric@anholt.net>
> ---
>   drivers/gpu/drm/drm_syncobj.c | 359 +++++++---------------------------
>   include/drm/drm_syncobj.h     |  73 ++++---
>   2 files changed, 105 insertions(+), 327 deletions(-)
>
> diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
> index da8175d9c6ff..e2c5b3ca4824 100644
> --- a/drivers/gpu/drm/drm_syncobj.c
> +++ b/drivers/gpu/drm/drm_syncobj.c
> @@ -56,9 +56,6 @@
>   #include "drm_internal.h"
>   #include <drm/drm_syncobj.h>
>   
> -/* merge normal syncobj to timeline syncobj, the point interval is 1 */
> -#define DRM_SYNCOBJ_BINARY_POINT 1
> -
>   struct drm_syncobj_stub_fence {
>   	struct dma_fence base;
>   	spinlock_t lock;
> @@ -74,29 +71,7 @@ static const struct dma_fence_ops drm_syncobj_stub_fence_ops = {
>   	.get_timeline_name = drm_syncobj_stub_fence_get_name,
>   };
>   
> -struct drm_syncobj_signal_pt {
> -	struct dma_fence_array *fence_array;
> -	u64    value;
> -	struct list_head list;
> -};
> -
> -static DEFINE_SPINLOCK(signaled_fence_lock);
> -static struct dma_fence signaled_fence;
>   
> -static struct dma_fence *drm_syncobj_get_stub_fence(void)
> -{
> -	spin_lock(&signaled_fence_lock);
> -	if (!signaled_fence.ops) {
> -		dma_fence_init(&signaled_fence,
> -			       &drm_syncobj_stub_fence_ops,
> -			       &signaled_fence_lock,
> -			       0, 0);
> -		dma_fence_signal_locked(&signaled_fence);
> -	}
> -	spin_unlock(&signaled_fence_lock);
> -
> -	return dma_fence_get(&signaled_fence);
> -}
>   /**
>    * drm_syncobj_find - lookup and reference a sync object.
>    * @file_private: drm file private pointer
> @@ -123,27 +98,6 @@ struct drm_syncobj *drm_syncobj_find(struct drm_file *file_private,
>   }
>   EXPORT_SYMBOL(drm_syncobj_find);
>   
> -static struct dma_fence *
> -drm_syncobj_find_signal_pt_for_point(struct drm_syncobj *syncobj,
> -				     uint64_t point)
> -{
> -	struct drm_syncobj_signal_pt *signal_pt;
> -
> -	if ((syncobj->type == DRM_SYNCOBJ_TYPE_TIMELINE) &&
> -	    (point <= syncobj->timeline))
> -		return drm_syncobj_get_stub_fence();
> -
> -	list_for_each_entry(signal_pt, &syncobj->signal_pt_list, list) {
> -		if (point > signal_pt->value)
> -			continue;
> -		if ((syncobj->type == DRM_SYNCOBJ_TYPE_BINARY) &&
> -		    (point != signal_pt->value))
> -			continue;
> -		return dma_fence_get(&signal_pt->fence_array->base);
> -	}
> -	return NULL;
> -}
> -
>   static void drm_syncobj_add_callback_locked(struct drm_syncobj *syncobj,
>   					    struct drm_syncobj_cb *cb,
>   					    drm_syncobj_func_t func)
> @@ -152,158 +106,53 @@ static void drm_syncobj_add_callback_locked(struct drm_syncobj *syncobj,
>   	list_add_tail(&cb->node, &syncobj->cb_list);
>   }
>   
> -static void drm_syncobj_fence_get_or_add_callback(struct drm_syncobj *syncobj,
> -						  struct dma_fence **fence,
> -						  struct drm_syncobj_cb *cb,
> -						  drm_syncobj_func_t func)
> +static int drm_syncobj_fence_get_or_add_callback(struct drm_syncobj *syncobj,
> +						 struct dma_fence **fence,
> +						 struct drm_syncobj_cb *cb,
> +						 drm_syncobj_func_t func)
>   {
> -	u64 pt_value = 0;
> -
> -	WARN_ON(*fence);
> -
> -	if (syncobj->type == DRM_SYNCOBJ_TYPE_BINARY) {
> -		/*BINARY syncobj always wait on last pt */
> -		pt_value = syncobj->signal_point;
> +	int ret;
>   
> -		if (pt_value == 0)
> -			pt_value += DRM_SYNCOBJ_BINARY_POINT;
> -	}
> +	*fence = drm_syncobj_fence_get(syncobj);
> +	if (*fence)
> +		return 1;
>   
> -	mutex_lock(&syncobj->cb_mutex);
> -	spin_lock(&syncobj->pt_lock);
> -	*fence = drm_syncobj_find_signal_pt_for_point(syncobj, pt_value);
> -	spin_unlock(&syncobj->pt_lock);
> -	if (!*fence)
> +	spin_lock(&syncobj->lock);
> +	/* We've already tried once to get a fence and failed.  Now that we
> +	 * have the lock, try one more time just to be sure we don't add a
> +	 * callback when a fence has already been set.
> +	 */
> +	if (syncobj->fence) {
> +		*fence = dma_fence_get(rcu_dereference_protected(syncobj->fence,
> +								 lockdep_is_held(&syncobj->lock)));
> +		ret = 1;
> +	} else {
> +		*fence = NULL;
>   		drm_syncobj_add_callback_locked(syncobj, cb, func);
> -	mutex_unlock(&syncobj->cb_mutex);
> -}
> -
> -static void drm_syncobj_remove_callback(struct drm_syncobj *syncobj,
> -					struct drm_syncobj_cb *cb)
> -{
> -	mutex_lock(&syncobj->cb_mutex);
> -	list_del_init(&cb->node);
> -	mutex_unlock(&syncobj->cb_mutex);
> -}
> +		ret = 0;
> +	}
> +	spin_unlock(&syncobj->lock);
>   
> -static void drm_syncobj_init(struct drm_syncobj *syncobj)
> -{
> -	spin_lock(&syncobj->pt_lock);
> -	syncobj->timeline_context = dma_fence_context_alloc(1);
> -	syncobj->timeline = 0;
> -	syncobj->signal_point = 0;
> -	init_waitqueue_head(&syncobj->wq);
> -
> -	INIT_LIST_HEAD(&syncobj->signal_pt_list);
> -	spin_unlock(&syncobj->pt_lock);
> +	return ret;
>   }
>   
> -static void drm_syncobj_fini(struct drm_syncobj *syncobj)
> +void drm_syncobj_add_callback(struct drm_syncobj *syncobj,
> +			      struct drm_syncobj_cb *cb,
> +			      drm_syncobj_func_t func)
>   {
> -	struct drm_syncobj_signal_pt *signal_pt = NULL, *tmp;
> -
> -	spin_lock(&syncobj->pt_lock);
> -	list_for_each_entry_safe(signal_pt, tmp,
> -				 &syncobj->signal_pt_list, list) {
> -		list_del(&signal_pt->list);
> -		dma_fence_put(&signal_pt->fence_array->base);
> -		kfree(signal_pt);
> -	}
> -	spin_unlock(&syncobj->pt_lock);
> +	spin_lock(&syncobj->lock);
> +	drm_syncobj_add_callback_locked(syncobj, cb, func);
> +	spin_unlock(&syncobj->lock);
>   }
>   
> -static int drm_syncobj_create_signal_pt(struct drm_syncobj *syncobj,
> -					struct dma_fence *fence,
> -					u64 point)
> +void drm_syncobj_remove_callback(struct drm_syncobj *syncobj,
> +				 struct drm_syncobj_cb *cb)
>   {
> -	struct drm_syncobj_signal_pt *signal_pt =
> -		kzalloc(sizeof(struct drm_syncobj_signal_pt), GFP_KERNEL);
> -	struct drm_syncobj_signal_pt *tail_pt;
> -	struct dma_fence **fences;
> -	int num_fences = 0;
> -	int ret = 0, i;
> -
> -	if (!signal_pt)
> -		return -ENOMEM;
> -	if (!fence)
> -		goto out;
> -
> -	fences = kmalloc_array(sizeof(void *), 2, GFP_KERNEL);
> -	if (!fences) {
> -		ret = -ENOMEM;
> -		goto out;
> -	}
> -	fences[num_fences++] = dma_fence_get(fence);
> -	/* timeline syncobj must take this dependency */
> -	if (syncobj->type == DRM_SYNCOBJ_TYPE_TIMELINE) {
> -		spin_lock(&syncobj->pt_lock);
> -		if (!list_empty(&syncobj->signal_pt_list)) {
> -			tail_pt = list_last_entry(&syncobj->signal_pt_list,
> -						  struct drm_syncobj_signal_pt, list);
> -			fences[num_fences++] =
> -				dma_fence_get(&tail_pt->fence_array->base);
> -		}
> -		spin_unlock(&syncobj->pt_lock);
> -	}
> -	signal_pt->fence_array = dma_fence_array_create(num_fences, fences,
> -							syncobj->timeline_context,
> -							point, false);
> -	if (!signal_pt->fence_array) {
> -		ret = -ENOMEM;
> -		goto fail;
> -	}
> -
> -	spin_lock(&syncobj->pt_lock);
> -	if (syncobj->signal_point >= point) {
> -		DRM_WARN("A later signal is ready!");
> -		spin_unlock(&syncobj->pt_lock);
> -		goto exist;
> -	}
> -	signal_pt->value = point;
> -	list_add_tail(&signal_pt->list, &syncobj->signal_pt_list);
> -	syncobj->signal_point = point;
> -	spin_unlock(&syncobj->pt_lock);
> -	wake_up_all(&syncobj->wq);
> -
> -	return 0;
> -exist:
> -	dma_fence_put(&signal_pt->fence_array->base);
> -fail:
> -	for (i = 0; i < num_fences; i++)
> -		dma_fence_put(fences[i]);
> -	kfree(fences);
> -out:
> -	kfree(signal_pt);
> -	return ret;
> +	spin_lock(&syncobj->lock);
> +	list_del_init(&cb->node);
> +	spin_unlock(&syncobj->lock);
>   }
>   
> -static void drm_syncobj_garbage_collection(struct drm_syncobj *syncobj)
> -{
> -	struct drm_syncobj_signal_pt *signal_pt, *tmp, *tail_pt;
> -
> -	spin_lock(&syncobj->pt_lock);
> -	tail_pt = list_last_entry(&syncobj->signal_pt_list,
> -				  struct drm_syncobj_signal_pt,
> -				  list);
> -	list_for_each_entry_safe(signal_pt, tmp,
> -				 &syncobj->signal_pt_list, list) {
> -		if (syncobj->type == DRM_SYNCOBJ_TYPE_BINARY &&
> -		    signal_pt == tail_pt)
> -			continue;
> -		if (dma_fence_is_signaled(&signal_pt->fence_array->base)) {
> -			syncobj->timeline = signal_pt->value;
> -			list_del(&signal_pt->list);
> -			dma_fence_put(&signal_pt->fence_array->base);
> -			kfree(signal_pt);
> -		} else {
> -			/*signal_pt is in order in list, from small to big, so
> -			 * the later must not be signal either */
> -			break;
> -		}
> -	}
> -
> -	spin_unlock(&syncobj->pt_lock);
> -}
>   /**
>    * drm_syncobj_replace_fence - replace fence in a sync object.
>    * @syncobj: Sync object to replace fence in
> @@ -316,30 +165,28 @@ void drm_syncobj_replace_fence(struct drm_syncobj *syncobj,
>   			       u64 point,
>   			       struct dma_fence *fence)
>   {
> -	u64 pt_value = point;
> -
> -	drm_syncobj_garbage_collection(syncobj);
> -	if (syncobj->type == DRM_SYNCOBJ_TYPE_BINARY) {
> -		if (!fence) {
> -			drm_syncobj_fini(syncobj);
> -			drm_syncobj_init(syncobj);
> -			return;
> -		}
> -		pt_value = syncobj->signal_point +
> -			DRM_SYNCOBJ_BINARY_POINT;
> -	}
> -	drm_syncobj_create_signal_pt(syncobj, fence, pt_value);
> -	if (fence) {
> -		struct drm_syncobj_cb *cur, *tmp;
> -		LIST_HEAD(cb_list);
> +	struct dma_fence *old_fence;
> +	struct drm_syncobj_cb *cur, *tmp;
> +
> +	if (fence)
> +		dma_fence_get(fence);
> +
> +	spin_lock(&syncobj->lock);
>   
> -		mutex_lock(&syncobj->cb_mutex);
> +	old_fence = rcu_dereference_protected(syncobj->fence,
> +					      lockdep_is_held(&syncobj->lock));
> +	rcu_assign_pointer(syncobj->fence, fence);
> +
> +	if (fence != old_fence) {
>   		list_for_each_entry_safe(cur, tmp, &syncobj->cb_list, node) {
>   			list_del_init(&cur->node);
>   			cur->func(syncobj, cur);
>   		}
> -		mutex_unlock(&syncobj->cb_mutex);
>   	}
> +
> +	spin_unlock(&syncobj->lock);
> +
> +	dma_fence_put(old_fence);
>   }
>   EXPORT_SYMBOL(drm_syncobj_replace_fence);
>   
> @@ -362,64 +209,6 @@ static int drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj)
>   	return 0;
>   }
>   
> -static int
> -drm_syncobj_point_get(struct drm_syncobj *syncobj, u64 point, u64 flags,
> -		      struct dma_fence **fence)
> -{
> -	int ret = 0;
> -
> -	if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) {
> -		ret = wait_event_interruptible(syncobj->wq,
> -					       point <= syncobj->signal_point);
> -		if (ret < 0)
> -			return ret;
> -	}
> -	spin_lock(&syncobj->pt_lock);
> -	*fence = drm_syncobj_find_signal_pt_for_point(syncobj, point);
> -	if (!*fence)
> -		ret = -EINVAL;
> -	spin_unlock(&syncobj->pt_lock);
> -	return ret;
> -}
> -
> -/**
> - * drm_syncobj_search_fence - lookup and reference the fence in a sync object or
> - * in a timeline point
> - * @syncobj: sync object pointer
> - * @point: timeline point
> - * @flags: DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT or not
> - * @fence: out parameter for the fence
> - *
> - * if flags is DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT, the function will block
> - * here until specific timeline points is reached.
> - * if not, you need a submit thread and block in userspace until all future
> - * timeline points have materialized, only then you can submit to the kernel,
> - * otherwise, function will fail to return fence.
> - *
> - * Returns 0 on success or a negative error value on failure. On success @fence
> - * contains a reference to the fence, which must be released by calling
> - * dma_fence_put().
> - */
> -int drm_syncobj_search_fence(struct drm_syncobj *syncobj, u64 point,
> -			     u64 flags, struct dma_fence **fence)
> -{
> -	u64 pt_value = point;
> -
> -	if (!syncobj)
> -		return -ENOENT;
> -
> -	drm_syncobj_garbage_collection(syncobj);
> -	if (syncobj->type == DRM_SYNCOBJ_TYPE_BINARY) {
> -		/*BINARY syncobj always wait on last pt */
> -		pt_value = syncobj->signal_point;
> -
> -		if (pt_value == 0)
> -			pt_value += DRM_SYNCOBJ_BINARY_POINT;
> -	}
> -	return drm_syncobj_point_get(syncobj, pt_value, flags, fence);
> -}
> -EXPORT_SYMBOL(drm_syncobj_search_fence);
> -
>   /**
>    * drm_syncobj_find_fence - lookup and reference the fence in a sync object
>    * @file_private: drm file private pointer
> @@ -429,7 +218,7 @@ EXPORT_SYMBOL(drm_syncobj_search_fence);
>    * @fence: out parameter for the fence
>    *
>    * This is just a convenience function that combines drm_syncobj_find() and
> - * drm_syncobj_lookup_fence().
> + * drm_syncobj_fence_get().
>    *
>    * Returns 0 on success or a negative error value on failure. On success @fence
>    * contains a reference to the fence, which must be released by calling
> @@ -440,11 +229,16 @@ int drm_syncobj_find_fence(struct drm_file *file_private,
>   			   struct dma_fence **fence)
>   {
>   	struct drm_syncobj *syncobj = drm_syncobj_find(file_private, handle);
> -	int ret;
> +	int ret = 0;
>   
> -	ret = drm_syncobj_search_fence(syncobj, point, flags, fence);
> -	if (syncobj)
> -		drm_syncobj_put(syncobj);
> +	if (!syncobj)
> +		return -ENOENT;
> +
> +	*fence = drm_syncobj_fence_get(syncobj);
> +	if (!*fence) {
> +		ret = -EINVAL;
> +	}
> +	drm_syncobj_put(syncobj);
>   	return ret;
>   }
>   EXPORT_SYMBOL(drm_syncobj_find_fence);
> @@ -460,7 +254,7 @@ void drm_syncobj_free(struct kref *kref)
>   	struct drm_syncobj *syncobj = container_of(kref,
>   						   struct drm_syncobj,
>   						   refcount);
> -	drm_syncobj_fini(syncobj);
> +	drm_syncobj_replace_fence(syncobj, 0, NULL);
>   	kfree(syncobj);
>   }
>   EXPORT_SYMBOL(drm_syncobj_free);
> @@ -489,13 +283,7 @@ int drm_syncobj_create(struct drm_syncobj **out_syncobj, uint32_t flags,
>   
>   	kref_init(&syncobj->refcount);
>   	INIT_LIST_HEAD(&syncobj->cb_list);
> -	spin_lock_init(&syncobj->pt_lock);
> -	mutex_init(&syncobj->cb_mutex);
> -	if (flags & DRM_SYNCOBJ_CREATE_TYPE_TIMELINE)
> -		syncobj->type = DRM_SYNCOBJ_TYPE_TIMELINE;
> -	else
> -		syncobj->type = DRM_SYNCOBJ_TYPE_BINARY;
> -	drm_syncobj_init(syncobj);
> +	spin_lock_init(&syncobj->lock);
>   
>   	if (flags & DRM_SYNCOBJ_CREATE_SIGNALED) {
>   		ret = drm_syncobj_assign_null_handle(syncobj);
> @@ -778,8 +566,7 @@ drm_syncobj_create_ioctl(struct drm_device *dev, void *data,
>   		return -EOPNOTSUPP;
>   
>   	/* no valid flags yet */
> -	if (args->flags & ~(DRM_SYNCOBJ_CREATE_SIGNALED |
> -			    DRM_SYNCOBJ_CREATE_TYPE_TIMELINE))
> +	if (args->flags & ~DRM_SYNCOBJ_CREATE_SIGNALED)
>   		return -EINVAL;
>   
>   	return drm_syncobj_create_as_handle(file_private,
> @@ -872,8 +659,9 @@ static void syncobj_wait_syncobj_func(struct drm_syncobj *syncobj,
>   	struct syncobj_wait_entry *wait =
>   		container_of(cb, struct syncobj_wait_entry, syncobj_cb);
>   
> -	drm_syncobj_search_fence(syncobj, 0, 0, &wait->fence);
> -
> +	/* This happens inside the syncobj lock */
> +	wait->fence = dma_fence_get(rcu_dereference_protected(syncobj->fence,
> +							      lockdep_is_held(&syncobj->lock)));
>   	wake_up_process(wait->task);
>   }
>   
> @@ -899,8 +687,7 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
>   	signaled_count = 0;
>   	for (i = 0; i < count; ++i) {
>   		entries[i].task = current;
> -		drm_syncobj_search_fence(syncobjs[i], 0, 0,
> -					 &entries[i].fence);
> +		entries[i].fence = drm_syncobj_fence_get(syncobjs[i]);
>   		if (!entries[i].fence) {
>   			if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) {
>   				continue;
> @@ -931,9 +718,6 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
>   
>   	if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) {
>   		for (i = 0; i < count; ++i) {
> -			if (entries[i].fence)
> -				continue;
> -
>   			drm_syncobj_fence_get_or_add_callback(syncobjs[i],
>   							      &entries[i].fence,
>   							      &entries[i].syncobj_cb,
> @@ -1165,13 +949,12 @@ drm_syncobj_reset_ioctl(struct drm_device *dev, void *data,
>   	if (ret < 0)
>   		return ret;
>   
> -	for (i = 0; i < args->count_handles; i++) {
> -		drm_syncobj_fini(syncobjs[i]);
> -		drm_syncobj_init(syncobjs[i]);
> -	}
> +	for (i = 0; i < args->count_handles; i++)
> +		drm_syncobj_replace_fence(syncobjs[i], 0, NULL);
> +
>   	drm_syncobj_array_free(syncobjs, args->count_handles);
>   
> -	return ret;
> +	return 0;
>   }
>   
>   int
> diff --git a/include/drm/drm_syncobj.h b/include/drm/drm_syncobj.h
> index 29244cbcd05e..2eda44def639 100644
> --- a/include/drm/drm_syncobj.h
> +++ b/include/drm/drm_syncobj.h
> @@ -30,15 +30,10 @@
>   
>   struct drm_syncobj_cb;
>   
> -enum drm_syncobj_type {
> -	DRM_SYNCOBJ_TYPE_BINARY,
> -	DRM_SYNCOBJ_TYPE_TIMELINE
> -};
> -
>   /**
>    * struct drm_syncobj - sync object.
>    *
> - * This structure defines a generic sync object which is timeline based.
> + * This structure defines a generic sync object which wraps a &dma_fence.
>    */
>   struct drm_syncobj {
>   	/**
> @@ -46,42 +41,21 @@ struct drm_syncobj {
>   	 */
>   	struct kref refcount;
>   	/**
> -	 * @type: indicate syncobj type
> -	 */
> -	enum drm_syncobj_type type;
> -	/**
> -	 * @wq: wait signal operation work queue
> -	 */
> -	wait_queue_head_t	wq;
> -	/**
> -	 * @timeline_context: fence context used by timeline
> +	 * @fence:
> +	 * NULL or a pointer to the fence bound to this object.
> +	 *
> +	 * This field should not be used directly. Use drm_syncobj_fence_get()
> +	 * and drm_syncobj_replace_fence() instead.
>   	 */
> -	u64 timeline_context;
> +	struct dma_fence __rcu *fence;
>   	/**
> -	 * @timeline: syncobj timeline value, which indicates point is signaled.
> +	 * @cb_list: List of callbacks to call when the &fence gets replaced.
>   	 */
> -	u64 timeline;
> -	/**
> -	 * @signal_point: which indicates the latest signaler point.
> -	 */
> -	u64 signal_point;
> -	/**
> -	 * @signal_pt_list: signaler point list.
> -	 */
> -	struct list_head signal_pt_list;
> -
> -	/**
> -         * @cb_list: List of callbacks to call when the &fence gets replaced.
> -         */
>   	struct list_head cb_list;
>   	/**
> -	 * @pt_lock: Protects pt list.
> +	 * @lock: Protects &cb_list and write-locks &fence.
>   	 */
> -	spinlock_t pt_lock;
> -	/**
> -	 * @cb_mutex: Protects syncobj cb list.
> -	 */
> -	struct mutex cb_mutex;
> +	spinlock_t lock;
>   	/**
>   	 * @file: A file backing for this syncobj.
>   	 */
> @@ -94,7 +68,7 @@ typedef void (*drm_syncobj_func_t)(struct drm_syncobj *syncobj,
>   /**
>    * struct drm_syncobj_cb - callback for drm_syncobj_add_callback
>    * @node: used by drm_syncob_add_callback to append this struct to
> - *       &drm_syncobj.cb_list
> + *	  &drm_syncobj.cb_list
>    * @func: drm_syncobj_func_t to call
>    *
>    * This struct will be initialized by drm_syncobj_add_callback, additional
> @@ -132,6 +106,29 @@ drm_syncobj_put(struct drm_syncobj *obj)
>   	kref_put(&obj->refcount, drm_syncobj_free);
>   }
>   
> +/**
> + * drm_syncobj_fence_get - get a reference to a fence in a sync object
> + * @syncobj: sync object.
> + *
> + * This acquires additional reference to &drm_syncobj.fence contained in @obj,
> + * if not NULL. It is illegal to call this without already holding a reference.
> + * No locks required.
> + *
> + * Returns:
> + * Either the fence of @obj or NULL if there's none.
> + */
> +static inline struct dma_fence *
> +drm_syncobj_fence_get(struct drm_syncobj *syncobj)
> +{
> +	struct dma_fence *fence;
> +
> +	rcu_read_lock();
> +	fence = dma_fence_get_rcu_safe(&syncobj->fence);
> +	rcu_read_unlock();
> +
> +	return fence;
> +}
> +
>   struct drm_syncobj *drm_syncobj_find(struct drm_file *file_private,
>   				     u32 handle);
>   void drm_syncobj_replace_fence(struct drm_syncobj *syncobj, u64 point,
> @@ -145,7 +142,5 @@ int drm_syncobj_create(struct drm_syncobj **out_syncobj, uint32_t flags,
>   int drm_syncobj_get_handle(struct drm_file *file_private,
>   			   struct drm_syncobj *syncobj, u32 *handle);
>   int drm_syncobj_get_fd(struct drm_syncobj *syncobj, int *p_fd);
> -int drm_syncobj_search_fence(struct drm_syncobj *syncobj, u64 point, u64 flags,
> -			     struct dma_fence **fence);
>   
>   #endif
Christian König Nov. 8, 2018, 4:52 p.m. UTC | #2
Am 08.11.18 um 17:07 schrieb Koenig, Christian:
> Am 08.11.18 um 17:04 schrieb Eric Anholt:
>> Daniel suggested I submit this, since we're still seeing regressions
>> from it.  This is a revert to before 48197bc564c7 ("drm: add syncobj
>> timeline support v9") and its followon fixes.
> This is a harmless false positive from lockdep, Chunming and I are
> already working on a fix.

On the other hand, we've had enough trouble with that patch, so if it
really bothers you, feel free to add my Acked-by: Christian König
<christian.koenig@amd.com> and push it.

Christian.

Chunming Zhou Nov. 9, 2018, 2:35 a.m. UTC | #3
On 2018年11月09日 00:52, Christian König wrote:
> Am 08.11.18 um 17:07 schrieb Koenig, Christian:
>> Am 08.11.18 um 17:04 schrieb Eric Anholt:
>>> Daniel suggested I submit this, since we're still seeing regressions
>>> from it.  This is a revert to before 48197bc564c7 ("drm: add syncobj
>>> timeline support v9") and its followon fixes.
>> This is a harmless false positive from lockdep, Chunming and I are
>> already working on a fix.
>
> On the other hand we had enough trouble with that patch, so if it 
> really bothers you feel free to add my Acked-by: Christian König 
> <christian.koenig@amd.com> and push it.
NAK, please no, I don't think this is needed.  The warning isn't related
to the syncobj timeline code at all; it is a fence-array implementation
flaw that is merely exposed by syncobj.
In addition, Christian already has a fix for this warning, which I've
tested.  Christian, please send it out for public review.

-David
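
For reference, the "missing lock nesting notation" hint in the splat refers to
lockdep's subclass annotations: when two locks of the same class are taken in
a known, bounded nesting order, the inner acquisition can be annotated so that
lockdep tracks it as a separate subclass.  A generic sketch of that mechanism
(member_fence is hypothetical; this only illustrates the annotation lockdep
suggests and is not the fix Christian refers to, which is not shown in this
thread):

    unsigned long flags;

    /* Inner acquisition while the enclosing array's lock (same class) is
     * already held; the SINGLE_DEPTH_NESTING subclass tells lockdep this
     * one-level nesting is intentional. */
    spin_lock_irqsave_nested(member_fence->lock, flags, SINGLE_DEPTH_NESTING);
    /* ... queue the callback on member_fence ... */
    spin_unlock_irqrestore(member_fence->lock, flags);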
>>> -{
>>> -    spin_lock(&syncobj->pt_lock);
>>> -    syncobj->timeline_context = dma_fence_context_alloc(1);
>>> -    syncobj->timeline = 0;
>>> -    syncobj->signal_point = 0;
>>> -    init_waitqueue_head(&syncobj->wq);
>>> -
>>> -    INIT_LIST_HEAD(&syncobj->signal_pt_list);
>>> -    spin_unlock(&syncobj->pt_lock);
>>> +    return ret;
>>>    }
>>>    -static void drm_syncobj_fini(struct drm_syncobj *syncobj)
>>> +void drm_syncobj_add_callback(struct drm_syncobj *syncobj,
>>> +                  struct drm_syncobj_cb *cb,
>>> +                  drm_syncobj_func_t func)
>>>    {
>>> -    struct drm_syncobj_signal_pt *signal_pt = NULL, *tmp;
>>> -
>>> -    spin_lock(&syncobj->pt_lock);
>>> -    list_for_each_entry_safe(signal_pt, tmp,
>>> -                 &syncobj->signal_pt_list, list) {
>>> -        list_del(&signal_pt->list);
>>> - dma_fence_put(&signal_pt->fence_array->base);
>>> -        kfree(signal_pt);
>>> -    }
>>> -    spin_unlock(&syncobj->pt_lock);
>>> +    spin_lock(&syncobj->lock);
>>> +    drm_syncobj_add_callback_locked(syncobj, cb, func);
>>> +    spin_unlock(&syncobj->lock);
>>>    }
>>>    -static int drm_syncobj_create_signal_pt(struct drm_syncobj 
>>> *syncobj,
>>> -                    struct dma_fence *fence,
>>> -                    u64 point)
>>> +void drm_syncobj_remove_callback(struct drm_syncobj *syncobj,
>>> +                 struct drm_syncobj_cb *cb)
>>>    {
>>> -    struct drm_syncobj_signal_pt *signal_pt =
>>> -        kzalloc(sizeof(struct drm_syncobj_signal_pt), GFP_KERNEL);
>>> -    struct drm_syncobj_signal_pt *tail_pt;
>>> -    struct dma_fence **fences;
>>> -    int num_fences = 0;
>>> -    int ret = 0, i;
>>> -
>>> -    if (!signal_pt)
>>> -        return -ENOMEM;
>>> -    if (!fence)
>>> -        goto out;
>>> -
>>> -    fences = kmalloc_array(sizeof(void *), 2, GFP_KERNEL);
>>> -    if (!fences) {
>>> -        ret = -ENOMEM;
>>> -        goto out;
>>> -    }
>>> -    fences[num_fences++] = dma_fence_get(fence);
>>> -    /* timeline syncobj must take this dependency */
>>> -    if (syncobj->type == DRM_SYNCOBJ_TYPE_TIMELINE) {
>>> -        spin_lock(&syncobj->pt_lock);
>>> -        if (!list_empty(&syncobj->signal_pt_list)) {
>>> -            tail_pt = list_last_entry(&syncobj->signal_pt_list,
>>> -                          struct drm_syncobj_signal_pt, list);
>>> -            fences[num_fences++] =
>>> - dma_fence_get(&tail_pt->fence_array->base);
>>> -        }
>>> -        spin_unlock(&syncobj->pt_lock);
>>> -    }
>>> -    signal_pt->fence_array = dma_fence_array_create(num_fences, 
>>> fences,
>>> -                            syncobj->timeline_context,
>>> -                            point, false);
>>> -    if (!signal_pt->fence_array) {
>>> -        ret = -ENOMEM;
>>> -        goto fail;
>>> -    }
>>> -
>>> -    spin_lock(&syncobj->pt_lock);
>>> -    if (syncobj->signal_point >= point) {
>>> -        DRM_WARN("A later signal is ready!");
>>> -        spin_unlock(&syncobj->pt_lock);
>>> -        goto exist;
>>> -    }
>>> -    signal_pt->value = point;
>>> -    list_add_tail(&signal_pt->list, &syncobj->signal_pt_list);
>>> -    syncobj->signal_point = point;
>>> -    spin_unlock(&syncobj->pt_lock);
>>> -    wake_up_all(&syncobj->wq);
>>> -
>>> -    return 0;
>>> -exist:
>>> -    dma_fence_put(&signal_pt->fence_array->base);
>>> -fail:
>>> -    for (i = 0; i < num_fences; i++)
>>> -        dma_fence_put(fences[i]);
>>> -    kfree(fences);
>>> -out:
>>> -    kfree(signal_pt);
>>> -    return ret;
>>> +    spin_lock(&syncobj->lock);
>>> +    list_del_init(&cb->node);
>>> +    spin_unlock(&syncobj->lock);
>>>    }
>>>    -static void drm_syncobj_garbage_collection(struct drm_syncobj 
>>> *syncobj)
>>> -{
>>> -    struct drm_syncobj_signal_pt *signal_pt, *tmp, *tail_pt;
>>> -
>>> -    spin_lock(&syncobj->pt_lock);
>>> -    tail_pt = list_last_entry(&syncobj->signal_pt_list,
>>> -                  struct drm_syncobj_signal_pt,
>>> -                  list);
>>> -    list_for_each_entry_safe(signal_pt, tmp,
>>> -                 &syncobj->signal_pt_list, list) {
>>> -        if (syncobj->type == DRM_SYNCOBJ_TYPE_BINARY &&
>>> -            signal_pt == tail_pt)
>>> -            continue;
>>> -        if (dma_fence_is_signaled(&signal_pt->fence_array->base)) {
>>> -            syncobj->timeline = signal_pt->value;
>>> -            list_del(&signal_pt->list);
>>> - dma_fence_put(&signal_pt->fence_array->base);
>>> -            kfree(signal_pt);
>>> -        } else {
>>> -            /*signal_pt is in order in list, from small to big, so
>>> -             * the later must not be signal either */
>>> -            break;
>>> -        }
>>> -    }
>>> -
>>> -    spin_unlock(&syncobj->pt_lock);
>>> -}
>>>    /**
>>>     * drm_syncobj_replace_fence - replace fence in a sync object.
>>>     * @syncobj: Sync object to replace fence in
>>> @@ -316,30 +165,28 @@ void drm_syncobj_replace_fence(struct 
>>> drm_syncobj *syncobj,
>>>                       u64 point,
>>>                       struct dma_fence *fence)
>>>    {
>>> -    u64 pt_value = point;
>>> -
>>> -    drm_syncobj_garbage_collection(syncobj);
>>> -    if (syncobj->type == DRM_SYNCOBJ_TYPE_BINARY) {
>>> -        if (!fence) {
>>> -            drm_syncobj_fini(syncobj);
>>> -            drm_syncobj_init(syncobj);
>>> -            return;
>>> -        }
>>> -        pt_value = syncobj->signal_point +
>>> -            DRM_SYNCOBJ_BINARY_POINT;
>>> -    }
>>> -    drm_syncobj_create_signal_pt(syncobj, fence, pt_value);
>>> -    if (fence) {
>>> -        struct drm_syncobj_cb *cur, *tmp;
>>> -        LIST_HEAD(cb_list);
>>> +    struct dma_fence *old_fence;
>>> +    struct drm_syncobj_cb *cur, *tmp;
>>> +
>>> +    if (fence)
>>> +        dma_fence_get(fence);
>>> +
>>> +    spin_lock(&syncobj->lock);
>>>    -        mutex_lock(&syncobj->cb_mutex);
>>> +    old_fence = rcu_dereference_protected(syncobj->fence,
>>> + lockdep_is_held(&syncobj->lock));
>>> +    rcu_assign_pointer(syncobj->fence, fence);
>>> +
>>> +    if (fence != old_fence) {
>>>            list_for_each_entry_safe(cur, tmp, &syncobj->cb_list, 
>>> node) {
>>>                list_del_init(&cur->node);
>>>                cur->func(syncobj, cur);
>>>            }
>>> -        mutex_unlock(&syncobj->cb_mutex);
>>>        }
>>> +
>>> +    spin_unlock(&syncobj->lock);
>>> +
>>> +    dma_fence_put(old_fence);
>>>    }
>>>    EXPORT_SYMBOL(drm_syncobj_replace_fence);
>>>    @@ -362,64 +209,6 @@ static int 
>>> drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj)
>>>        return 0;
>>>    }
>>>    -static int
>>> -drm_syncobj_point_get(struct drm_syncobj *syncobj, u64 point, u64 
>>> flags,
>>> -              struct dma_fence **fence)
>>> -{
>>> -    int ret = 0;
>>> -
>>> -    if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) {
>>> -        ret = wait_event_interruptible(syncobj->wq,
>>> -                           point <= syncobj->signal_point);
>>> -        if (ret < 0)
>>> -            return ret;
>>> -    }
>>> -    spin_lock(&syncobj->pt_lock);
>>> -    *fence = drm_syncobj_find_signal_pt_for_point(syncobj, point);
>>> -    if (!*fence)
>>> -        ret = -EINVAL;
>>> -    spin_unlock(&syncobj->pt_lock);
>>> -    return ret;
>>> -}
>>> -
>>> -/**
>>> - * drm_syncobj_search_fence - lookup and reference the fence in a 
>>> sync object or
>>> - * in a timeline point
>>> - * @syncobj: sync object pointer
>>> - * @point: timeline point
>>> - * @flags: DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT or not
>>> - * @fence: out parameter for the fence
>>> - *
>>> - * if flags is DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT, the function 
>>> will block
>>> - * here until specific timeline points is reached.
>>> - * if not, you need a submit thread and block in userspace until 
>>> all future
>>> - * timeline points have materialized, only then you can submit to 
>>> the kernel,
>>> - * otherwise, function will fail to return fence.
>>> - *
>>> - * Returns 0 on success or a negative error value on failure. On 
>>> success @fence
>>> - * contains a reference to the fence, which must be released by 
>>> calling
>>> - * dma_fence_put().
>>> - */
>>> -int drm_syncobj_search_fence(struct drm_syncobj *syncobj, u64 point,
>>> -                 u64 flags, struct dma_fence **fence)
>>> -{
>>> -    u64 pt_value = point;
>>> -
>>> -    if (!syncobj)
>>> -        return -ENOENT;
>>> -
>>> -    drm_syncobj_garbage_collection(syncobj);
>>> -    if (syncobj->type == DRM_SYNCOBJ_TYPE_BINARY) {
>>> -        /*BINARY syncobj always wait on last pt */
>>> -        pt_value = syncobj->signal_point;
>>> -
>>> -        if (pt_value == 0)
>>> -            pt_value += DRM_SYNCOBJ_BINARY_POINT;
>>> -    }
>>> -    return drm_syncobj_point_get(syncobj, pt_value, flags, fence);
>>> -}
>>> -EXPORT_SYMBOL(drm_syncobj_search_fence);
>>> -
>>>    /**
>>>     * drm_syncobj_find_fence - lookup and reference the fence in a 
>>> sync object
>>>     * @file_private: drm file private pointer
>>> @@ -429,7 +218,7 @@ EXPORT_SYMBOL(drm_syncobj_search_fence);
>>>     * @fence: out parameter for the fence
>>>     *
>>>     * This is just a convenience function that combines 
>>> drm_syncobj_find() and
>>> - * drm_syncobj_lookup_fence().
>>> + * drm_syncobj_fence_get().
>>>     *
>>>     * Returns 0 on success or a negative error value on failure. On 
>>> success @fence
>>>     * contains a reference to the fence, which must be released by 
>>> calling
>>> @@ -440,11 +229,16 @@ int drm_syncobj_find_fence(struct drm_file 
>>> *file_private,
>>>                   struct dma_fence **fence)
>>>    {
>>>        struct drm_syncobj *syncobj = drm_syncobj_find(file_private, 
>>> handle);
>>> -    int ret;
>>> +    int ret = 0;
>>>    -    ret = drm_syncobj_search_fence(syncobj, point, flags, fence);
>>> -    if (syncobj)
>>> -        drm_syncobj_put(syncobj);
>>> +    if (!syncobj)
>>> +        return -ENOENT;
>>> +
>>> +    *fence = drm_syncobj_fence_get(syncobj);
>>> +    if (!*fence) {
>>> +        ret = -EINVAL;
>>> +    }
>>> +    drm_syncobj_put(syncobj);
>>>        return ret;
>>>    }
>>>    EXPORT_SYMBOL(drm_syncobj_find_fence);
>>> @@ -460,7 +254,7 @@ void drm_syncobj_free(struct kref *kref)
>>>        struct drm_syncobj *syncobj = container_of(kref,
>>>                               struct drm_syncobj,
>>>                               refcount);
>>> -    drm_syncobj_fini(syncobj);
>>> +    drm_syncobj_replace_fence(syncobj, 0, NULL);
>>>        kfree(syncobj);
>>>    }
>>>    EXPORT_SYMBOL(drm_syncobj_free);
>>> @@ -489,13 +283,7 @@ int drm_syncobj_create(struct drm_syncobj 
>>> **out_syncobj, uint32_t flags,
>>>           kref_init(&syncobj->refcount);
>>>        INIT_LIST_HEAD(&syncobj->cb_list);
>>> -    spin_lock_init(&syncobj->pt_lock);
>>> -    mutex_init(&syncobj->cb_mutex);
>>> -    if (flags & DRM_SYNCOBJ_CREATE_TYPE_TIMELINE)
>>> -        syncobj->type = DRM_SYNCOBJ_TYPE_TIMELINE;
>>> -    else
>>> -        syncobj->type = DRM_SYNCOBJ_TYPE_BINARY;
>>> -    drm_syncobj_init(syncobj);
>>> +    spin_lock_init(&syncobj->lock);
>>>           if (flags & DRM_SYNCOBJ_CREATE_SIGNALED) {
>>>            ret = drm_syncobj_assign_null_handle(syncobj);
>>> @@ -778,8 +566,7 @@ drm_syncobj_create_ioctl(struct drm_device *dev, 
>>> void *data,
>>>            return -EOPNOTSUPP;
>>>           /* no valid flags yet */
>>> -    if (args->flags & ~(DRM_SYNCOBJ_CREATE_SIGNALED |
>>> -                DRM_SYNCOBJ_CREATE_TYPE_TIMELINE))
>>> +    if (args->flags & ~DRM_SYNCOBJ_CREATE_SIGNALED)
>>>            return -EINVAL;
>>>           return drm_syncobj_create_as_handle(file_private,
>>> @@ -872,8 +659,9 @@ static void syncobj_wait_syncobj_func(struct 
>>> drm_syncobj *syncobj,
>>>        struct syncobj_wait_entry *wait =
>>>            container_of(cb, struct syncobj_wait_entry, syncobj_cb);
>>>    -    drm_syncobj_search_fence(syncobj, 0, 0, &wait->fence);
>>> -
>>> +    /* This happens inside the syncobj lock */
>>> +    wait->fence = 
>>> dma_fence_get(rcu_dereference_protected(syncobj->fence,
>>> + lockdep_is_held(&syncobj->lock)));
>>>        wake_up_process(wait->task);
>>>    }
>>>    @@ -899,8 +687,7 @@ static signed long 
>>> drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
>>>        signaled_count = 0;
>>>        for (i = 0; i < count; ++i) {
>>>            entries[i].task = current;
>>> -        drm_syncobj_search_fence(syncobjs[i], 0, 0,
>>> -                     &entries[i].fence);
>>> +        entries[i].fence = drm_syncobj_fence_get(syncobjs[i]);
>>>            if (!entries[i].fence) {
>>>                if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) {
>>>                    continue;
>>> @@ -931,9 +718,6 @@ static signed long 
>>> drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
>>>           if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) {
>>>            for (i = 0; i < count; ++i) {
>>> -            if (entries[i].fence)
>>> -                continue;
>>> -
>>> drm_syncobj_fence_get_or_add_callback(syncobjs[i],
>>>                                      &entries[i].fence,
>>> &entries[i].syncobj_cb,
>>> @@ -1165,13 +949,12 @@ drm_syncobj_reset_ioctl(struct drm_device 
>>> *dev, void *data,
>>>        if (ret < 0)
>>>            return ret;
>>>    -    for (i = 0; i < args->count_handles; i++) {
>>> -        drm_syncobj_fini(syncobjs[i]);
>>> -        drm_syncobj_init(syncobjs[i]);
>>> -    }
>>> +    for (i = 0; i < args->count_handles; i++)
>>> +        drm_syncobj_replace_fence(syncobjs[i], 0, NULL);
>>> +
>>>        drm_syncobj_array_free(syncobjs, args->count_handles);
>>>    -    return ret;
>>> +    return 0;
>>>    }
>>>       int
>>> diff --git a/include/drm/drm_syncobj.h b/include/drm/drm_syncobj.h
>>> index 29244cbcd05e..2eda44def639 100644
>>> --- a/include/drm/drm_syncobj.h
>>> +++ b/include/drm/drm_syncobj.h
>>> @@ -30,15 +30,10 @@
>>>       struct drm_syncobj_cb;
>>>    -enum drm_syncobj_type {
>>> -    DRM_SYNCOBJ_TYPE_BINARY,
>>> -    DRM_SYNCOBJ_TYPE_TIMELINE
>>> -};
>>> -
>>>    /**
>>>     * struct drm_syncobj - sync object.
>>>     *
>>> - * This structure defines a generic sync object which is timeline 
>>> based.
>>> + * This structure defines a generic sync object which wraps a 
>>> &dma_fence.
>>>     */
>>>    struct drm_syncobj {
>>>        /**
>>> @@ -46,42 +41,21 @@ struct drm_syncobj {
>>>         */
>>>        struct kref refcount;
>>>        /**
>>> -     * @type: indicate syncobj type
>>> -     */
>>> -    enum drm_syncobj_type type;
>>> -    /**
>>> -     * @wq: wait signal operation work queue
>>> -     */
>>> -    wait_queue_head_t    wq;
>>> -    /**
>>> -     * @timeline_context: fence context used by timeline
>>> +     * @fence:
>>> +     * NULL or a pointer to the fence bound to this object.
>>> +     *
>>> +     * This field should not be used directly. Use 
>>> drm_syncobj_fence_get()
>>> +     * and drm_syncobj_replace_fence() instead.
>>>         */
>>> -    u64 timeline_context;
>>> +    struct dma_fence __rcu *fence;
>>>        /**
>>> -     * @timeline: syncobj timeline value, which indicates point is 
>>> signaled.
>>> +     * @cb_list: List of callbacks to call when the &fence gets 
>>> replaced.
>>>         */
>>> -    u64 timeline;
>>> -    /**
>>> -     * @signal_point: which indicates the latest signaler point.
>>> -     */
>>> -    u64 signal_point;
>>> -    /**
>>> -     * @signal_pt_list: signaler point list.
>>> -     */
>>> -    struct list_head signal_pt_list;
>>> -
>>> -    /**
>>> -         * @cb_list: List of callbacks to call when the &fence gets 
>>> replaced.
>>> -         */
>>>        struct list_head cb_list;
>>>        /**
>>> -     * @pt_lock: Protects pt list.
>>> +     * @lock: Protects &cb_list and write-locks &fence.
>>>         */
>>> -    spinlock_t pt_lock;
>>> -    /**
>>> -     * @cb_mutex: Protects syncobj cb list.
>>> -     */
>>> -    struct mutex cb_mutex;
>>> +    spinlock_t lock;
>>>        /**
>>>         * @file: A file backing for this syncobj.
>>>         */
>>> @@ -94,7 +68,7 @@ typedef void (*drm_syncobj_func_t)(struct 
>>> drm_syncobj *syncobj,
>>>    /**
>>>     * struct drm_syncobj_cb - callback for drm_syncobj_add_callback
>>>     * @node: used by drm_syncob_add_callback to append this struct to
>>> - *       &drm_syncobj.cb_list
>>> + *      &drm_syncobj.cb_list
>>>     * @func: drm_syncobj_func_t to call
>>>     *
>>>     * This struct will be initialized by drm_syncobj_add_callback, 
>>> additional
>>> @@ -132,6 +106,29 @@ drm_syncobj_put(struct drm_syncobj *obj)
>>>        kref_put(&obj->refcount, drm_syncobj_free);
>>>    }
>>>    +/**
>>> + * drm_syncobj_fence_get - get a reference to a fence in a sync object
>>> + * @syncobj: sync object.
>>> + *
>>> + * This acquires additional reference to &drm_syncobj.fence 
>>> contained in @obj,
>>> + * if not NULL. It is illegal to call this without already holding 
>>> a reference.
>>> + * No locks required.
>>> + *
>>> + * Returns:
>>> + * Either the fence of @obj or NULL if there's none.
>>> + */
>>> +static inline struct dma_fence *
>>> +drm_syncobj_fence_get(struct drm_syncobj *syncobj)
>>> +{
>>> +    struct dma_fence *fence;
>>> +
>>> +    rcu_read_lock();
>>> +    fence = dma_fence_get_rcu_safe(&syncobj->fence);
>>> +    rcu_read_unlock();
>>> +
>>> +    return fence;
>>> +}
>>> +
>>>    struct drm_syncobj *drm_syncobj_find(struct drm_file *file_private,
>>>                         u32 handle);
>>>    void drm_syncobj_replace_fence(struct drm_syncobj *syncobj, u64 
>>> point,
>>> @@ -145,7 +142,5 @@ int drm_syncobj_create(struct drm_syncobj 
>>> **out_syncobj, uint32_t flags,
>>>    int drm_syncobj_get_handle(struct drm_file *file_private,
>>>                   struct drm_syncobj *syncobj, u32 *handle);
>>>    int drm_syncobj_get_fd(struct drm_syncobj *syncobj, int *p_fd);
>>> -int drm_syncobj_search_fence(struct drm_syncobj *syncobj, u64 
>>> point, u64 flags,
>>> -                 struct dma_fence **fence);
>>>       #endif
>> _______________________________________________
>> dri-devel mailing list
>> dri-devel@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/dri-devel
>
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel
Eric Anholt Nov. 9, 2018, 9:10 p.m. UTC | #4
zhoucm1 <zhoucm1@amd.com> writes:

> On 2018年11月09日 00:52, Christian König wrote:
>> Am 08.11.18 um 17:07 schrieb Koenig, Christian:
>>> Am 08.11.18 um 17:04 schrieb Eric Anholt:
>>>> Daniel suggested I submit this, since we're still seeing regressions
>>>> from it.  This is a revert to before 48197bc564c7 ("drm: add syncobj
>>>> timeline support v9") and its followon fixes.
>>> This is a harmless false positive from lockdep, Chouming and I are
>>> already working on a fix.
>>
>> On the other hand we had enough trouble with that patch, so if it 
>> really bothers you feel free to add my Acked-by: Christian König 
>> <christian.koenig@amd.com> and push it.
> NAK, please no, I don't think this needed, the Warning totally isn't 
> related to syncobj timeline, but fence-array implementation flaw, just 
> exposed by syncobj.
> In addition, Christian already has a fix for this Warning, I've tested. 
> Please Christian send to public review.

I backed out my revert of #2 (#1 still necessary) after adding the
lockdep regression fix, and now my CTS run got oomkilled after just a
few hours, with these notable lines in the unreclaimable slab info list:

[ 6314.373099] drm_sched_fence        69095KB      69095KB
[ 6314.373653] kmemleak_object       428249KB     428384KB
[ 6314.373736] kmalloc-262144           256KB        256KB
[ 6314.373743] kmalloc-131072           128KB        128KB
[ 6314.373750] kmalloc-65536             64KB         64KB
[ 6314.373756] kmalloc-32768           1472KB       1728KB
[ 6314.373763] kmalloc-16384             64KB         64KB
[ 6314.373770] kmalloc-8192             208KB        208KB
[ 6314.373778] kmalloc-4096            2408KB       2408KB
[ 6314.373784] kmalloc-2048             288KB        336KB
[ 6314.373792] kmalloc-1024            1457KB       1512KB
[ 6314.373800] kmalloc-512              854KB       1048KB
[ 6314.373808] kmalloc-256              188KB        268KB
[ 6314.373817] kmalloc-192            69141KB      69142KB
[ 6314.373824] kmalloc-64             47703KB      47704KB
[ 6314.373886] kmalloc-128            46396KB      46396KB
[ 6314.373894] kmem_cache                31KB         35KB

No results from kmemleak, though.
Eric Anholt Nov. 9, 2018, 10:26 p.m. UTC | #5
Eric Anholt <eric@anholt.net> writes:

> [ Unknown signature status ]
> zhoucm1 <zhoucm1@amd.com> writes:
>
>> On 2018年11月09日 00:52, Christian König wrote:
>>> Am 08.11.18 um 17:07 schrieb Koenig, Christian:
>>>> Am 08.11.18 um 17:04 schrieb Eric Anholt:
>>>>> Daniel suggested I submit this, since we're still seeing regressions
>>>>> from it.  This is a revert to before 48197bc564c7 ("drm: add syncobj
>>>>> timeline support v9") and its followon fixes.
>>>> This is a harmless false positive from lockdep, Chouming and I are
>>>> already working on a fix.
>>>
>>> On the other hand we had enough trouble with that patch, so if it 
>>> really bothers you feel free to add my Acked-by: Christian König 
>>> <christian.koenig@amd.com> and push it.
>> NAK, please no, I don't think this needed, the Warning totally isn't 
>> related to syncobj timeline, but fence-array implementation flaw, just 
>> exposed by syncobj.
>> In addition, Christian already has a fix for this Warning, I've tested. 
>> Please Christian send to public review.
>
> I backed out my revert of #2 (#1 still necessary) after adding the
> lockdep regression fix, and now my CTS run got oomkilled after just a
> few hours, with these notable lines in the unreclaimable slab info list:
>
> [ 6314.373099] drm_sched_fence        69095KB      69095KB
> [ 6314.373653] kmemleak_object       428249KB     428384KB
> [ 6314.373736] kmalloc-262144           256KB        256KB
> [ 6314.373743] kmalloc-131072           128KB        128KB
> [ 6314.373750] kmalloc-65536             64KB         64KB
> [ 6314.373756] kmalloc-32768           1472KB       1728KB
> [ 6314.373763] kmalloc-16384             64KB         64KB
> [ 6314.373770] kmalloc-8192             208KB        208KB
> [ 6314.373778] kmalloc-4096            2408KB       2408KB
> [ 6314.373784] kmalloc-2048             288KB        336KB
> [ 6314.373792] kmalloc-1024            1457KB       1512KB
> [ 6314.373800] kmalloc-512              854KB       1048KB
> [ 6314.373808] kmalloc-256              188KB        268KB
> [ 6314.373817] kmalloc-192            69141KB      69142KB
> [ 6314.373824] kmalloc-64             47703KB      47704KB
> [ 6314.373886] kmalloc-128            46396KB      46396KB
> [ 6314.373894] kmem_cache                31KB         35KB
>
> No results from kmemleak, though.

OK, it looks like the #2 revert probably isn't related to the OOM issue.
Running a single job on otherwise unused DRM, watching /proc/slabinfo
every second for drm_sched_fence, I get:

drm_sched_fence        0      0    192   21    1 : tunables   32   16    8 : slabdata      0      0      0 : globalstat       0      0     0    0    0    0    0    0    0 : cpustat      0      0      0      0
drm_sched_fence       16     21    192   21    1 : tunables   32   16    8 : slabdata      1      1      0 : globalstat      16     16     1    0    0    0    0    0    0 : cpustat      5      1      6      0
drm_sched_fence       13     21    192   21    1 : tunables   32   16    8 : slabdata      1      1      0 : globalstat      16     16     1    0    0    0    0    0    0 : cpustat      5      1      6      0
drm_sched_fence        6     21    192   21    1 : tunables   32   16    8 : slabdata      1      1      0 : globalstat      16     16     1    0    0    0    0    0    0 : cpustat      5      1      6      0
drm_sched_fence        4     21    192   21    1 : tunables   32   16    8 : slabdata      1      1      0 : globalstat      16     16     1    0    0    0    0    0    0 : cpustat      5      1      6      0
drm_sched_fence        2     21    192   21    1 : tunables   32   16    8 : slabdata      1      1      0 : globalstat      16     16     1    0    0    0    0    0    0 : cpustat      5      1      6      0
drm_sched_fence        0     21    192   21    1 : tunables   32   16    8 : slabdata      0      1      0 : globalstat      16     16     1    0    0    0    0    0    0 : cpustat      5      1      6      0

So we generate a ton of fences, and I guess free them slowly because of
RCU?  And presumably kmemleak was sucking up lots of memory because of
how many of these objects were laying around.
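As a side note on the RCU angle: dma_fence_free() is kfree_rcu() under the
hood, so even after the last dma_fence_put() the slab object survives until
the next grace period.  A minimal sketch of a driver fence release path to
illustrate this (the struct and function names are made up for the example,
this is not v3d or scheduler code):

#include <linux/dma-fence.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

/* Hypothetical driver fence, only used to illustrate the release path. */
struct example_fence {
	struct dma_fence base;
	spinlock_t lock;
};

static const char *example_fence_get_name(struct dma_fence *fence)
{
	return "example";
}

/*
 * The release hook hands the object back through dma_fence_free(), i.e.
 * kfree_rcu(): the memory stays accounted to the slab cache until an RCU
 * grace period has elapsed, so a burst of fences that were already put
 * can still show up in /proc/slabinfo for a while.
 */
static void example_fence_release(struct dma_fence *fence)
{
	struct example_fence *f =
		container_of(fence, struct example_fence, base);

	dma_fence_free(&f->base);
}

static const struct dma_fence_ops example_fence_ops = {
	.get_driver_name	= example_fence_get_name,
	.get_timeline_name	= example_fence_get_name,
	.release		= example_fence_release,
};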
Christian König Nov. 12, 2018, 10:16 a.m. UTC | #6
Am 09.11.18 um 23:26 schrieb Eric Anholt:
> Eric Anholt <eric@anholt.net> writes:
>
>> [ Unknown signature status ]
>> zhoucm1 <zhoucm1@amd.com> writes:
>>
>>> On 2018年11月09日 00:52, Christian König wrote:
>>>> Am 08.11.18 um 17:07 schrieb Koenig, Christian:
>>>>> Am 08.11.18 um 17:04 schrieb Eric Anholt:
>>>>>> Daniel suggested I submit this, since we're still seeing regressions
>>>>>> from it.  This is a revert to before 48197bc564c7 ("drm: add syncobj
>>>>>> timeline support v9") and its followon fixes.
>>>>> This is a harmless false positive from lockdep, Chouming and I are
>>>>> already working on a fix.
>>>> On the other hand we had enough trouble with that patch, so if it
>>>> really bothers you feel free to add my Acked-by: Christian König
>>>> <christian.koenig@amd.com> and push it.
>>> NAK, please no, I don't think this needed, the Warning totally isn't
>>> related to syncobj timeline, but fence-array implementation flaw, just
>>> exposed by syncobj.
>>> In addition, Christian already has a fix for this Warning, I've tested.
>>> Please Christian send to public review.
>> I backed out my revert of #2 (#1 still necessary) after adding the
>> lockdep regression fix, and now my CTS run got oomkilled after just a
>> few hours, with these notable lines in the unreclaimable slab info list:
>>
>> [ 6314.373099] drm_sched_fence        69095KB      69095KB
>> [ 6314.373653] kmemleak_object       428249KB     428384KB
>> [ 6314.373736] kmalloc-262144           256KB        256KB
>> [ 6314.373743] kmalloc-131072           128KB        128KB
>> [ 6314.373750] kmalloc-65536             64KB         64KB
>> [ 6314.373756] kmalloc-32768           1472KB       1728KB
>> [ 6314.373763] kmalloc-16384             64KB         64KB
>> [ 6314.373770] kmalloc-8192             208KB        208KB
>> [ 6314.373778] kmalloc-4096            2408KB       2408KB
>> [ 6314.373784] kmalloc-2048             288KB        336KB
>> [ 6314.373792] kmalloc-1024            1457KB       1512KB
>> [ 6314.373800] kmalloc-512              854KB       1048KB
>> [ 6314.373808] kmalloc-256              188KB        268KB
>> [ 6314.373817] kmalloc-192            69141KB      69142KB
>> [ 6314.373824] kmalloc-64             47703KB      47704KB
>> [ 6314.373886] kmalloc-128            46396KB      46396KB
>> [ 6314.373894] kmem_cache                31KB         35KB
>>
>> No results from kmemleak, though.
> OK, it looks like the #2 revert probably isn't related to the OOM issue.
> Running a single job on otherwise unused DRM, watching /proc/slabinfo
> every second for drm_sched_fence, I get:
>
> drm_sched_fence        0      0    192   21    1 : tunables   32   16    8 : slabdata      0      0      0 : globalstat       0      0     0    0    0    0    0    0    0 : cpustat      0      0      0      0
> drm_sched_fence       16     21    192   21    1 : tunables   32   16    8 : slabdata      1      1      0 : globalstat      16     16     1    0    0    0    0    0    0 : cpustat      5      1      6      0
> drm_sched_fence       13     21    192   21    1 : tunables   32   16    8 : slabdata      1      1      0 : globalstat      16     16     1    0    0    0    0    0    0 : cpustat      5      1      6      0
> drm_sched_fence        6     21    192   21    1 : tunables   32   16    8 : slabdata      1      1      0 : globalstat      16     16     1    0    0    0    0    0    0 : cpustat      5      1      6      0
> drm_sched_fence        4     21    192   21    1 : tunables   32   16    8 : slabdata      1      1      0 : globalstat      16     16     1    0    0    0    0    0    0 : cpustat      5      1      6      0
> drm_sched_fence        2     21    192   21    1 : tunables   32   16    8 : slabdata      1      1      0 : globalstat      16     16     1    0    0    0    0    0    0 : cpustat      5      1      6      0
> drm_sched_fence        0     21    192   21    1 : tunables   32   16    8 : slabdata      0      1      0 : globalstat      16     16     1    0    0    0    0    0    0 : cpustat      5      1      6      0
>
> So we generate a ton of fences, and I guess free them slowly because of
> RCU?  And presumably kmemleak was sucking up lots of memory because of
> how many of these objects were laying around.

That is certainly possible. Another possibility is that we don't drop 
the reference in dma-fence-array early enough.

E.g. the dma-fence-array will keep the reference to its fences until it 
is destroyed, which is a bit late when you chain multiple 
dma-fence-array objects together.
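
Roughly along these lines - sketch only, the helper name is invented, and
sorting out locking against dma_fence_array_enable_signaling() as well as
teaching dma_fence_array_release() to skip the NULLed slots is exactly the
part that still needs to be worked out:

/*
 * Sketch: drop the array's reference to a member fence as soon as that
 * fence signals, instead of holding all of them until
 * dma_fence_array_release().  Not the in-tree implementation.
 */
static void dma_fence_array_drop_signaled(struct dma_fence_array *array,
					  struct dma_fence *signaled)
{
	unsigned int i;

	for (i = 0; i < array->num_fences; ++i) {
		if (array->fences[i] == signaled) {
			array->fences[i] = NULL;
			dma_fence_put(signaled);
			break;
		}
	}
}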

David, can you take a look at this and propose a fix? That would probably 
be good to have fixed in dma-fence-array separately from the timeline work.

Thanks,
Christian.

>
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel
Chunming Zhou Nov. 12, 2018, 10:28 a.m. UTC | #7
On 2018年11月12日 18:16, Christian König wrote:
> Am 09.11.18 um 23:26 schrieb Eric Anholt:
>> Eric Anholt<eric@anholt.net>  writes:
>>
>>> [ Unknown signature status ]
>>> zhoucm1<zhoucm1@amd.com>  writes:
>>>
>>>> On 2018年11月09日 00:52, Christian König wrote:
>>>>> Am 08.11.18 um 17:07 schrieb Koenig, Christian:
>>>>>> Am 08.11.18 um 17:04 schrieb Eric Anholt:
>>>>>>> Daniel suggested I submit this, since we're still seeing regressions
>>>>>>> from it.  This is a revert to before 48197bc564c7 ("drm: add syncobj
>>>>>>> timeline support v9") and its followon fixes.
>>>>>> This is a harmless false positive from lockdep, Chouming and I are
>>>>>> already working on a fix.
>>>>> On the other hand we had enough trouble with that patch, so if it
>>>>> really bothers you feel free to add my Acked-by: Christian König
>>>>> <christian.koenig@amd.com>  and push it.
>>>> NAK, please no, I don't think this needed, the Warning totally isn't
>>>> related to syncobj timeline, but fence-array implementation flaw, just
>>>> exposed by syncobj.
>>>> In addition, Christian already has a fix for this Warning, I've tested.
>>>> Please Christian send to public review.
>>> I backed out my revert of #2 (#1 still necessary) after adding the
>>> lockdep regression fix, and now my CTS run got oomkilled after just a
>>> few hours, with these notable lines in the unreclaimable slab info list:
>>>
>>> [ 6314.373099] drm_sched_fence        69095KB      69095KB
>>> [ 6314.373653] kmemleak_object       428249KB     428384KB
>>> [ 6314.373736] kmalloc-262144           256KB        256KB
>>> [ 6314.373743] kmalloc-131072           128KB        128KB
>>> [ 6314.373750] kmalloc-65536             64KB         64KB
>>> [ 6314.373756] kmalloc-32768           1472KB       1728KB
>>> [ 6314.373763] kmalloc-16384             64KB         64KB
>>> [ 6314.373770] kmalloc-8192             208KB        208KB
>>> [ 6314.373778] kmalloc-4096            2408KB       2408KB
>>> [ 6314.373784] kmalloc-2048             288KB        336KB
>>> [ 6314.373792] kmalloc-1024            1457KB       1512KB
>>> [ 6314.373800] kmalloc-512              854KB       1048KB
>>> [ 6314.373808] kmalloc-256              188KB        268KB
>>> [ 6314.373817] kmalloc-192            69141KB      69142KB
>>> [ 6314.373824] kmalloc-64             47703KB      47704KB
>>> [ 6314.373886] kmalloc-128            46396KB      46396KB
>>> [ 6314.373894] kmem_cache                31KB         35KB
>>>
>>> No results from kmemleak, though.
>> OK, it looks like the #2 revert probably isn't related to the OOM issue.
Before you judge whether it is a memory leak, you could run a kmemleak scan first.

>> Running a single job on otherwise unused DRM, watching /proc/slabinfo
>> every second for drm_sched_fence, I get:
>>
>> drm_sched_fence        0      0    192   21    1 : tunables   32   16    8 : slabdata      0      0      0 : globalstat       0      0     0    0    0    0    0    0    0 : cpustat      0      0      0      0
>> drm_sched_fence       16     21    192   21    1 : tunables   32   16    8 : slabdata      1      1      0 : globalstat      16     16     1    0    0    0    0    0    0 : cpustat      5      1      6      0
>> drm_sched_fence       13     21    192   21    1 : tunables   32   16    8 : slabdata      1      1      0 : globalstat      16     16     1    0    0    0    0    0    0 : cpustat      5      1      6      0
>> drm_sched_fence        6     21    192   21    1 : tunables   32   16    8 : slabdata      1      1      0 : globalstat      16     16     1    0    0    0    0    0    0 : cpustat      5      1      6      0
>> drm_sched_fence        4     21    192   21    1 : tunables   32   16    8 : slabdata      1      1      0 : globalstat      16     16     1    0    0    0    0    0    0 : cpustat      5      1      6      0
>> drm_sched_fence        2     21    192   21    1 : tunables   32   16    8 : slabdata      1      1      0 : globalstat      16     16     1    0    0    0    0    0    0 : cpustat      5      1      6      0
>> drm_sched_fence        0     21    192   21    1 : tunables   32   16    8 : slabdata      0      1      0 : globalstat      16     16     1    0    0    0    0    0    0 : cpustat      5      1      6      0
>>
>> So we generate a ton of fences, and I guess free them slowly because of
>> RCU?  And presumably kmemleak was sucking up lots of memory because of
>> how many of these objects were laying around.
>
> That is certainly possible. Another possibility is that we don't drop 
> the reference in dma-fence-array early enough.
>
> E.g. the dma-fence-array will keep the reference to its fences until 
> it is destroyed, which is a bit late when you chain multiple 
> dma-fence-array objects together.
Good point, but that still needs to be confirmed.

>
> David can you take a look at this and propose a fix? That would 
> probably be good to have fixed in dma-fence-array separately to the 
> timeline work.
Yeah, I will find some time for it.

Thanks,
David Zhou
>
> Thanks,
> Christian.
>
>>
>> _______________________________________________
>> dri-devel mailing list
>> dri-devel@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/dri-devel
>
Chris Wilson Nov. 12, 2018, 10:48 a.m. UTC | #8
Quoting Christian König (2018-11-12 10:16:01)
> Am 09.11.18 um 23:26 schrieb Eric Anholt:
> 
>     Eric Anholt <eric@anholt.net> writes:
> 
> 
>         [ Unknown signature status ]
>         zhoucm1 <zhoucm1@amd.com> writes:
> 
> 
>             On 2018年11月09日 00:52, Christian König wrote:
> 
>                 Am 08.11.18 um 17:07 schrieb Koenig, Christian:
> 
>                     Am 08.11.18 um 17:04 schrieb Eric Anholt:
> 
>                         Daniel suggested I submit this, since we're still seeing regressions
>                         from it.  This is a revert to before 48197bc564c7 ("drm: add syncobj
>                         timeline support v9") and its followon fixes.
> 
>                     This is a harmless false positive from lockdep, Chouming and I are
>                     already working on a fix.
> 
>                 On the other hand we had enough trouble with that patch, so if it
>                 really bothers you feel free to add my Acked-by: Christian König
>                 <christian.koenig@amd.com> and push it.
> 
>             NAK, please no, I don't think this needed, the Warning totally isn't
>             related to syncobj timeline, but fence-array implementation flaw, just
>             exposed by syncobj.
>             In addition, Christian already has a fix for this Warning, I've tested.
>             Please Christian send to public review.
> 
>         I backed out my revert of #2 (#1 still necessary) after adding the
>         lockdep regression fix, and now my CTS run got oomkilled after just a
>         few hours, with these notable lines in the unreclaimable slab info list:
> 
>         [ 6314.373099] drm_sched_fence        69095KB      69095KB
>         [ 6314.373653] kmemleak_object       428249KB     428384KB
>         [ 6314.373736] kmalloc-262144           256KB        256KB
>         [ 6314.373743] kmalloc-131072           128KB        128KB
>         [ 6314.373750] kmalloc-65536             64KB         64KB
>         [ 6314.373756] kmalloc-32768           1472KB       1728KB
>         [ 6314.373763] kmalloc-16384             64KB         64KB
>         [ 6314.373770] kmalloc-8192             208KB        208KB
>         [ 6314.373778] kmalloc-4096            2408KB       2408KB
>         [ 6314.373784] kmalloc-2048             288KB        336KB
>         [ 6314.373792] kmalloc-1024            1457KB       1512KB
>         [ 6314.373800] kmalloc-512              854KB       1048KB
>         [ 6314.373808] kmalloc-256              188KB        268KB
>         [ 6314.373817] kmalloc-192            69141KB      69142KB
>         [ 6314.373824] kmalloc-64             47703KB      47704KB
>         [ 6314.373886] kmalloc-128            46396KB      46396KB
>         [ 6314.373894] kmem_cache                31KB         35KB
> 
>         No results from kmemleak, though.
> 
>     OK, it looks like the #2 revert probably isn't related to the OOM issue.
>     Running a single job on otherwise unused DRM, watching /proc/slabinfo
>     every second for drm_sched_fence, I get:
> 
>     drm_sched_fence        0      0    192   21    1 : tunables   32   16    8 : slabdata      0      0      0 : globalstat       0      0     0    0    0    0    0    0    0 : cpustat      0      0      0      0
>     drm_sched_fence       16     21    192   21    1 : tunables   32   16    8 : slabdata      1      1      0 : globalstat      16     16     1    0    0    0    0    0    0 : cpustat      5      1      6      0
>     drm_sched_fence       13     21    192   21    1 : tunables   32   16    8 : slabdata      1      1      0 : globalstat      16     16     1    0    0    0    0    0    0 : cpustat      5      1      6      0
>     drm_sched_fence        6     21    192   21    1 : tunables   32   16    8 : slabdata      1      1      0 : globalstat      16     16     1    0    0    0    0    0    0 : cpustat      5      1      6      0
>     drm_sched_fence        4     21    192   21    1 : tunables   32   16    8 : slabdata      1      1      0 : globalstat      16     16     1    0    0    0    0    0    0 : cpustat      5      1      6      0
>     drm_sched_fence        2     21    192   21    1 : tunables   32   16    8 : slabdata      1      1      0 : globalstat      16     16     1    0    0    0    0    0    0 : cpustat      5      1      6      0
>     drm_sched_fence        0     21    192   21    1 : tunables   32   16    8 : slabdata      0      1      0 : globalstat      16     16     1    0    0    0    0    0    0 : cpustat      5      1      6      0
> 
>     So we generate a ton of fences, and I guess free them slowly because of
>     RCU?  And presumably kmemleak was sucking up lots of memory because of
>     how many of these objects were laying around.
> 
> 
> That is certainly possible. Another possibility is that we don't drop the
> reference in dma-fence-array early enough.
> 
> E.g. the dma-fence-array will keep the reference to its fences until it is
> destroyed, which is a bit late when you chain multiple dma-fence-array objects
> together.
> 
> David can you take a look at this and propose a fix? That would probably be
> good to have fixed in dma-fence-array separately to the timeline work.

Note that drm_syncobj_replace_fence() leaks any existing fence for
!timeline syncobjs. Which coupled with the linear search ends up with
a severe regression in both time and memory.
-Chris
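For reference, a minimal sketch (hypothetical helper name, callback handling omitted) of the reference discipline the revert restores in drm_syncobj_replace_fence(), as visible in the patch at the bottom of this page: the syncobj owns exactly one reference to the currently installed fence, and the previous fence is dropped as soon as it is replaced, so a replacement cannot leak.

#include <linux/dma-fence.h>
#include <drm/drm_syncobj.h>

static void example_replace_fence(struct drm_syncobj *syncobj,
				  struct dma_fence *fence)
{
	struct dma_fence *old_fence;

	if (fence)
		dma_fence_get(fence);	/* reference now owned by syncobj->fence */

	spin_lock(&syncobj->lock);
	old_fence = rcu_dereference_protected(syncobj->fence,
					      lockdep_is_held(&syncobj->lock));
	rcu_assign_pointer(syncobj->fence, fence);
	spin_unlock(&syncobj->lock);

	dma_fence_put(old_fence);	/* drop the fence that was just replaced */
}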
Christian König Nov. 12, 2018, 11:47 a.m. UTC | #9
On 12.11.18 at 11:48, Chris Wilson wrote:
> Quoting Christian König (2018-11-12 10:16:01)
>> Am 09.11.18 um 23:26 schrieb Eric Anholt:
>>
>>      Eric Anholt <eric@anholt.net> writes:
>>
>>
>>          [ Unknown signature status ]
>>          zhoucm1 <zhoucm1@amd.com> writes:
>>
>>
>>              On 2018年11月09日 00:52, Christian König wrote:
>>
>>                  Am 08.11.18 um 17:07 schrieb Koenig, Christian:
>>
>>                      Am 08.11.18 um 17:04 schrieb Eric Anholt:
>>
>>                          Daniel suggested I submit this, since we're still seeing regressions
>>                          from it.  This is a revert to before 48197bc564c7 ("drm: add syncobj
>>                          timeline support v9") and its followon fixes.
>>
>>                      This is a harmless false positive from lockdep, Chunming and I are
>>                      already working on a fix.
>>
>>                  On the other hand we had enough trouble with that patch, so if it
>>                  really bothers you feel free to add my Acked-by: Christian König
>>                  <christian.koenig@amd.com> and push it.
>>
>>              NAK, please no, I don't think this needed, the Warning totally isn't
>>              related to syncobj timeline, but fence-array implementation flaw, just
>>              exposed by syncobj.
>>              In addition, Christian already has a fix for this Warning, I've tested.
>>              Please Christian send to public review.
>>
>>          I backed out my revert of #2 (#1 still necessary) after adding the
>>          lockdep regression fix, and now my CTS run got oomkilled after just a
>>          few hours, with these notable lines in the unreclaimable slab info list:
>>
>>          [ 6314.373099] drm_sched_fence        69095KB      69095KB
>>          [ 6314.373653] kmemleak_object       428249KB     428384KB
>>          [ 6314.373736] kmalloc-262144           256KB        256KB
>>          [ 6314.373743] kmalloc-131072           128KB        128KB
>>          [ 6314.373750] kmalloc-65536             64KB         64KB
>>          [ 6314.373756] kmalloc-32768           1472KB       1728KB
>>          [ 6314.373763] kmalloc-16384             64KB         64KB
>>          [ 6314.373770] kmalloc-8192             208KB        208KB
>>          [ 6314.373778] kmalloc-4096            2408KB       2408KB
>>          [ 6314.373784] kmalloc-2048             288KB        336KB
>>          [ 6314.373792] kmalloc-1024            1457KB       1512KB
>>          [ 6314.373800] kmalloc-512              854KB       1048KB
>>          [ 6314.373808] kmalloc-256              188KB        268KB
>>          [ 6314.373817] kmalloc-192            69141KB      69142KB
>>          [ 6314.373824] kmalloc-64             47703KB      47704KB
>>          [ 6314.373886] kmalloc-128            46396KB      46396KB
>>          [ 6314.373894] kmem_cache                31KB         35KB
>>
>>          No results from kmemleak, though.
>>
>>      OK, it looks like the #2 revert probably isn't related to the OOM issue.
>>      Running a single job on otherwise unused DRM, watching /proc/slabinfo
>>      every second for drm_sched_fence, I get:
>>
>>      drm_sched_fence        0      0    192   21    1 : tunables   32   16    8 : slabdata      0      0      0 : globalstat       0      0     0    0    0    0    0    0    0 : cpustat      0      0      0      0
>>      drm_sched_fence       16     21    192   21    1 : tunables   32   16    8 : slabdata      1      1      0 : globalstat      16     16     1    0    0    0    0    0    0 : cpustat      5      1      6      0
>>      drm_sched_fence       13     21    192   21    1 : tunables   32   16    8 : slabdata      1      1      0 : globalstat      16     16     1    0    0    0    0    0    0 : cpustat      5      1      6      0
>>      drm_sched_fence        6     21    192   21    1 : tunables   32   16    8 : slabdata      1      1      0 : globalstat      16     16     1    0    0    0    0    0    0 : cpustat      5      1      6      0
>>      drm_sched_fence        4     21    192   21    1 : tunables   32   16    8 : slabdata      1      1      0 : globalstat      16     16     1    0    0    0    0    0    0 : cpustat      5      1      6      0
>>      drm_sched_fence        2     21    192   21    1 : tunables   32   16    8 : slabdata      1      1      0 : globalstat      16     16     1    0    0    0    0    0    0 : cpustat      5      1      6      0
>>      drm_sched_fence        0     21    192   21    1 : tunables   32   16    8 : slabdata      0      1      0 : globalstat      16     16     1    0    0    0    0    0    0 : cpustat      5      1      6      0
>>
>>      So we generate a ton of fences, and I guess free them slowly because of
>>      RCU?  And presumably kmemleak was sucking up lots of memory because of
>>      how many of these objects were laying around.
>>
>>
>> That is certainly possible. Another possibility is that we don't drop the
>> reference in dma-fence-array early enough.
>>
>> E.g. the dma-fence-array will keep the reference to its fences until it is
>> destroyed, which is a bit late when you chain multiple dma-fence-array objects
>> together.
>>
>> David can you take a look at this and propose a fix? That would probably be
>> good to have fixed in dma-fence-array separately to the timeline work.
> Note that drm_syncobj_replace_fence() leaks any existing fence for
> !timeline syncobjs. Which coupled with the linear search ends up with
> a severe regression in both time and memory.

Ok, enough is enough. I'm going to revert this.

Thanks for the info,
Christian.

> -Chris
Chunming Zhou Nov. 13, 2018, 5:57 a.m. UTC | #10
On 2018-11-12 18:48, Chris Wilson wrote:
> Quoting Christian König (2018-11-12 10:16:01)
>> Am 09.11.18 um 23:26 schrieb Eric Anholt:
>>
>>      Eric Anholt <eric@anholt.net> writes:
>>
>>
>>          [ Unknown signature status ]
>>          zhoucm1 <zhoucm1@amd.com> writes:
>>
>>
>>              On 2018年11月09日 00:52, Christian König wrote:
>>
>>                  Am 08.11.18 um 17:07 schrieb Koenig, Christian:
>>
>>                      Am 08.11.18 um 17:04 schrieb Eric Anholt:
>>
>>                          Daniel suggested I submit this, since we're still seeing regressions
>>                          from it.  This is a revert to before 48197bc564c7 ("drm: add syncobj
>>                          timeline support v9") and its followon fixes.
>>
>>                      This is a harmless false positive from lockdep, Chunming and I are
>>                      already working on a fix.
>>
>>                  On the other hand we had enough trouble with that patch, so if it
>>                  really bothers you feel free to add my Acked-by: Christian König
>>                  <christian.koenig@amd.com> and push it.
>>
>>              NAK, please no, I don't think this needed, the Warning totally isn't
>>              related to syncobj timeline, but fence-array implementation flaw, just
>>              exposed by syncobj.
>>              In addition, Christian already has a fix for this Warning, I've tested.
>>              Please Christian send to public review.
>>
>>          I backed out my revert of #2 (#1 still necessary) after adding the
>>          lockdep regression fix, and now my CTS run got oomkilled after just a
>>          few hours, with these notable lines in the unreclaimable slab info list:
>>
>>          [ 6314.373099] drm_sched_fence        69095KB      69095KB
>>          [ 6314.373653] kmemleak_object       428249KB     428384KB
>>          [ 6314.373736] kmalloc-262144           256KB        256KB
>>          [ 6314.373743] kmalloc-131072           128KB        128KB
>>          [ 6314.373750] kmalloc-65536             64KB         64KB
>>          [ 6314.373756] kmalloc-32768           1472KB       1728KB
>>          [ 6314.373763] kmalloc-16384             64KB         64KB
>>          [ 6314.373770] kmalloc-8192             208KB        208KB
>>          [ 6314.373778] kmalloc-4096            2408KB       2408KB
>>          [ 6314.373784] kmalloc-2048             288KB        336KB
>>          [ 6314.373792] kmalloc-1024            1457KB       1512KB
>>          [ 6314.373800] kmalloc-512              854KB       1048KB
>>          [ 6314.373808] kmalloc-256              188KB        268KB
>>          [ 6314.373817] kmalloc-192            69141KB      69142KB
>>          [ 6314.373824] kmalloc-64             47703KB      47704KB
>>          [ 6314.373886] kmalloc-128            46396KB      46396KB
>>          [ 6314.373894] kmem_cache                31KB         35KB
>>
>>          No results from kmemleak, though.
>>
>>      OK, it looks like the #2 revert probably isn't related to the OOM issue.
>>      Running a single job on otherwise unused DRM, watching /proc/slabinfo
>>      every second for drm_sched_fence, I get:
>>
>>      drm_sched_fence        0      0    192   21    1 : tunables   32   16    8 : slabdata      0      0      0 : globalstat       0      0     0    0    0    0    0    0    0 : cpustat      0      0      0      0
>>      drm_sched_fence       16     21    192   21    1 : tunables   32   16    8 : slabdata      1      1      0 : globalstat      16     16     1    0    0    0    0    0    0 : cpustat      5      1      6      0
>>      drm_sched_fence       13     21    192   21    1 : tunables   32   16    8 : slabdata      1      1      0 : globalstat      16     16     1    0    0    0    0    0    0 : cpustat      5      1      6      0
>>      drm_sched_fence        6     21    192   21    1 : tunables   32   16    8 : slabdata      1      1      0 : globalstat      16     16     1    0    0    0    0    0    0 : cpustat      5      1      6      0
>>      drm_sched_fence        4     21    192   21    1 : tunables   32   16    8 : slabdata      1      1      0 : globalstat      16     16     1    0    0    0    0    0    0 : cpustat      5      1      6      0
>>      drm_sched_fence        2     21    192   21    1 : tunables   32   16    8 : slabdata      1      1      0 : globalstat      16     16     1    0    0    0    0    0    0 : cpustat      5      1      6      0
>>      drm_sched_fence        0     21    192   21    1 : tunables   32   16    8 : slabdata      0      1      0 : globalstat      16     16     1    0    0    0    0    0    0 : cpustat      5      1      6      0
>>
>>      So we generate a ton of fences, and I guess free them slowly because of
>>      RCU?  And presumably kmemleak was sucking up lots of memory because of
>>      how many of these objects were laying around.
>>
>>
>> That is certainly possible. Another possibility is that we don't drop the
>> reference in dma-fence-array early enough.
>>
>> E.g. the dma-fence-array will keep the reference to its fences until it is
>> destroyed, which is a bit late when you chain multiple dma-fence-array objects
>> together.
>>
>> David can you take a look at this and propose a fix? That would probably be
>> good to have fixed in dma-fence-array separately to the timeline work.
> Note that drm_syncobj_replace_fence() leaks any existing fence for
> !timeline syncobjs.
Hi Chris,

Huh, isn't the existing fence collected as garbage?

Could you point out where/how the existing fence leaks?

Thanks,
David
> Which coupled with the linear search ends up with
> a severe regression in both time and memory.
> -Chris
Chunming Zhou Nov. 13, 2018, 6:18 a.m. UTC | #11
On 2018-11-12 18:16, Christian König wrote:
> Am 09.11.18 um 23:26 schrieb Eric Anholt:
>> Eric Anholt<eric@anholt.net>  writes:
>>
>>> [ Unknown signature status ]
>>> zhoucm1<zhoucm1@amd.com>  writes:
>>>
>>>> On 2018年11月09日 00:52, Christian König wrote:
>>>>> Am 08.11.18 um 17:07 schrieb Koenig, Christian:
>>>>>> Am 08.11.18 um 17:04 schrieb Eric Anholt:
>>>>>>> Daniel suggested I submit this, since we're still seeing regressions
>>>>>>> from it.  This is a revert to before 48197bc564c7 ("drm: add syncobj
>>>>>>> timeline support v9") and its followon fixes.
>>>>>> This is a harmless false positive from lockdep, Chunming and I are
>>>>>> already working on a fix.
>>>>> On the other hand we had enough trouble with that patch, so if it
>>>>> really bothers you feel free to add my Acked-by: Christian König
>>>>> <christian.koenig@amd.com>  and push it.
>>>> NAK, please no, I don't think this needed, the Warning totally isn't
>>>> related to syncobj timeline, but fence-array implementation flaw, just
>>>> exposed by syncobj.
>>>> In addition, Christian already has a fix for this Warning, I've tested.
>>>> Please Christian send to public review.
>>> I backed out my revert of #2 (#1 still necessary) after adding the
>>> lockdep regression fix, and now my CTS run got oomkilled after just a
>>> few hours, with these notable lines in the unreclaimable slab info list:
>>>
>>> [ 6314.373099] drm_sched_fence        69095KB      69095KB
>>> [ 6314.373653] kmemleak_object       428249KB     428384KB
>>> [ 6314.373736] kmalloc-262144           256KB        256KB
>>> [ 6314.373743] kmalloc-131072           128KB        128KB
>>> [ 6314.373750] kmalloc-65536             64KB         64KB
>>> [ 6314.373756] kmalloc-32768           1472KB       1728KB
>>> [ 6314.373763] kmalloc-16384             64KB         64KB
>>> [ 6314.373770] kmalloc-8192             208KB        208KB
>>> [ 6314.373778] kmalloc-4096            2408KB       2408KB
>>> [ 6314.373784] kmalloc-2048             288KB        336KB
>>> [ 6314.373792] kmalloc-1024            1457KB       1512KB
>>> [ 6314.373800] kmalloc-512              854KB       1048KB
>>> [ 6314.373808] kmalloc-256              188KB        268KB
>>> [ 6314.373817] kmalloc-192            69141KB      69142KB
>>> [ 6314.373824] kmalloc-64             47703KB      47704KB
>>> [ 6314.373886] kmalloc-128            46396KB      46396KB
>>> [ 6314.373894] kmem_cache                31KB         35KB
>>>
>>> No results from kmemleak, though.
>> OK, it looks like the #2 revert probably isn't related to the OOM issue.
>> Running a single job on otherwise unused DRM, watching /proc/slabinfo
>> every second for drm_sched_fence, I get:
>>
>> drm_sched_fence        0      0    192   21    1 : tunables   32   16    8 : slabdata      0      0      0 : globalstat       0      0     0    0    0    0    0    0    0 : cpustat      0      0      0      0
>> drm_sched_fence       16     21    192   21    1 : tunables   32   16    8 : slabdata      1      1      0 : globalstat      16     16     1    0    0    0    0    0    0 : cpustat      5      1      6      0
>> drm_sched_fence       13     21    192   21    1 : tunables   32   16    8 : slabdata      1      1      0 : globalstat      16     16     1    0    0    0    0    0    0 : cpustat      5      1      6      0
>> drm_sched_fence        6     21    192   21    1 : tunables   32   16    8 : slabdata      1      1      0 : globalstat      16     16     1    0    0    0    0    0    0 : cpustat      5      1      6      0
>> drm_sched_fence        4     21    192   21    1 : tunables   32   16    8 : slabdata      1      1      0 : globalstat      16     16     1    0    0    0    0    0    0 : cpustat      5      1      6      0
>> drm_sched_fence        2     21    192   21    1 : tunables   32   16    8 : slabdata      1      1      0 : globalstat      16     16     1    0    0    0    0    0    0 : cpustat      5      1      6      0
>> drm_sched_fence        0     21    192   21    1 : tunables   32   16    8 : slabdata      0      1      0 : globalstat      16     16     1    0    0    0    0    0    0 : cpustat      5      1      6      0
>>
>> So we generate a ton of fences, and I guess free them slowly because of
>> RCU?  And presumably kmemleak was sucking up lots of memory because of
>> how many of these objects were laying around.
Hi Eric,
Thanks for testing. I checked the code: we forgot to enable signaling on
the fence-array right away. After several passes over the code, it looks
like without a callback on the fence-array everything just waits for a
fence_wait or for the syncobj to be freed; that's why you see them
"freed slowly".
Maybe we just need the one-line change attached; could you please give it
a try with your tests?

By the way, I also didn't find a fence leak; if there is one, could you
point me to it?

Thanks,
David
>
> That is certainly possible. Another possibility is that we don't drop 
> the reference in dma-fence-array early enough.
>
> E.g. the dma-fence-array will keep the reference to its fences until 
> it is destroyed, which is a bit late when you chain multiple 
> dma-fence-array objects together.
>
> David can you take a look at this and propose a fix? That would 
> probably be good to have fixed in dma-fence-array separately to the 
> timeline work.
>
> Thanks,
> Christian.
>
>>
>> _______________________________________________
>> dri-devel mailing list
>> dri-devel@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/dri-devel
>
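Before the attached patch below, a short illustration of the chaining Christian describes above (the names follow drm_syncobj_create_signal_pt() from the timeline code; this is a sketch, not a quote of it): every new timeline signal point wraps both the new fence and the previous point's fence array, so the whole chain stays referenced until the newest point is released.

	/* Building the fence list for timeline signal point N+1: */
	fences[0] = dma_fence_get(new_fence);
	fences[1] = dma_fence_get(&prev_signal_pt->fence_array->base);

	signal_pt->fence_array = dma_fence_array_create(2, fences,
							timeline_context,
							point, false);

	/*
	 * Point N+1 now references point N's array, which references N-1,
	 * and so on; a dma_fence_array only drops its child references when
	 * it is released, so none of the wrapped fences can be freed before
	 * the newest point in the chain goes away.
	 */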
From 6f03f63393be5897ef33a1714ea2ce8d18999014 Mon Sep 17 00:00:00 2001
From: Chunming Zhou <david1.zhou@amd.com>
Date: Tue, 13 Nov 2018 14:10:13 +0800
Subject: [PATCH] drm/syncobj: enable signaling for fence_array of signal pt

Otherwise the fence_array cannot be signaled in time.

Signed-off-by: Chunming Zhou <david1.zhou@amd.com>
---
 drivers/gpu/drm/drm_syncobj.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index da2b85eec6cf..109abb5df399 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -253,6 +253,7 @@ static int drm_syncobj_create_signal_pt(struct drm_syncobj *syncobj,
 		goto fail;
 	}
 
+	dma_fence_enable_sw_signaling(&signal_pt->fence_array->base);
 	spin_lock(&syncobj->pt_lock);
 	if (syncobj->signal_point >= point) {
 		DRM_WARN("A later signal is ready!");
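To spell out why the one-liner matters, a hedged sketch (hypothetical helper name; the real call site is drm_syncobj_create_signal_pt() in the hunk above): a dma_fence_array only installs callbacks on its child fences once signaling has been enabled, so without this call the array never reports itself signaled on its own, and the syncobj garbage collection keeps every signal point alive until someone waits on it or frees the syncobj.

#include <linux/dma-fence.h>
#include <linux/dma-fence-array.h>

static struct dma_fence_array *
example_create_signal_pt_array(struct dma_fence **fences, int num_fences,
			       u64 context, u64 point)
{
	struct dma_fence_array *array;

	array = dma_fence_array_create(num_fences, fences, context, point,
				       false);
	if (!array)
		return NULL;

	/*
	 * Hook the array up to its child fences right away instead of
	 * waiting for the first waiter; only then does the array signal
	 * (and become collectable) as soon as its children signal.
	 */
	dma_fence_enable_sw_signaling(&array->base);

	return array;
}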
Eric Anholt Nov. 15, 2018, 6:47 p.m. UTC | #12
Eric Anholt <eric@anholt.net> writes:

> [ Unknown signature status ]
> "Koenig, Christian" <Christian.Koenig@amd.com> writes:
>
>> Hi Eric,
>>
>> thanks for your time to test this.
>>
>> Am 14.11.18 um 08:07 schrieb Eric Anholt:
>>> zhoucm1 <zhoucm1@amd.com> writes:
>>>
>>>> Ping.. Eric, Could you help try attached patch for your memory issue?
>>>>
>>>> If you have no time, Could you tell me which tests you're using? Where
>>>> can I get the test app?
>>> You sent the patch yesterday.  I burned 3 of the 4 days I worked last
>>> week trying to track down this pile of regressions, and I had other
>>> things I was supposed to be doing.  My driver is *still* broken in
>>> drm-misc-next.
>>
>> Including the revert?
>
> As far as I know?  Today I worked on cleaning up my old igts so I'll
> have automated tests of v3d gpu reset instead of a git stash of some
> mesa hacks.  Once that's ready I'll rebase onto drm-misc-next and try
> again, hopefully tomorrow.

OK, got the igts working.  My test is:

Submit hanging job
Submit non-hanging job
Wait on non-hanging job.

With patch 1/2 in place, recovery works -- the wait returns after 2
seconds.  With patch 1/2 removed, the test never completes, and a ^C
produces:

[   96.795892] ------------[ cut here ]------------
[   96.795932] WARNING: CPU: 3 PID: 3149 at drivers/dma-buf/dma-fence.c:225 dma_fence_release+0xfc/0x1b0
[   96.795937] Modules linked in:
[   96.795948] CPU: 3 PID: 3149 Comm: v3d_cl_hang Not tainted 4.20.0-rc1+ #497
[   96.795953] Hardware name: Broadcom STB (Flattened Device Tree)
[   96.795971] [<c021237c>] (unwind_backtrace) from [<c020d8f0>] (show_stack+0x10/0x14)
[   96.795984] [<c020d8f0>] (show_stack) from [<c0c3f6a8>] (dump_stack+0xa8/0xd4)
[   96.795997] [<c0c3f6a8>] (dump_stack) from [<c0223568>] (__warn.part.3+0xbc/0xec)
[   96.796005] [<c0223568>] (__warn.part.3) from [<c02236f8>] (warn_slowpath_null+0x44/0x4c)
[   96.796012] [<c02236f8>] (warn_slowpath_null) from [<c0874b04>] (dma_fence_release+0xfc/0x1b0)
[   96.796024] [<c0874b04>] (dma_fence_release) from [<c06f08fc>] (drm_sched_fence_release_scheduled+0x64/0x68)
[   96.796032] [<c06f08fc>] (drm_sched_fence_release_scheduled) from [<c06f0f84>] (drm_sched_entity_fini+0x1e4/0x1e8)
[   96.796042] [<c06f0f84>] (drm_sched_entity_fini) from [<c083c9cc>] (v3d_postclose+0x18/0x2c)
[   96.796055] [<c083c9cc>] (v3d_postclose) from [<c06be7b8>] (drm_file_free.part.0+0x1e8/0x2c0)
[   96.796064] [<c06be7b8>] (drm_file_free.part.0) from [<c06bedf0>] (drm_release+0x8c/0x108)
[   96.796074] [<c06bedf0>] (drm_release) from [<c03930ec>] (__fput+0xa4/0x1e4)
[   96.796083] [<c03930ec>] (__fput) from [<c02480cc>] (task_work_run+0x9c/0xd0)
[   96.796092] [<c02480cc>] (task_work_run) from [<c022817c>] (do_exit+0x474/0xbf0)
[   96.796099] [<c022817c>] (do_exit) from [<c0229de0>] (do_group_exit+0x3c/0xbc)
[   96.796111] [<c0229de0>] (do_group_exit) from [<c02371a4>] (get_signal+0x250/0x9f4)
[   96.796118] [<c02371a4>] (get_signal) from [<c020cf00>] (do_work_pending+0x118/0x5f8)
[   96.796126] [<c020cf00>] (do_work_pending) from [<c0201034>] (slow_work_pending+0xc/0x20)
[   96.796131] Exception stack(0xe90f7fb0 to 0xe90f7ff8)
Dmitry Osipenko Dec. 19, 2018, 5:53 p.m. UTC | #13
On 08.11.2018 19:04, Eric Anholt wrote:
> Daniel suggested I submit this, since we're still seeing regressions
> from it.  This is a revert to before 48197bc564c7 ("drm: add syncobj
> timeline support v9") and its followon fixes.
> 
> Fixes this on first V3D testcase execution:
> 
> [   48.767088] ============================================
> [   48.772410] WARNING: possible recursive locking detected
> [   48.777739] 4.19.0-rc6+ #489 Not tainted
> [   48.781668] --------------------------------------------
> [   48.786993] shader_runner/3284 is trying to acquire lock:
> [   48.792408] ce309d7f (&(&array->lock)->rlock){....}, at: dma_fence_add_callback+0x30/0x23c
> [   48.800714]
> [   48.800714] but task is already holding lock:
> [   48.806559] c5952bd3 (&(&array->lock)->rlock){....}, at: dma_fence_add_callback+0x30/0x23c
> [   48.814862]
> [   48.814862] other info that might help us debug this:
> [   48.821410]  Possible unsafe locking scenario:
> [   48.821410]
> [   48.827338]        CPU0
> [   48.829788]        ----
> [   48.832239]   lock(&(&array->lock)->rlock);
> [   48.836434]   lock(&(&array->lock)->rlock);
> [   48.840640]
> [   48.840640]  *** DEADLOCK ***
> [   48.840640]
> [   48.846582]  May be due to missing lock nesting notation
> [  130.763560] 1 lock held by cts-runner/3270:
> [  130.767745]  #0: 7834b793 (&(&array->lock)->rlock){-...}, at: dma_fence_add_callback+0x30/0x23c
> [  130.776461]
>                stack backtrace:
> [  130.780825] CPU: 1 PID: 3270 Comm: cts-runner Not tainted 4.19.0-rc6+ #486
> [  130.787706] Hardware name: Broadcom STB (Flattened Device Tree)
> [  130.793645] [<c021269c>] (unwind_backtrace) from [<c020db1c>] (show_stack+0x10/0x14)
> [  130.801404] [<c020db1c>] (show_stack) from [<c0c2c4b0>] (dump_stack+0xa8/0xd4)
> [  130.808642] [<c0c2c4b0>] (dump_stack) from [<c0281a84>] (__lock_acquire+0x848/0x1a68)
> [  130.816483] [<c0281a84>] (__lock_acquire) from [<c02835d8>] (lock_acquire+0xd8/0x22c)
> [  130.824326] [<c02835d8>] (lock_acquire) from [<c0c49948>] (_raw_spin_lock_irqsave+0x54/0x68)
> [  130.832777] [<c0c49948>] (_raw_spin_lock_irqsave) from [<c086bf54>] (dma_fence_add_callback+0x30/0x23c)
> [  130.842183] [<c086bf54>] (dma_fence_add_callback) from [<c086d4c8>] (dma_fence_array_enable_signaling+0x58/0xec)
> [  130.852371] [<c086d4c8>] (dma_fence_array_enable_signaling) from [<c086c00c>] (dma_fence_add_callback+0xe8/0x23c)
> [  130.862647] [<c086c00c>] (dma_fence_add_callback) from [<c06d8774>] (drm_syncobj_wait_ioctl+0x518/0x614)
> [  130.872143] [<c06d8774>] (drm_syncobj_wait_ioctl) from [<c06b8458>] (drm_ioctl_kernel+0xb0/0xf0)
> [  130.880940] [<c06b8458>] (drm_ioctl_kernel) from [<c06b8818>] (drm_ioctl+0x1d8/0x390)
> [  130.888782] [<c06b8818>] (drm_ioctl) from [<c03a4510>] (do_vfs_ioctl+0xb0/0x8ac)
> [  130.896187] [<c03a4510>] (do_vfs_ioctl) from [<c03a4d40>] (ksys_ioctl+0x34/0x60)
> [  130.903593] [<c03a4d40>] (ksys_ioctl) from [<c0201000>] (ret_fast_syscall+0x0/0x28)
> 
> Cc: Chunming Zhou <david1.zhou@amd.com>
> Cc: Christian König <christian.koenig@amd.com>
> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> Signed-off-by: Eric Anholt <eric@anholt.net>
> ---


[snip]

> @@ -931,9 +718,6 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
>  
>  	if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) {
>  		for (i = 0; i < count; ++i) {
> -			if (entries[i].fence)
> -				continue;
> -
>  			drm_syncobj_fence_get_or_add_callback(syncobjs[i],
>  							      &entries[i].fence,
>  							      &entries[i].syncobj_cb,

Hello,

The three lines removed above were added in commit 337fe9f5c1e7de ("drm/syncobj: Don't leak fences when WAIT_FOR_SUBMIT is set"), which fixed a memleak. Removing them brings the memleak back because of unbalanced fence refcounting, and it looks like they were removed unintentionally in this patch.
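To spell out the imbalance, a condensed and annotated walk through drm_syncobj_array_wait_timeout() as it looks after this revert (an illustration of the flow, not a compilable excerpt):

	/* First pass: every syncobj that already has a fence hands out one
	 * reference, stored in entries[i].fence. */
	for (i = 0; i < count; ++i)
		entries[i].fence = drm_syncobj_fence_get(syncobjs[i]);

	/* WAIT_FOR_SUBMIT pass: without the removed "if (entries[i].fence)
	 * continue;" check, this overwrites entries[i].fence with a second,
	 * separately referenced fence, and the reference taken in the first
	 * pass is lost. */
	if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) {
		for (i = 0; i < count; ++i)
			drm_syncobj_fence_get_or_add_callback(syncobjs[i],
							      &entries[i].fence,
							      &entries[i].syncobj_cb,
							      syncobj_wait_syncobj_func);
	}

	/* Cleanup does a single dma_fence_put() per entry, so only whatever
	 * fence is left in entries[i].fence gets released. */
	for (i = 0; i < count; ++i)
		dma_fence_put(entries[i].fence);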
Christian König Dec. 21, 2018, 6:27 p.m. UTC | #14
On 19.12.18 at 18:53, Dmitry Osipenko wrote:
> [SNIP]
>> @@ -931,9 +718,6 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
>>   
>>   	if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) {
>>   		for (i = 0; i < count; ++i) {
>> -			if (entries[i].fence)
>> -				continue;
>> -
>>   			drm_syncobj_fence_get_or_add_callback(syncobjs[i],
>>   							      &entries[i].fence,
>>   							      &entries[i].syncobj_cb,
> Hello,
>
> The three lines removed above were added in commit 337fe9f5c1e7de ("drm/syncobj: Don't leak fences when WAIT_FOR_SUBMIT is set"), which fixed a memleak. Removing them brings the memleak back because of unbalanced fence refcounting, and it looks like they were removed unintentionally in this patch.

That was already fixed with 61a98b1b9a8c7 drm/syncobj: remove 
drm_syncobj_cb and cleanup.

This cleanup removed all the duplicate checking and is now adding the 
callback only once.

Regards,
Christian.
Dmitry Osipenko Dec. 21, 2018, 6:35 p.m. UTC | #15
On 21.12.2018 21:27, Christian König wrote:
> Am 19.12.18 um 18:53 schrieb Dmitry Osipenko:
>> [SNIP]
>>> @@ -931,9 +718,6 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
>>>         if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) {
>>>           for (i = 0; i < count; ++i) {
>>> -            if (entries[i].fence)
>>> -                continue;
>>> -
>>>               drm_syncobj_fence_get_or_add_callback(syncobjs[i],
>>>                                     &entries[i].fence,
>>>                                     &entries[i].syncobj_cb,
>> Hello,
>>
>> The three lines removed above were added in commit 337fe9f5c1e7de ("drm/syncobj: Don't leak fences when WAIT_FOR_SUBMIT is set"), which fixed a memleak. Removing them brings the memleak back because of unbalanced fence refcounting, and it looks like they were removed unintentionally in this patch.
> 
> That was already fixed with 61a98b1b9a8c7 drm/syncobj: remove drm_syncobj_cb and cleanup.
> 
> This cleanup removed all the duplicate checking and is now adding the callback only once.

Okay, though that commit is not in linux-next. I assume it will show up eventually.
Christian König Dec. 21, 2018, 6:45 p.m. UTC | #16
On 21.12.18 at 19:35, Dmitry Osipenko wrote:
> On 21.12.2018 21:27, Christian König wrote:
>> Am 19.12.18 um 18:53 schrieb Dmitry Osipenko:
>>> [SNIP]
>>>> @@ -931,9 +718,6 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
>>>>          if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) {
>>>>            for (i = 0; i < count; ++i) {
>>>> -            if (entries[i].fence)
>>>> -                continue;
>>>> -
>>>>                drm_syncobj_fence_get_or_add_callback(syncobjs[i],
>>>>                                      &entries[i].fence,
>>>>                                      &entries[i].syncobj_cb,
>>> Hello,
>>>
>>> The three lines removed above were added in commit 337fe9f5c1e7de ("drm/syncobj: Don't leak fences when WAIT_FOR_SUBMIT is set"), which fixed a memleak. Removing them brings the memleak back because of unbalanced fence refcounting, and it looks like they were removed unintentionally in this patch.
>> That was already fixed with 61a98b1b9a8c7 drm/syncobj: remove drm_syncobj_cb and cleanup.
>>
>> This cleanup removed all the duplicate checking and is now adding the callback only once.
> Okay, though that commit is not in linux-next. I assume it will show up eventually.

Need to double check, that could indeed be a problem.

Christian.
Dmitry Osipenko Dec. 21, 2018, 6:59 p.m. UTC | #17
On 21.12.2018 21:45, Koenig, Christian wrote:
> Am 21.12.18 um 19:35 schrieb Dmitry Osipenko:
>> On 21.12.2018 21:27, Christian König wrote:
>>> Am 19.12.18 um 18:53 schrieb Dmitry Osipenko:
>>>> [SNIP]
>>>>> @@ -931,9 +718,6 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
>>>>>          if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) {
>>>>>            for (i = 0; i < count; ++i) {
>>>>> -            if (entries[i].fence)
>>>>> -                continue;
>>>>> -
>>>>>                drm_syncobj_fence_get_or_add_callback(syncobjs[i],
>>>>>                                      &entries[i].fence,
>>>>>                                      &entries[i].syncobj_cb,
>>>> Hello,
>>>>
>>>> The three lines removed above were added in commit 337fe9f5c1e7de ("drm/syncobj: Don't leak fences when WAIT_FOR_SUBMIT is set"), which fixed a memleak. Removing them brings the memleak back because of unbalanced fence refcounting, and it looks like they were removed unintentionally in this patch.
>>> That was already fixed with 61a98b1b9a8c7 drm/syncobj: remove drm_syncobj_cb and cleanup.
>>>
>>> This cleanup removed all the duplicate checking and is now adding the callback only once.
>> Okay, though that commit is not in linux-next. I assume it will show up eventually.
> 
> Need to double check, that could indeed be a problem.

Thanks for taking care!
diff mbox series

Patch

diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index da8175d9c6ff..e2c5b3ca4824 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -56,9 +56,6 @@ 
 #include "drm_internal.h"
 #include <drm/drm_syncobj.h>
 
-/* merge normal syncobj to timeline syncobj, the point interval is 1 */
-#define DRM_SYNCOBJ_BINARY_POINT 1
-
 struct drm_syncobj_stub_fence {
 	struct dma_fence base;
 	spinlock_t lock;
@@ -74,29 +71,7 @@  static const struct dma_fence_ops drm_syncobj_stub_fence_ops = {
 	.get_timeline_name = drm_syncobj_stub_fence_get_name,
 };
 
-struct drm_syncobj_signal_pt {
-	struct dma_fence_array *fence_array;
-	u64    value;
-	struct list_head list;
-};
-
-static DEFINE_SPINLOCK(signaled_fence_lock);
-static struct dma_fence signaled_fence;
 
-static struct dma_fence *drm_syncobj_get_stub_fence(void)
-{
-	spin_lock(&signaled_fence_lock);
-	if (!signaled_fence.ops) {
-		dma_fence_init(&signaled_fence,
-			       &drm_syncobj_stub_fence_ops,
-			       &signaled_fence_lock,
-			       0, 0);
-		dma_fence_signal_locked(&signaled_fence);
-	}
-	spin_unlock(&signaled_fence_lock);
-
-	return dma_fence_get(&signaled_fence);
-}
 /**
  * drm_syncobj_find - lookup and reference a sync object.
  * @file_private: drm file private pointer
@@ -123,27 +98,6 @@  struct drm_syncobj *drm_syncobj_find(struct drm_file *file_private,
 }
 EXPORT_SYMBOL(drm_syncobj_find);
 
-static struct dma_fence *
-drm_syncobj_find_signal_pt_for_point(struct drm_syncobj *syncobj,
-				     uint64_t point)
-{
-	struct drm_syncobj_signal_pt *signal_pt;
-
-	if ((syncobj->type == DRM_SYNCOBJ_TYPE_TIMELINE) &&
-	    (point <= syncobj->timeline))
-		return drm_syncobj_get_stub_fence();
-
-	list_for_each_entry(signal_pt, &syncobj->signal_pt_list, list) {
-		if (point > signal_pt->value)
-			continue;
-		if ((syncobj->type == DRM_SYNCOBJ_TYPE_BINARY) &&
-		    (point != signal_pt->value))
-			continue;
-		return dma_fence_get(&signal_pt->fence_array->base);
-	}
-	return NULL;
-}
-
 static void drm_syncobj_add_callback_locked(struct drm_syncobj *syncobj,
 					    struct drm_syncobj_cb *cb,
 					    drm_syncobj_func_t func)
@@ -152,158 +106,53 @@  static void drm_syncobj_add_callback_locked(struct drm_syncobj *syncobj,
 	list_add_tail(&cb->node, &syncobj->cb_list);
 }
 
-static void drm_syncobj_fence_get_or_add_callback(struct drm_syncobj *syncobj,
-						  struct dma_fence **fence,
-						  struct drm_syncobj_cb *cb,
-						  drm_syncobj_func_t func)
+static int drm_syncobj_fence_get_or_add_callback(struct drm_syncobj *syncobj,
+						 struct dma_fence **fence,
+						 struct drm_syncobj_cb *cb,
+						 drm_syncobj_func_t func)
 {
-	u64 pt_value = 0;
-
-	WARN_ON(*fence);
-
-	if (syncobj->type == DRM_SYNCOBJ_TYPE_BINARY) {
-		/*BINARY syncobj always wait on last pt */
-		pt_value = syncobj->signal_point;
+	int ret;
 
-		if (pt_value == 0)
-			pt_value += DRM_SYNCOBJ_BINARY_POINT;
-	}
+	*fence = drm_syncobj_fence_get(syncobj);
+	if (*fence)
+		return 1;
 
-	mutex_lock(&syncobj->cb_mutex);
-	spin_lock(&syncobj->pt_lock);
-	*fence = drm_syncobj_find_signal_pt_for_point(syncobj, pt_value);
-	spin_unlock(&syncobj->pt_lock);
-	if (!*fence)
+	spin_lock(&syncobj->lock);
+	/* We've already tried once to get a fence and failed.  Now that we
+	 * have the lock, try one more time just to be sure we don't add a
+	 * callback when a fence has already been set.
+	 */
+	if (syncobj->fence) {
+		*fence = dma_fence_get(rcu_dereference_protected(syncobj->fence,
+								 lockdep_is_held(&syncobj->lock)));
+		ret = 1;
+	} else {
+		*fence = NULL;
 		drm_syncobj_add_callback_locked(syncobj, cb, func);
-	mutex_unlock(&syncobj->cb_mutex);
-}
-
-static void drm_syncobj_remove_callback(struct drm_syncobj *syncobj,
-					struct drm_syncobj_cb *cb)
-{
-	mutex_lock(&syncobj->cb_mutex);
-	list_del_init(&cb->node);
-	mutex_unlock(&syncobj->cb_mutex);
-}
+		ret = 0;
+	}
+	spin_unlock(&syncobj->lock);
 
-static void drm_syncobj_init(struct drm_syncobj *syncobj)
-{
-	spin_lock(&syncobj->pt_lock);
-	syncobj->timeline_context = dma_fence_context_alloc(1);
-	syncobj->timeline = 0;
-	syncobj->signal_point = 0;
-	init_waitqueue_head(&syncobj->wq);
-
-	INIT_LIST_HEAD(&syncobj->signal_pt_list);
-	spin_unlock(&syncobj->pt_lock);
+	return ret;
 }
 
-static void drm_syncobj_fini(struct drm_syncobj *syncobj)
+void drm_syncobj_add_callback(struct drm_syncobj *syncobj,
+			      struct drm_syncobj_cb *cb,
+			      drm_syncobj_func_t func)
 {
-	struct drm_syncobj_signal_pt *signal_pt = NULL, *tmp;
-
-	spin_lock(&syncobj->pt_lock);
-	list_for_each_entry_safe(signal_pt, tmp,
-				 &syncobj->signal_pt_list, list) {
-		list_del(&signal_pt->list);
-		dma_fence_put(&signal_pt->fence_array->base);
-		kfree(signal_pt);
-	}
-	spin_unlock(&syncobj->pt_lock);
+	spin_lock(&syncobj->lock);
+	drm_syncobj_add_callback_locked(syncobj, cb, func);
+	spin_unlock(&syncobj->lock);
 }
 
-static int drm_syncobj_create_signal_pt(struct drm_syncobj *syncobj,
-					struct dma_fence *fence,
-					u64 point)
+void drm_syncobj_remove_callback(struct drm_syncobj *syncobj,
+				 struct drm_syncobj_cb *cb)
 {
-	struct drm_syncobj_signal_pt *signal_pt =
-		kzalloc(sizeof(struct drm_syncobj_signal_pt), GFP_KERNEL);
-	struct drm_syncobj_signal_pt *tail_pt;
-	struct dma_fence **fences;
-	int num_fences = 0;
-	int ret = 0, i;
-
-	if (!signal_pt)
-		return -ENOMEM;
-	if (!fence)
-		goto out;
-
-	fences = kmalloc_array(sizeof(void *), 2, GFP_KERNEL);
-	if (!fences) {
-		ret = -ENOMEM;
-		goto out;
-	}
-	fences[num_fences++] = dma_fence_get(fence);
-	/* timeline syncobj must take this dependency */
-	if (syncobj->type == DRM_SYNCOBJ_TYPE_TIMELINE) {
-		spin_lock(&syncobj->pt_lock);
-		if (!list_empty(&syncobj->signal_pt_list)) {
-			tail_pt = list_last_entry(&syncobj->signal_pt_list,
-						  struct drm_syncobj_signal_pt, list);
-			fences[num_fences++] =
-				dma_fence_get(&tail_pt->fence_array->base);
-		}
-		spin_unlock(&syncobj->pt_lock);
-	}
-	signal_pt->fence_array = dma_fence_array_create(num_fences, fences,
-							syncobj->timeline_context,
-							point, false);
-	if (!signal_pt->fence_array) {
-		ret = -ENOMEM;
-		goto fail;
-	}
-
-	spin_lock(&syncobj->pt_lock);
-	if (syncobj->signal_point >= point) {
-		DRM_WARN("A later signal is ready!");
-		spin_unlock(&syncobj->pt_lock);
-		goto exist;
-	}
-	signal_pt->value = point;
-	list_add_tail(&signal_pt->list, &syncobj->signal_pt_list);
-	syncobj->signal_point = point;
-	spin_unlock(&syncobj->pt_lock);
-	wake_up_all(&syncobj->wq);
-
-	return 0;
-exist:
-	dma_fence_put(&signal_pt->fence_array->base);
-fail:
-	for (i = 0; i < num_fences; i++)
-		dma_fence_put(fences[i]);
-	kfree(fences);
-out:
-	kfree(signal_pt);
-	return ret;
+	spin_lock(&syncobj->lock);
+	list_del_init(&cb->node);
+	spin_unlock(&syncobj->lock);
 }
 
-static void drm_syncobj_garbage_collection(struct drm_syncobj *syncobj)
-{
-	struct drm_syncobj_signal_pt *signal_pt, *tmp, *tail_pt;
-
-	spin_lock(&syncobj->pt_lock);
-	tail_pt = list_last_entry(&syncobj->signal_pt_list,
-				  struct drm_syncobj_signal_pt,
-				  list);
-	list_for_each_entry_safe(signal_pt, tmp,
-				 &syncobj->signal_pt_list, list) {
-		if (syncobj->type == DRM_SYNCOBJ_TYPE_BINARY &&
-		    signal_pt == tail_pt)
-			continue;
-		if (dma_fence_is_signaled(&signal_pt->fence_array->base)) {
-			syncobj->timeline = signal_pt->value;
-			list_del(&signal_pt->list);
-			dma_fence_put(&signal_pt->fence_array->base);
-			kfree(signal_pt);
-		} else {
-			/*signal_pt is in order in list, from small to big, so
-			 * the later must not be signal either */
-			break;
-		}
-	}
-
-	spin_unlock(&syncobj->pt_lock);
-}
 /**
  * drm_syncobj_replace_fence - replace fence in a sync object.
  * @syncobj: Sync object to replace fence in
@@ -316,30 +165,28 @@  void drm_syncobj_replace_fence(struct drm_syncobj *syncobj,
 			       u64 point,
 			       struct dma_fence *fence)
 {
-	u64 pt_value = point;
-
-	drm_syncobj_garbage_collection(syncobj);
-	if (syncobj->type == DRM_SYNCOBJ_TYPE_BINARY) {
-		if (!fence) {
-			drm_syncobj_fini(syncobj);
-			drm_syncobj_init(syncobj);
-			return;
-		}
-		pt_value = syncobj->signal_point +
-			DRM_SYNCOBJ_BINARY_POINT;
-	}
-	drm_syncobj_create_signal_pt(syncobj, fence, pt_value);
-	if (fence) {
-		struct drm_syncobj_cb *cur, *tmp;
-		LIST_HEAD(cb_list);
+	struct dma_fence *old_fence;
+	struct drm_syncobj_cb *cur, *tmp;
+
+	if (fence)
+		dma_fence_get(fence);
+
+	spin_lock(&syncobj->lock);
 
-		mutex_lock(&syncobj->cb_mutex);
+	old_fence = rcu_dereference_protected(syncobj->fence,
+					      lockdep_is_held(&syncobj->lock));
+	rcu_assign_pointer(syncobj->fence, fence);
+
+	if (fence != old_fence) {
 		list_for_each_entry_safe(cur, tmp, &syncobj->cb_list, node) {
 			list_del_init(&cur->node);
 			cur->func(syncobj, cur);
 		}
-		mutex_unlock(&syncobj->cb_mutex);
 	}
+
+	spin_unlock(&syncobj->lock);
+
+	dma_fence_put(old_fence);
 }
 EXPORT_SYMBOL(drm_syncobj_replace_fence);
 
@@ -362,64 +209,6 @@  static int drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj)
 	return 0;
 }
 
-static int
-drm_syncobj_point_get(struct drm_syncobj *syncobj, u64 point, u64 flags,
-		      struct dma_fence **fence)
-{
-	int ret = 0;
-
-	if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) {
-		ret = wait_event_interruptible(syncobj->wq,
-					       point <= syncobj->signal_point);
-		if (ret < 0)
-			return ret;
-	}
-	spin_lock(&syncobj->pt_lock);
-	*fence = drm_syncobj_find_signal_pt_for_point(syncobj, point);
-	if (!*fence)
-		ret = -EINVAL;
-	spin_unlock(&syncobj->pt_lock);
-	return ret;
-}
-
-/**
- * drm_syncobj_search_fence - lookup and reference the fence in a sync object or
- * in a timeline point
- * @syncobj: sync object pointer
- * @point: timeline point
- * @flags: DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT or not
- * @fence: out parameter for the fence
- *
- * if flags is DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT, the function will block
- * here until specific timeline points is reached.
- * if not, you need a submit thread and block in userspace until all future
- * timeline points have materialized, only then you can submit to the kernel,
- * otherwise, function will fail to return fence.
- *
- * Returns 0 on success or a negative error value on failure. On success @fence
- * contains a reference to the fence, which must be released by calling
- * dma_fence_put().
- */
-int drm_syncobj_search_fence(struct drm_syncobj *syncobj, u64 point,
-			     u64 flags, struct dma_fence **fence)
-{
-	u64 pt_value = point;
-
-	if (!syncobj)
-		return -ENOENT;
-
-	drm_syncobj_garbage_collection(syncobj);
-	if (syncobj->type == DRM_SYNCOBJ_TYPE_BINARY) {
-		/*BINARY syncobj always wait on last pt */
-		pt_value = syncobj->signal_point;
-
-		if (pt_value == 0)
-			pt_value += DRM_SYNCOBJ_BINARY_POINT;
-	}
-	return drm_syncobj_point_get(syncobj, pt_value, flags, fence);
-}
-EXPORT_SYMBOL(drm_syncobj_search_fence);
-
 /**
  * drm_syncobj_find_fence - lookup and reference the fence in a sync object
  * @file_private: drm file private pointer
@@ -429,7 +218,7 @@  EXPORT_SYMBOL(drm_syncobj_search_fence);
  * @fence: out parameter for the fence
  *
  * This is just a convenience function that combines drm_syncobj_find() and
- * drm_syncobj_lookup_fence().
+ * drm_syncobj_fence_get().
  *
  * Returns 0 on success or a negative error value on failure. On success @fence
  * contains a reference to the fence, which must be released by calling
@@ -440,11 +229,16 @@  int drm_syncobj_find_fence(struct drm_file *file_private,
 			   struct dma_fence **fence)
 {
 	struct drm_syncobj *syncobj = drm_syncobj_find(file_private, handle);
-	int ret;
+	int ret = 0;
 
-	ret = drm_syncobj_search_fence(syncobj, point, flags, fence);
-	if (syncobj)
-		drm_syncobj_put(syncobj);
+	if (!syncobj)
+		return -ENOENT;
+
+	*fence = drm_syncobj_fence_get(syncobj);
+	if (!*fence) {
+		ret = -EINVAL;
+	}
+	drm_syncobj_put(syncobj);
 	return ret;
 }
 EXPORT_SYMBOL(drm_syncobj_find_fence);
@@ -460,7 +254,7 @@  void drm_syncobj_free(struct kref *kref)
 	struct drm_syncobj *syncobj = container_of(kref,
 						   struct drm_syncobj,
 						   refcount);
-	drm_syncobj_fini(syncobj);
+	drm_syncobj_replace_fence(syncobj, 0, NULL);
 	kfree(syncobj);
 }
 EXPORT_SYMBOL(drm_syncobj_free);
@@ -489,13 +283,7 @@  int drm_syncobj_create(struct drm_syncobj **out_syncobj, uint32_t flags,
 
 	kref_init(&syncobj->refcount);
 	INIT_LIST_HEAD(&syncobj->cb_list);
-	spin_lock_init(&syncobj->pt_lock);
-	mutex_init(&syncobj->cb_mutex);
-	if (flags & DRM_SYNCOBJ_CREATE_TYPE_TIMELINE)
-		syncobj->type = DRM_SYNCOBJ_TYPE_TIMELINE;
-	else
-		syncobj->type = DRM_SYNCOBJ_TYPE_BINARY;
-	drm_syncobj_init(syncobj);
+	spin_lock_init(&syncobj->lock);
 
 	if (flags & DRM_SYNCOBJ_CREATE_SIGNALED) {
 		ret = drm_syncobj_assign_null_handle(syncobj);
@@ -778,8 +566,7 @@  drm_syncobj_create_ioctl(struct drm_device *dev, void *data,
 		return -EOPNOTSUPP;
 
 	/* no valid flags yet */
-	if (args->flags & ~(DRM_SYNCOBJ_CREATE_SIGNALED |
-			    DRM_SYNCOBJ_CREATE_TYPE_TIMELINE))
+	if (args->flags & ~DRM_SYNCOBJ_CREATE_SIGNALED)
 		return -EINVAL;
 
 	return drm_syncobj_create_as_handle(file_private,
@@ -872,8 +659,9 @@  static void syncobj_wait_syncobj_func(struct drm_syncobj *syncobj,
 	struct syncobj_wait_entry *wait =
 		container_of(cb, struct syncobj_wait_entry, syncobj_cb);
 
-	drm_syncobj_search_fence(syncobj, 0, 0, &wait->fence);
-
+	/* This happens inside the syncobj lock */
+	wait->fence = dma_fence_get(rcu_dereference_protected(syncobj->fence,
+							      lockdep_is_held(&syncobj->lock)));
 	wake_up_process(wait->task);
 }
 
@@ -899,8 +687,7 @@  static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
 	signaled_count = 0;
 	for (i = 0; i < count; ++i) {
 		entries[i].task = current;
-		drm_syncobj_search_fence(syncobjs[i], 0, 0,
-					 &entries[i].fence);
+		entries[i].fence = drm_syncobj_fence_get(syncobjs[i]);
 		if (!entries[i].fence) {
 			if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) {
 				continue;
@@ -931,9 +718,6 @@  static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
 
 	if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) {
 		for (i = 0; i < count; ++i) {
-			if (entries[i].fence)
-				continue;
-
 			drm_syncobj_fence_get_or_add_callback(syncobjs[i],
 							      &entries[i].fence,
 							      &entries[i].syncobj_cb,
@@ -1165,13 +949,12 @@  drm_syncobj_reset_ioctl(struct drm_device *dev, void *data,
 	if (ret < 0)
 		return ret;
 
-	for (i = 0; i < args->count_handles; i++) {
-		drm_syncobj_fini(syncobjs[i]);
-		drm_syncobj_init(syncobjs[i]);
-	}
+	for (i = 0; i < args->count_handles; i++)
+		drm_syncobj_replace_fence(syncobjs[i], 0, NULL);
+
 	drm_syncobj_array_free(syncobjs, args->count_handles);
 
-	return ret;
+	return 0;
 }
 
 int
diff --git a/include/drm/drm_syncobj.h b/include/drm/drm_syncobj.h
index 29244cbcd05e..2eda44def639 100644
--- a/include/drm/drm_syncobj.h
+++ b/include/drm/drm_syncobj.h
@@ -30,15 +30,10 @@ 
 
 struct drm_syncobj_cb;
 
-enum drm_syncobj_type {
-	DRM_SYNCOBJ_TYPE_BINARY,
-	DRM_SYNCOBJ_TYPE_TIMELINE
-};
-
 /**
  * struct drm_syncobj - sync object.
  *
- * This structure defines a generic sync object which is timeline based.
+ * This structure defines a generic sync object which wraps a &dma_fence.
  */
 struct drm_syncobj {
 	/**
@@ -46,42 +41,21 @@  struct drm_syncobj {
 	 */
 	struct kref refcount;
 	/**
-	 * @type: indicate syncobj type
-	 */
-	enum drm_syncobj_type type;
-	/**
-	 * @wq: wait signal operation work queue
-	 */
-	wait_queue_head_t	wq;
-	/**
-	 * @timeline_context: fence context used by timeline
+	 * @fence:
+	 * NULL or a pointer to the fence bound to this object.
+	 *
+	 * This field should not be used directly. Use drm_syncobj_fence_get()
+	 * and drm_syncobj_replace_fence() instead.
 	 */
-	u64 timeline_context;
+	struct dma_fence __rcu *fence;
 	/**
-	 * @timeline: syncobj timeline value, which indicates point is signaled.
+	 * @cb_list: List of callbacks to call when the &fence gets replaced.
 	 */
-	u64 timeline;
-	/**
-	 * @signal_point: which indicates the latest signaler point.
-	 */
-	u64 signal_point;
-	/**
-	 * @signal_pt_list: signaler point list.
-	 */
-	struct list_head signal_pt_list;
-
-	/**
-         * @cb_list: List of callbacks to call when the &fence gets replaced.
-         */
 	struct list_head cb_list;
 	/**
-	 * @pt_lock: Protects pt list.
+	 * @lock: Protects &cb_list and write-locks &fence.
 	 */
-	spinlock_t pt_lock;
-	/**
-	 * @cb_mutex: Protects syncobj cb list.
-	 */
-	struct mutex cb_mutex;
+	spinlock_t lock;
 	/**
 	 * @file: A file backing for this syncobj.
 	 */
@@ -94,7 +68,7 @@  typedef void (*drm_syncobj_func_t)(struct drm_syncobj *syncobj,
 /**
  * struct drm_syncobj_cb - callback for drm_syncobj_add_callback
  * @node: used by drm_syncob_add_callback to append this struct to
- *       &drm_syncobj.cb_list
+ *	  &drm_syncobj.cb_list
  * @func: drm_syncobj_func_t to call
  *
  * This struct will be initialized by drm_syncobj_add_callback, additional
@@ -132,6 +106,29 @@  drm_syncobj_put(struct drm_syncobj *obj)
 	kref_put(&obj->refcount, drm_syncobj_free);
 }
 
+/**
+ * drm_syncobj_fence_get - get a reference to a fence in a sync object
+ * @syncobj: sync object.
+ *
+ * This acquires additional reference to &drm_syncobj.fence contained in @obj,
+ * if not NULL. It is illegal to call this without already holding a reference.
+ * No locks required.
+ *
+ * Returns:
+ * Either the fence of @obj or NULL if there's none.
+ */
+static inline struct dma_fence *
+drm_syncobj_fence_get(struct drm_syncobj *syncobj)
+{
+	struct dma_fence *fence;
+
+	rcu_read_lock();
+	fence = dma_fence_get_rcu_safe(&syncobj->fence);
+	rcu_read_unlock();
+
+	return fence;
+}
+
 struct drm_syncobj *drm_syncobj_find(struct drm_file *file_private,
 				     u32 handle);
 void drm_syncobj_replace_fence(struct drm_syncobj *syncobj, u64 point,
@@ -145,7 +142,5 @@  int drm_syncobj_create(struct drm_syncobj **out_syncobj, uint32_t flags,
 int drm_syncobj_get_handle(struct drm_file *file_private,
 			   struct drm_syncobj *syncobj, u32 *handle);
 int drm_syncobj_get_fd(struct drm_syncobj *syncobj, int *p_fd);
-int drm_syncobj_search_fence(struct drm_syncobj *syncobj, u64 point, u64 flags,
-			     struct dma_fence **fence);
 
 #endif
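As a usage note, a hedged driver-side sketch (hypothetical function name) of the binary-only syncobj API this revert returns to: look up the syncobj, install the job's completion fence with drm_syncobj_replace_fence() (the point argument is unused once timelines are reverted out), then drop the lookup reference.

#include <drm/drm_syncobj.h>

static int example_attach_out_syncobj(struct drm_file *file_priv,
				      u32 handle,
				      struct dma_fence *done_fence)
{
	struct drm_syncobj *syncobj;

	syncobj = drm_syncobj_find(file_priv, handle);
	if (!syncobj)
		return -ENOENT;

	/* The point argument is ignored for binary syncobjs. */
	drm_syncobj_replace_fence(syncobj, 0, done_fence);

	drm_syncobj_put(syncobj);
	return 0;
}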