[4/7] dma-buf: add dma_fence_chain_garbage_collect

Message ID 20210610091800.1833-5-christian.koenig@amd.com (mailing list archive)
State New, archived
Series [1/7] dma-buf: some dma_fence_chain improvements

Commit Message

Christian König June 10, 2021, 9:17 a.m. UTC
Add some rather sophisticated lockless garbage collection
for dma_fence_chain objects.

For this, keep all initialized dma_fence_chain nodes on a
queue and trigger garbage collection before a new one is
allocated.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/dma-buf/dma-fence-chain.c | 160 +++++++++++++++++++++++++-----
 include/linux/dma-fence-chain.h   |   5 +
 2 files changed, 142 insertions(+), 23 deletions(-)
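
The caller-visible flow is unchanged by this patch; a minimal sketch of how a
driver hits the new code paths (prev_fence, fence and seqno are placeholders
for whatever the driver is chaining up):

        struct dma_fence_chain *chain;

        /* dma_fence_chain_alloc() now runs dma_fence_chain_garbage_collect()
         * once before the kmalloc(), reclaiming at most one queued node.
         */
        chain = dma_fence_chain_alloc();
        if (!chain)
                return -ENOMEM;

        /* dma_fence_chain_init() puts the new node on the global gc fifo. */
        dma_fence_chain_init(chain, prev_fence, fence, seqno);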

Comments

Daniel Vetter June 11, 2021, 8:58 a.m. UTC | #1
On Thu, Jun 10, 2021 at 11:17:57AM +0200, Christian König wrote:
> Add some rather sophisticated lockless garbage collection
> for dma_fence_chain objects.
> 
> For this, keep all initialized dma_fence_chain nodes on a
> queue and trigger garbage collection before a new one is
> allocated.
> 
> Signed-off-by: Christian König <christian.koenig@amd.com>

Uh hand-rolled lockless list, I'm not a fan.

But the real question here is why? This is a global list, so it's going to
look great on your desktop, but gpus are for servers now and those are
NUMA. So just from that pov doing garbage-collection individually still
feels like a much better idea.

So what's the problem you're trying to solve here?
-Daniel
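
For reference, a rough sketch of the same queue built on the generic
<linux/llist.h> primitives; gc_node here is a hypothetical struct llist_node
member that would replace @next, and llist_del_first() needs its callers
serialized (e.g. by a spinlock), so this is an illustration of the point
rather than a drop-in replacement for the single-word lockless fifo:

        #include <linux/llist.h>
        #include <linux/spinlock.h>

        static LLIST_HEAD(gc_list);
        static DEFINE_SPINLOCK(gc_lock);

        static void dma_fence_chain_enqueue(struct dma_fence_chain *chain)
        {
                /* lockless, multi-producer safe add */
                llist_add(&chain->gc_node, &gc_list);
        }

        static struct dma_fence_chain *dma_fence_chain_dequeue(void)
        {
                struct llist_node *node;

                /* only one llist_del_first() caller may run at a time */
                spin_lock(&gc_lock);
                node = llist_del_first(&gc_list);
                spin_unlock(&gc_lock);

                return node ? llist_entry(node, struct dma_fence_chain, gc_node)
                            : NULL;
        }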

Christian König June 11, 2021, 10:07 a.m. UTC | #2
On 11.06.21 10:58, Daniel Vetter wrote:
> On Thu, Jun 10, 2021 at 11:17:57AM +0200, Christian König wrote:
>> Add some rather sophisticated lockless garbage collection
>> for dma_fence_chain objects.
>>
>> For this, keep all initialized dma_fence_chain nodes on a
>> queue and trigger garbage collection before a new one is
>> allocated.
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
> Uh hand-rolled lockless list, I'm not a fan.
>
> But the real question here is why? This is a global list, so it's going to
> look great on your desktop, but gpus are for servers now and those are
> NUMA. So just from that pov doing garbage-collection individually still
> feels like a much better idea.

Yeah, I was pondering on that quite a bit as well. That's why I added the
multi-threaded alloc/free test.

> So what's the problem you're trying to solve here?

I was not sure if the chain is garbage collected enough when used for 
the dma_resv exclusive object.

But I think we could just drop this for now and just see how it goes.

Christian.
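
For context, some pruning already happens on demand today:
dma_fence_chain_walk() drops signaled prev nodes as a side effect whenever
the chain is iterated, e.g. (excl_fence is a placeholder for the fence
installed in the dma_resv exclusive slot):

        struct dma_fence *iter;

        /* the iterator calls dma_fence_chain_walk(), which replaces and
         * thereby frees already signaled prev nodes */
        dma_fence_chain_for_each(iter, excl_fence) {
                if (dma_fence_is_signaled(iter))
                        continue;
                /* handle the still pending fence, e.g. wait on it */
        }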

Daniel Vetter June 11, 2021, 3:11 p.m. UTC | #3
On Fri, Jun 11, 2021 at 12:07:00PM +0200, Christian König wrote:
> On 11.06.21 10:58, Daniel Vetter wrote:
> > On Thu, Jun 10, 2021 at 11:17:57AM +0200, Christian König wrote:
> > > Add some rather sophisticated lockless garbage collection
> > > for dma_fence_chain objects.
> > > 
> > > For this, keep all initialized dma_fence_chain nodes on a
> > > queue and trigger garbage collection before a new one is
> > > allocated.
> > > 
> > > Signed-off-by: Christian König <christian.koenig@amd.com>
> > Uh hand-rolled lockless list, I'm not a fan.
> > 
> > But the real question here is why? This is a global list, so it's going to
> > look great on your desktop, but gpus are for servers now and those are
> > NUMA. So just from that pov doing garbage-collection individually still
> > feels like a much better idea.
> 
> Yeah, I was pondering on that quite a bit as well. That's why I added the
> multi-threaded alloc/free test.
> 
> > So what's the problem you're trying to solve here?
> 
> I was not sure if the chain is garbage collected enough when used for the
> dma_resv exclusive object.
> 
> But I think we could just drop this for now and just see how it goes.

Yeah I think fixing a perf/tuning problem when we're hitting it is much
better than trying to guess what it might be and writing something really
clever to anticipate that. We generally get it wrong.

Also, with the explanation of what it tests added, the testcase makes
sense. Just some documentation or a comment would have helped.
-Daniel

kernel test robot June 16, 2021, 5:29 p.m. UTC | #4
Hi Christian,

I love your patch! Perhaps something to improve:

[auto build test WARNING on drm-intel/for-linux-next]
[also build test WARNING on drm-exynos/exynos-drm-next tegra-drm/drm/tegra/for-next linus/master v5.13-rc6 next-20210616]
[cannot apply to drm-tip/drm-tip drm/drm-next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Christian-K-nig/dma-buf-some-dma_fence_chain-improvements/20210616-182806
base:   git://anongit.freedesktop.org/drm-intel for-linux-next
config: alpha-randconfig-s031-20210615 (attached as .config)
compiler: alpha-linux-gcc (GCC) 9.3.0
reproduce:
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # apt-get install sparse
        # sparse version: v0.6.3-341-g8af24329-dirty
        # https://github.com/0day-ci/linux/commit/058b99a2929f84c7f2cb1516dcc8674bf3f2a661
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Christian-K-nig/dma-buf-some-dma_fence_chain-improvements/20210616-182806
        git checkout 058b99a2929f84c7f2cb1516dcc8674bf3f2a661
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' W=1 ARCH=alpha 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>


sparse warnings: (new ones prefixed by >>)
>> drivers/dma-buf/dma-fence-chain.c:28:15: sparse: sparse: incorrect type in initializer (different address spaces) @@     expected struct dma_fence_chain **_x_ @@     got struct dma_fence_chain [noderef] __rcu ** @@
   drivers/dma-buf/dma-fence-chain.c:28:15: sparse:     expected struct dma_fence_chain **_x_
   drivers/dma-buf/dma-fence-chain.c:28:15: sparse:     got struct dma_fence_chain [noderef] __rcu **
>> drivers/dma-buf/dma-fence-chain.c:28:13: sparse: sparse: incorrect type in assignment (different address spaces) @@     expected struct dma_fence_chain [noderef] __rcu **tmp @@     got struct dma_fence_chain **[assigned] __ret @@
   drivers/dma-buf/dma-fence-chain.c:28:13: sparse:     expected struct dma_fence_chain [noderef] __rcu **tmp
   drivers/dma-buf/dma-fence-chain.c:28:13: sparse:     got struct dma_fence_chain **[assigned] __ret

vim +28 drivers/dma-buf/dma-fence-chain.c

    16	
    17	/**
    18	 * dma_fence_chain_enqueue - enqeue a chain node for garbage collection
    19	 * @chain: the chain node to enqueue
    20	 *
    21	 * Add the chain node to the end of the gc fifo.
    22	 */
    23	static void dma_fence_chain_enqueue(struct dma_fence_chain *chain)
    24	{
    25		struct dma_fence_chain __rcu **tmp;
    26	
    27		RCU_INIT_POINTER(chain->next, NULL);
  > 28		tmp = xchg((struct dma_fence_chain __force ***)&fifo_back,
    29			   &chain->next);
    30	
    31		/* This is intentionally unordered since we only need the fifo for gc */
    32		rcu_assign_pointer(*tmp, chain);
    33	}
    34	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
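
The mismatch comes from the __force cast on &fifo_back: inside xchg() the
temporary is then typed as a plain struct dma_fence_chain **, while both
&chain->next and tmp carry the __rcu annotation. Since fifo_back itself is
not an __rcu pointer, one possible (untested) fix might be to simply drop
the cast and let xchg() work on the annotated type:

        static void dma_fence_chain_enqueue(struct dma_fence_chain *chain)
        {
                struct dma_fence_chain __rcu **tmp;

                RCU_INIT_POINTER(chain->next, NULL);
                /* fifo_back is a plain pointer to an __rcu pointer, so no
                 * address space conversion should be needed for the xchg().
                 */
                tmp = xchg(&fifo_back, &chain->next);

                /* This is intentionally unordered since we only need the fifo for gc */
                rcu_assign_pointer(*tmp, chain);
        }
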
kernel test robot June 16, 2021, 6:50 p.m. UTC | #5
Hi Christian,

I love your patch! Perhaps something to improve:

[auto build test WARNING on drm-intel/for-linux-next]
[also build test WARNING on drm-exynos/exynos-drm-next tegra-drm/drm/tegra/for-next linus/master v5.13-rc6 next-20210616]
[cannot apply to drm-tip/drm-tip drm/drm-next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Christian-K-nig/dma-buf-some-dma_fence_chain-improvements/20210616-182806
base:   git://anongit.freedesktop.org/drm-intel for-linux-next
config: i386-randconfig-s001-20210615 (attached as .config)
compiler: gcc-9 (Debian 9.3.0-22) 9.3.0
reproduce:
        # apt-get install sparse
        # sparse version: v0.6.3-341-g8af24329-dirty
        # https://github.com/0day-ci/linux/commit/058b99a2929f84c7f2cb1516dcc8674bf3f2a661
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Christian-K-nig/dma-buf-some-dma_fence_chain-improvements/20210616-182806
        git checkout 058b99a2929f84c7f2cb1516dcc8674bf3f2a661
        # save the attached .config to linux build tree
        make W=1 C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' W=1 ARCH=i386 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>


sparse warnings: (new ones prefixed by >>)
>> drivers/dma-buf/dma-fence-chain.c:28:15: sparse: sparse: incorrect type in initializer (different address spaces) @@     expected struct dma_fence_chain **__ret @@     got struct dma_fence_chain [noderef] __rcu ** @@
   drivers/dma-buf/dma-fence-chain.c:28:15: sparse:     expected struct dma_fence_chain **__ret
   drivers/dma-buf/dma-fence-chain.c:28:15: sparse:     got struct dma_fence_chain [noderef] __rcu **
   drivers/dma-buf/dma-fence-chain.c:28:13: sparse: sparse: incorrect type in assignment (different address spaces) @@     expected struct dma_fence_chain [noderef] __rcu **tmp @@     got struct dma_fence_chain **[assigned] __ret @@
   drivers/dma-buf/dma-fence-chain.c:28:13: sparse:     expected struct dma_fence_chain [noderef] __rcu **tmp
   drivers/dma-buf/dma-fence-chain.c:28:13: sparse:     got struct dma_fence_chain **[assigned] __ret

vim +28 drivers/dma-buf/dma-fence-chain.c

    16	
    17	/**
    18	 * dma_fence_chain_enqueue - enqeue a chain node for garbage collection
    19	 * @chain: the chain node to enqueue
    20	 *
    21	 * Add the chain node to the end of the gc fifo.
    22	 */
    23	static void dma_fence_chain_enqueue(struct dma_fence_chain *chain)
    24	{
    25		struct dma_fence_chain __rcu **tmp;
    26	
    27		RCU_INIT_POINTER(chain->next, NULL);
  > 28		tmp = xchg((struct dma_fence_chain __force ***)&fifo_back,
    29			   &chain->next);
    30	
    31		/* This is intentionally unordered since we only need the fifo for gc */
    32		rcu_assign_pointer(*tmp, chain);
    33	}
    34	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
Patch

diff --git a/drivers/dma-buf/dma-fence-chain.c b/drivers/dma-buf/dma-fence-chain.c
index 1b4cb3e5cec9..c2f0b69eabb7 100644
--- a/drivers/dma-buf/dma-fence-chain.c
+++ b/drivers/dma-buf/dma-fence-chain.c
@@ -9,8 +9,53 @@ 
 
 #include <linux/dma-fence-chain.h>
 
+static struct dma_fence_chain __rcu *fifo_front;
+static struct dma_fence_chain __rcu **fifo_back = &fifo_front;
+
 static bool dma_fence_chain_enable_signaling(struct dma_fence *fence);
 
+/**
+ * dma_fence_chain_enqueue - enqueue a chain node for garbage collection
+ * @chain: the chain node to enqueue
+ *
+ * Add the chain node to the end of the gc fifo.
+ */
+static void dma_fence_chain_enqueue(struct dma_fence_chain *chain)
+{
+	struct dma_fence_chain __rcu **tmp;
+
+	RCU_INIT_POINTER(chain->next, NULL);
+	tmp = xchg((struct dma_fence_chain __force ***)&fifo_back,
+		   &chain->next);
+
+	/* This is intentionally unordered since we only need the fifo for gc */
+	rcu_assign_pointer(*tmp, chain);
+}
+
+/**
+ * dma_fence_chain_dequeue - dequeue a chain node for garbage collection
+ *
+ * Remove the first chain node from the gc fifo while making sure to always
+ * keep at least one node on the fifo for the lockless fifo implementation.
+ * Returns the dequeued chain node or NULL.
+ */
+static struct dma_fence_chain *dma_fence_chain_dequeue(void)
+{
+	struct dma_fence_chain *chain, *tmp;
+
+	rcu_read_lock();
+	chain = rcu_dereference(fifo_front);
+	/* Never dequeue the last chain node for lockless fifo */
+	if (unlikely(!chain || !rcu_access_pointer(chain->next))) {
+		rcu_read_unlock();
+		return NULL;
+	}
+	tmp = cmpxchg((struct dma_fence_chain __force **)&fifo_front,
+		      chain, rcu_access_pointer(chain->next));
+	rcu_read_unlock();
+	return tmp == chain ? chain : NULL;
+}
+
 /**
  * dma_fence_chain_get_prev - use RCU to get a reference to the previous fence
  * @chain: chain node to get the previous node from
@@ -28,6 +73,43 @@  static struct dma_fence *dma_fence_chain_get_prev(struct dma_fence_chain *chain)
 	return prev;
 }
 
+/**
+ * dma_fence_chain_try_replace - try to replace the prev node
+ * @chain: Chain node we try to replace prev.
+ * @prev: the old prev node
+ *
+ * Try to replace the previous chain node when it or its encapsulated fence
+ * is signaled. Returns true if we tried, false if we need to wait.
+ */
+static bool dma_fence_chain_try_replace(struct dma_fence_chain *chain,
+					struct dma_fence *prev)
+{
+	struct dma_fence *replacement, *tmp;
+	struct dma_fence_chain *prev_chain;
+
+	prev_chain = to_dma_fence_chain(prev);
+	if (prev_chain) {
+		if (!dma_fence_is_signaled(prev_chain->fence))
+			return false;
+
+		replacement = dma_fence_chain_get_prev(prev_chain);
+	} else {
+		if (!dma_fence_is_signaled(prev))
+			return false;
+
+		replacement = NULL;
+	}
+
+	tmp = cmpxchg((struct dma_fence __force **)&chain->prev, prev,
+		      replacement);
+	if (tmp == prev)
+		dma_fence_put(tmp);
+	else
+		dma_fence_put(replacement);
+
+	return true;
+}
+
 /**
  * dma_fence_chain_walk - chain walking function
  * @fence: current chain node
@@ -38,8 +120,8 @@  static struct dma_fence *dma_fence_chain_get_prev(struct dma_fence_chain *chain)
  */
 struct dma_fence *dma_fence_chain_walk(struct dma_fence *fence)
 {
-	struct dma_fence_chain *chain, *prev_chain;
-	struct dma_fence *prev, *replacement, *tmp;
+	struct dma_fence_chain *chain;
+	struct dma_fence *prev;
 
 	chain = to_dma_fence_chain(fence);
 	if (!chain) {
@@ -48,26 +130,8 @@  struct dma_fence *dma_fence_chain_walk(struct dma_fence *fence)
 	}
 
 	while ((prev = dma_fence_chain_get_prev(chain))) {
-
-		prev_chain = to_dma_fence_chain(prev);
-		if (prev_chain) {
-			if (!dma_fence_is_signaled(prev_chain->fence))
-				break;
-
-			replacement = dma_fence_chain_get_prev(prev_chain);
-		} else {
-			if (!dma_fence_is_signaled(prev))
-				break;
-
-			replacement = NULL;
-		}
-
-		tmp = cmpxchg((struct dma_fence __force **)&chain->prev,
-			      prev, replacement);
-		if (tmp == prev)
-			dma_fence_put(tmp);
-		else
-			dma_fence_put(replacement);
+		if (!dma_fence_chain_try_replace(chain, prev))
+			break;
 		dma_fence_put(prev);
 	}
 
@@ -205,7 +269,21 @@  static void dma_fence_chain_release(struct dma_fence *fence)
 	dma_fence_put(prev);
 
 	dma_fence_put(chain->fence);
-	dma_fence_free(fence);
+	WRITE_ONCE(chain->fence, NULL);
+
+	/*
+	 * Don't garbage collect here to avoid recursion and potential stack
+	 * overflow.
+	 */
+	chain = dma_fence_chain_dequeue();
+	if (!chain)
+		return;
+
+	if (kref_read(&chain->base.refcount) ||
+	    READ_ONCE(chain->fence))
+		dma_fence_chain_enqueue(chain);
+	else
+		dma_fence_free(&chain->base);
 }
 
 const struct dma_fence_ops dma_fence_chain_ops = {
@@ -218,6 +296,40 @@  const struct dma_fence_ops dma_fence_chain_ops = {
 };
 EXPORT_SYMBOL(dma_fence_chain_ops);
 
+/**
+ * dma_fence_chain_garbage_collect - cleanup chain nodes
+ *
+ * Do some garbage collection and try to release chain nodes.
+ */
+void dma_fence_chain_garbage_collect(void)
+{
+	struct dma_fence_chain *chain = dma_fence_chain_dequeue();
+
+	if (!chain)
+		return;
+
+	if (!dma_fence_get_rcu(&chain->base)) {
+		/* Unused, check if it's also clean */
+		if (likely(!READ_ONCE(chain->fence))) {
+			dma_fence_free(&chain->base);
+			return;
+		}
+
+	} else {
+		struct dma_fence *prev;
+
+		/* Used, do some chain walk */
+		prev = dma_fence_chain_get_prev(chain);
+		if (prev) {
+			dma_fence_chain_try_replace(chain, prev);
+			dma_fence_put(prev);
+		}
+		dma_fence_put(&chain->base);
+	}
+	dma_fence_chain_enqueue(chain);
+}
+EXPORT_SYMBOL(dma_fence_chain_garbage_collect);
+
 /**
  * dma_fence_chain_init - initialize a fence chain
  * @chain: the chain node to initialize
@@ -254,5 +366,7 @@  void dma_fence_chain_init(struct dma_fence_chain *chain,
 
 	dma_fence_init(&chain->base, &dma_fence_chain_ops,
 		       &chain->lock, context, seqno);
+	dma_fence_chain_enqueue(chain);
 }
+
 EXPORT_SYMBOL(dma_fence_chain_init);
diff --git a/include/linux/dma-fence-chain.h b/include/linux/dma-fence-chain.h
index 5f45689a6d2e..b412b5396434 100644
--- a/include/linux/dma-fence-chain.h
+++ b/include/linux/dma-fence-chain.h
@@ -19,6 +19,7 @@ 
  * @base: fence base class
  * @lock: spinlock for fence handling
  * @prev: previous fence of the chain
+ * @next: next chain node for garbage collection
  * @prev_seqno: original previous seqno before garbage collection
  * @fence: encapsulated fence
  * @cb: callback structure for signaling
@@ -27,6 +28,7 @@ 
 struct dma_fence_chain {
 	struct dma_fence base;
 	struct dma_fence __rcu *prev;
+	struct dma_fence_chain __rcu *next;
 	u64 prev_seqno;
 	struct dma_fence *fence;
 	union {
@@ -38,6 +40,8 @@  struct dma_fence_chain {
 
 extern const struct dma_fence_ops dma_fence_chain_ops;
 
+void dma_fence_chain_garbage_collect(void);
+
 /**
  * to_dma_fence_chain - cast a fence to a dma_fence_chain
  * @fence: fence to cast to a dma_fence_array
@@ -61,6 +65,7 @@  to_dma_fence_chain(struct dma_fence *fence)
  */
 static inline struct dma_fence_chain *dma_fence_chain_alloc(void)
 {
+	dma_fence_chain_garbage_collect();
 	return kmalloc(sizeof(struct dma_fence_chain), GFP_KERNEL);
 };
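
Since dma_fence_chain_garbage_collect() is exported, a driver could in
principle also trigger collection outside the allocation path. A purely
hypothetical example (the delayed work and its one second period are made
up for illustration):

        #include <linux/workqueue.h>
        #include <linux/dma-fence-chain.h>

        static void my_gc_work(struct work_struct *work);
        static DECLARE_DELAYED_WORK(my_gc_dwork, my_gc_work);

        static void my_gc_work(struct work_struct *work)
        {
                /* dequeues and reclaims at most one idle chain node */
                dma_fence_chain_garbage_collect();

                schedule_delayed_work(&my_gc_dwork, HZ);
        }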