diff mbox

[RFC,1/8] dma-buf/fence: add fence_collection fences

Message ID 1460683781-22535-2-git-send-email-gustavo@padovan.org (mailing list archive)
State New, archived
Headers show

Commit Message

Gustavo Padovan April 15, 2016, 1:29 a.m. UTC
From: Gustavo Padovan <gustavo.padovan@collabora.co.uk>

struct fence_collection inherits from struct fence and carries a
collection of fences that needs to be waited together.

It is useful to translate a sync_file to a fence to remove the complexity
of dealing with sync_files from DRM drivers. So even if there are many
fences in the sync_file that needs to waited for a commit to happen
drivers would only worry about a standard struct fence.That means that no
changes needed to any driver besides supporting fences.

fence_collection's fence doesn't belong to any timeline context.

Signed-off-by: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
---
 drivers/dma-buf/Makefile           |   2 +-
 drivers/dma-buf/fence-collection.c | 138 +++++++++++++++++++++++++++++++++++++
 drivers/dma-buf/fence.c            |   2 +-
 include/linux/fence-collection.h   |  56 +++++++++++++++
 include/linux/fence.h              |   2 +
 5 files changed, 198 insertions(+), 2 deletions(-)
 create mode 100644 drivers/dma-buf/fence-collection.c
 create mode 100644 include/linux/fence-collection.h

Comments

Daniel Vetter April 15, 2016, 8:02 a.m. UTC | #1
On Thu, Apr 14, 2016 at 06:29:34PM -0700, Gustavo Padovan wrote:
> From: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
> 
> struct fence_collection inherits from struct fence and carries a
> collection of fences that needs to be waited together.
> 
> It is useful to translate a sync_file to a fence to remove the complexity
> of dealing with sync_files from DRM drivers. So even if there are many
> fences in the sync_file that needs to waited for a commit to happen
> drivers would only worry about a standard struct fence.That means that no
> changes needed to any driver besides supporting fences.
> 
> fence_collection's fence doesn't belong to any timeline context.
> 
> Signed-off-by: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
> ---
>  drivers/dma-buf/Makefile           |   2 +-
>  drivers/dma-buf/fence-collection.c | 138 +++++++++++++++++++++++++++++++++++++
>  drivers/dma-buf/fence.c            |   2 +-
>  include/linux/fence-collection.h   |  56 +++++++++++++++
>  include/linux/fence.h              |   2 +
>  5 files changed, 198 insertions(+), 2 deletions(-)
>  create mode 100644 drivers/dma-buf/fence-collection.c
>  create mode 100644 include/linux/fence-collection.h
> 
> diff --git a/drivers/dma-buf/Makefile b/drivers/dma-buf/Makefile
> index 43325a1..30b8464 100644
> --- a/drivers/dma-buf/Makefile
> +++ b/drivers/dma-buf/Makefile
> @@ -1,3 +1,3 @@
> -obj-y := dma-buf.o fence.o reservation.o seqno-fence.o
> +obj-y := dma-buf.o fence.o reservation.o seqno-fence.o fence-collection.o
>  obj-$(CONFIG_SYNC_FILE)		+= sync_file.o sync_timeline.o sync_debug.o
>  obj-$(CONFIG_SW_SYNC)		+= sw_sync.o
> diff --git a/drivers/dma-buf/fence-collection.c b/drivers/dma-buf/fence-collection.c
> new file mode 100644
> index 0000000..8a4ecb0
> --- /dev/null
> +++ b/drivers/dma-buf/fence-collection.c
> @@ -0,0 +1,138 @@
> +/*
> + * fence-collection: aggregate fences to be waited together
> + *
> + * Copyright (C) 2016 Collabora Ltd
> + * Authors:
> + *	Gustavo Padovan <gustavo@padovan.org>
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms of the GNU General Public License version 2 as published by
> + * the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + */
> +
> +#include <linux/export.h>
> +#include <linux/slab.h>
> +#include <linux/fence-collection.h>
> +
> +static const char *fence_collection_get_driver_name(struct fence *fence)
> +{
> +	struct fence_collection *collection = to_fence_collection(fence);
> +	struct fence *f = collection->fences[0].fence;
> +
> +	return f->ops->get_driver_name(fence);
> +}
> +
> +static const char *fence_collection_get_timeline_name(struct fence *fence)
> +{
> +	return "no context";
> +}
> +
> +static bool fence_collection_enable_signaling(struct fence *fence)
> +{
> +	struct fence_collection *collection = to_fence_collection(fence);
> +
> +	return atomic_read(&collection->num_pending_fences);
> +}
> +
> +static bool fence_collection_signaled(struct fence *fence)
> +{
> +	struct fence_collection *collection = to_fence_collection(fence);
> +
> +	return (atomic_read(&collection->num_pending_fences) == 0);
> +}
> +
> +static void fence_collection_release(struct fence *fence)
> +{
> +	struct fence_collection *collection = to_fence_collection(fence);
> +	int i;
> +
> +	for (i = 0 ; i < collection->num_fences ; i++) {
> +		fence_remove_callback(collection->fences[i].fence,
> +				      &collection->fences[i].cb);
> +		fence_put(collection->fences[i].fence);
> +	}
> +
> +	fence_free(fence);
> +}
> +
> +static signed long fence_collection_wait(struct fence *fence, bool intr,
> +					 signed long timeout)
> +{
> +	struct fence_collection *collection = to_fence_collection(fence);
> +	int i;
> +
> +	for (i = 0 ; i < collection->num_fences ; i++) {
> +		timeout = fence_wait(collection->fences[i].fence, intr);
> +		if (timeout < 0)
> +			return timeout;
> +	}
> +
> +	return timeout;
> +}
> +
> +static const struct fence_ops fence_collection_ops = {
> +	.get_driver_name = fence_collection_get_driver_name,
> +	.get_timeline_name = fence_collection_get_timeline_name,
> +	.enable_signaling = fence_collection_enable_signaling,
> +	.signaled = fence_collection_signaled,
> +	.wait = fence_collection_wait,
> +	.release = fence_collection_release,
> +};
> +
> +static void collection_check_cb_func(struct fence *fence, struct fence_cb *cb)
> +{
> +	struct fence_collection_cb *f_cb;
> +	struct fence_collection *collection;
> +
> +	f_cb = container_of(cb, struct fence_collection_cb, cb);
> +	collection = f_cb->collection;
> +
> +	if (atomic_dec_and_test(&collection->num_pending_fences))
> +		fence_signal(&collection->base);
> +}
> +
> +void fence_collection_add(struct fence_collection *collection,
> +			  struct fence *fence)
> +{
> +	int n = collection->num_fences;
> +
> +	collection->fences[n].collection = collection;
> +	collection->fences[n].fence = fence;
> +
> +	if (fence_add_callback(fence, &collection->fences[n].cb,
> +				 collection_check_cb_func))
> +		return;
> +
> +	fence_get(fence);
> +
> +	collection->num_fences++;
> +	atomic_inc(&collection->num_pending_fences);
> +}

For the interface I think we should not split it into _init and _add - it
shouldn't be allowed to change a collection once it's created. So probably
cleaner if we add an array of fence pointers to _init.

Other nitpick: Adding the callback should (I think) only be done in
->enable_signalling.

Finally: Have you looked into stitching together a few unit tests for
fence_collection?

Fence collections also break the assumption that every fence is on a
timeline. fence_later and fence_is_later need to be adjusted. We also need
a special collection context to filter these out. This means
fence_collection isn't perfectly opaque abstraction.
> +
> +struct fence_collection *fence_collection_init(int num_fences)
> +{
> +	struct fence_collection *collection;
> +
> +	collection = kzalloc(offsetof(struct fence_collection,
> +				      fences[num_fences]), GFP_KERNEL);
> +	if (!collection)
> +		return NULL;
> +
> +	spin_lock_init(&collection->lock);
> +	fence_init(&collection->base, &fence_collection_ops, &collection->lock,
> +		   FENCE_NO_CONTEXT, 0);
> +
> +	return collection;
> +}
> +EXPORT_SYMBOL(fence_collection_init);
> +
> +void fence_collection_put(struct fence_collection *collection)
> +{
> +	fence_put(&collection->base);

Not sure a specialized _put function is useful, I'd leave it out.

> +}
> +EXPORT_SYMBOL(fence_collection_put);
> diff --git a/drivers/dma-buf/fence.c b/drivers/dma-buf/fence.c
> index 7b05dbe..486e95c 100644
> --- a/drivers/dma-buf/fence.c
> +++ b/drivers/dma-buf/fence.c
> @@ -35,7 +35,7 @@ EXPORT_TRACEPOINT_SYMBOL(fence_emit);
>   * context or not. One device can have multiple separate contexts,
>   * and they're used if some engine can run independently of another.
>   */
> -static atomic_t fence_context_counter = ATOMIC_INIT(0);
> +static atomic_t fence_context_counter = ATOMIC_INIT(1);
>  
>  /**
>   * fence_context_alloc - allocate an array of fence contexts
> diff --git a/include/linux/fence-collection.h b/include/linux/fence-collection.h
> new file mode 100644
> index 0000000..a798925
> --- /dev/null
> +++ b/include/linux/fence-collection.h
> @@ -0,0 +1,56 @@
> +/*
> + * fence-collection: aggregates fence to be waited together
> + *
> + * Copyright (C) 2016 Collabora Ltd
> + * Authors:
> + *	Gustavo Padovan <gustavo@padovan.org>
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms of the GNU General Public License version 2 as published by
> + * the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + */
> +
> +#ifndef __LINUX_FENCE_COLLECTION_H
> +#define __LINUX_FENCE_COLLECTION_H
> +
> +#include <linux/fence.h>
> +
> +struct fence_collection_cb {
> +	struct fence_cb cb;
> +	struct fence *fence;
> +	struct fence_collection *collection;
> +};
> +
> +struct fence_collection {
> +	struct fence base;
> +
> +	spinlock_t lock;
> +	struct fence_cb fence_cb;
> +	atomic_t num_pending_fences;
> +	int num_fences;
> +	struct fence_collection_cb fences[];
> +};
> +
> +/**
> + * to_fence_collection - cast a fence to a fence_collection
> + * @fence: fence to cast to a fence_collection
> + *
> + * Returns NULL if the fence is not a fence_collection,
> + * or the fence_collection otherwise.
> + */
> +static inline struct fence_collection * to_fence_collection(struct fence *fence)
> +{

Kerneldoc claims it, but you don't check that the fence is indeed a
fence_collection. That's usually done by comparing the ops pointer.

> +	return container_of(fence, struct fence_collection, base);
> +}
> +
> +struct fence_collection *fence_collection_init(int num_fences);
> +void fence_collection_add(struct fence_collection *collection,
> +			  struct fence *fence);
> +void fence_collection_put(struct fence_collection *collection);
> +
> +#endif /* __LINUX_FENCE_COLLECTION_H */
> diff --git a/include/linux/fence.h b/include/linux/fence.h
> index 2b17698..02170dd 100644
> --- a/include/linux/fence.h
> +++ b/include/linux/fence.h
> @@ -30,6 +30,8 @@
>  #include <linux/printk.h>
>  #include <linux/rcupdate.h>
>  
> +#define FENCE_NO_CONTEXT 0
> +
>  struct fence;
>  struct fence_ops;
>  struct fence_cb;
> -- 
> 2.5.5
>
Christian König April 15, 2016, 9:03 a.m. UTC | #2
Am 15.04.2016 um 10:02 schrieb Daniel Vetter:
> On Thu, Apr 14, 2016 at 06:29:34PM -0700, Gustavo Padovan wrote:
>> From: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
>>
>> struct fence_collection inherits from struct fence and carries a
>> collection of fences that needs to be waited together.
>>
>> It is useful to translate a sync_file to a fence to remove the complexity
>> of dealing with sync_files from DRM drivers. So even if there are many
>> fences in the sync_file that needs to waited for a commit to happen
>> drivers would only worry about a standard struct fence.That means that no
>> changes needed to any driver besides supporting fences.
>>
>> fence_collection's fence doesn't belong to any timeline context.
>>
>> Signed-off-by: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
>> ---
>>   drivers/dma-buf/Makefile           |   2 +-
>>   drivers/dma-buf/fence-collection.c | 138 +++++++++++++++++++++++++++++++++++++
>>   drivers/dma-buf/fence.c            |   2 +-
>>   include/linux/fence-collection.h   |  56 +++++++++++++++
>>   include/linux/fence.h              |   2 +
>>   5 files changed, 198 insertions(+), 2 deletions(-)
>>   create mode 100644 drivers/dma-buf/fence-collection.c
>>   create mode 100644 include/linux/fence-collection.h
>>
>> diff --git a/drivers/dma-buf/Makefile b/drivers/dma-buf/Makefile
>> index 43325a1..30b8464 100644
>> --- a/drivers/dma-buf/Makefile
>> +++ b/drivers/dma-buf/Makefile
>> @@ -1,3 +1,3 @@
>> -obj-y := dma-buf.o fence.o reservation.o seqno-fence.o
>> +obj-y := dma-buf.o fence.o reservation.o seqno-fence.o fence-collection.o
>>   obj-$(CONFIG_SYNC_FILE)		+= sync_file.o sync_timeline.o sync_debug.o
>>   obj-$(CONFIG_SW_SYNC)		+= sw_sync.o
>> diff --git a/drivers/dma-buf/fence-collection.c b/drivers/dma-buf/fence-collection.c
>> new file mode 100644
>> index 0000000..8a4ecb0
>> --- /dev/null
>> +++ b/drivers/dma-buf/fence-collection.c
>> @@ -0,0 +1,138 @@
>> +/*
>> + * fence-collection: aggregate fences to be waited together
>> + *
>> + * Copyright (C) 2016 Collabora Ltd
>> + * Authors:
>> + *	Gustavo Padovan <gustavo@padovan.org>
>> + *
>> + * This program is free software; you can redistribute it and/or modify it
>> + * under the terms of the GNU General Public License version 2 as published by
>> + * the Free Software Foundation.
>> + *
>> + * This program is distributed in the hope that it will be useful, but WITHOUT
>> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
>> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
>> + * more details.
>> + */
>> +
>> +#include <linux/export.h>
>> +#include <linux/slab.h>
>> +#include <linux/fence-collection.h>
>> +
>> +static const char *fence_collection_get_driver_name(struct fence *fence)
>> +{
>> +	struct fence_collection *collection = to_fence_collection(fence);
>> +	struct fence *f = collection->fences[0].fence;
>> +
>> +	return f->ops->get_driver_name(fence);
>> +}

I would rather return some constant name here instead of relying that 
the collection already has a fence added and that all fences are from 
the same driver.

>> +
>> +static const char *fence_collection_get_timeline_name(struct fence *fence)
>> +{
>> +	return "no context";
>> +}
>> +
>> +static bool fence_collection_enable_signaling(struct fence *fence)
>> +{
>> +	struct fence_collection *collection = to_fence_collection(fence);
>> +
>> +	return atomic_read(&collection->num_pending_fences);
>> +}
>> +
>> +static bool fence_collection_signaled(struct fence *fence)
>> +{
>> +	struct fence_collection *collection = to_fence_collection(fence);
>> +
>> +	return (atomic_read(&collection->num_pending_fences) == 0);
>> +}
>> +
>> +static void fence_collection_release(struct fence *fence)
>> +{
>> +	struct fence_collection *collection = to_fence_collection(fence);
>> +	int i;
>> +
>> +	for (i = 0 ; i < collection->num_fences ; i++) {
>> +		fence_remove_callback(collection->fences[i].fence,
>> +				      &collection->fences[i].cb);
>> +		fence_put(collection->fences[i].fence);
>> +	}
>> +
>> +	fence_free(fence);
>> +}
>> +
>> +static signed long fence_collection_wait(struct fence *fence, bool intr,
>> +					 signed long timeout)
>> +{
>> +	struct fence_collection *collection = to_fence_collection(fence);
>> +	int i;
>> +
>> +	for (i = 0 ; i < collection->num_fences ; i++) {
>> +		timeout = fence_wait(collection->fences[i].fence, intr);
>> +		if (timeout < 0)
>> +			return timeout;
>> +	}
>> +
>> +	return timeout;
>> +}
>> +
>> +static const struct fence_ops fence_collection_ops = {
>> +	.get_driver_name = fence_collection_get_driver_name,
>> +	.get_timeline_name = fence_collection_get_timeline_name,
>> +	.enable_signaling = fence_collection_enable_signaling,
>> +	.signaled = fence_collection_signaled,
>> +	.wait = fence_collection_wait,
>> +	.release = fence_collection_release,
>> +};
>> +
>> +static void collection_check_cb_func(struct fence *fence, struct fence_cb *cb)
>> +{
>> +	struct fence_collection_cb *f_cb;
>> +	struct fence_collection *collection;
>> +
>> +	f_cb = container_of(cb, struct fence_collection_cb, cb);
>> +	collection = f_cb->collection;
>> +
>> +	if (atomic_dec_and_test(&collection->num_pending_fences))
>> +		fence_signal(&collection->base);
>> +}
>> +
>> +void fence_collection_add(struct fence_collection *collection,
>> +			  struct fence *fence)
>> +{
>> +	int n = collection->num_fences;
>> +
>> +	collection->fences[n].collection = collection;
>> +	collection->fences[n].fence = fence;
>> +
>> +	if (fence_add_callback(fence, &collection->fences[n].cb,
>> +				 collection_check_cb_func))
>> +		return;
>> +
>> +	fence_get(fence);
>> +
>> +	collection->num_fences++;
>> +	atomic_inc(&collection->num_pending_fences);
>> +}
> For the interface I think we should not split it into _init and _add - it
> shouldn't be allowed to change a collection once it's created. So probably
> cleaner if we add an array of fence pointers to _init.

Amdgpu also has an implementation for a fence collection which uses a a 
hashtable to keep the fences grouped by context (e.g. only the latest 
fence is keept for each context). See amdgpu_sync.c for reference.

We should either make the collection similar in a way that you can add 
as many fences as you want (like the amdgpu implementation) or make it 
static and only add a fixed number of fences right from the beginning.

I can certainly see use cases for both, but if you want to stick with a 
static approach you should probably call the new object fence_array 
instead of fence_collection and do as Daniel suggested.

> Other nitpick: Adding the callback should (I think) only be done in
> ->enable_signalling.

Yeah, I was about to complain on that as well.

Enabling signaling can have a huge overhead for some fence 
implementations. So it should only be used when needed

>
> Finally: Have you looked into stitching together a few unit tests for
> fence_collection?
>
> Fence collections also break the assumption that every fence is on a
> timeline. fence_later and fence_is_later need to be adjusted. We also need
> a special collection context to filter these out. This means
> fence_collection isn't perfectly opaque abstraction.
>> +
>> +struct fence_collection *fence_collection_init(int num_fences)
>> +{
>> +	struct fence_collection *collection;
>> +
>> +	collection = kzalloc(offsetof(struct fence_collection,
>> +				      fences[num_fences]), GFP_KERNEL);
>> +	if (!collection)
>> +		return NULL;
>> +
>> +	spin_lock_init(&collection->lock);
>> +	fence_init(&collection->base, &fence_collection_ops, &collection->lock,
>> +		   FENCE_NO_CONTEXT, 0);
>> +
>> +	return collection;
>> +}
>> +EXPORT_SYMBOL(fence_collection_init);
>> +
>> +void fence_collection_put(struct fence_collection *collection)
>> +{
>> +	fence_put(&collection->base);
> Not sure a specialized _put function is useful, I'd leave it out.
>
>> +}
>> +EXPORT_SYMBOL(fence_collection_put);
>> diff --git a/drivers/dma-buf/fence.c b/drivers/dma-buf/fence.c
>> index 7b05dbe..486e95c 100644
>> --- a/drivers/dma-buf/fence.c
>> +++ b/drivers/dma-buf/fence.c
>> @@ -35,7 +35,7 @@ EXPORT_TRACEPOINT_SYMBOL(fence_emit);
>>    * context or not. One device can have multiple separate contexts,
>>    * and they're used if some engine can run independently of another.
>>    */
>> -static atomic_t fence_context_counter = ATOMIC_INIT(0);
>> +static atomic_t fence_context_counter = ATOMIC_INIT(1);
>>   
>>   /**
>>    * fence_context_alloc - allocate an array of fence contexts
>> diff --git a/include/linux/fence-collection.h b/include/linux/fence-collection.h
>> new file mode 100644
>> index 0000000..a798925
>> --- /dev/null
>> +++ b/include/linux/fence-collection.h
>> @@ -0,0 +1,56 @@
>> +/*
>> + * fence-collection: aggregates fence to be waited together
>> + *
>> + * Copyright (C) 2016 Collabora Ltd
>> + * Authors:
>> + *	Gustavo Padovan <gustavo@padovan.org>
>> + *
>> + * This program is free software; you can redistribute it and/or modify it
>> + * under the terms of the GNU General Public License version 2 as published by
>> + * the Free Software Foundation.
>> + *
>> + * This program is distributed in the hope that it will be useful, but WITHOUT
>> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
>> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
>> + * more details.
>> + */
>> +
>> +#ifndef __LINUX_FENCE_COLLECTION_H
>> +#define __LINUX_FENCE_COLLECTION_H
>> +
>> +#include <linux/fence.h>
>> +
>> +struct fence_collection_cb {
>> +	struct fence_cb cb;
>> +	struct fence *fence;
>> +	struct fence_collection *collection;
>> +};
>> +
>> +struct fence_collection {
>> +	struct fence base;
>> +
>> +	spinlock_t lock;
>> +	struct fence_cb fence_cb;
>> +	atomic_t num_pending_fences;
>> +	int num_fences;
>> +	struct fence_collection_cb fences[];
>> +};
>> +
>> +/**
>> + * to_fence_collection - cast a fence to a fence_collection
>> + * @fence: fence to cast to a fence_collection
>> + *
>> + * Returns NULL if the fence is not a fence_collection,
>> + * or the fence_collection otherwise.
>> + */
>> +static inline struct fence_collection * to_fence_collection(struct fence *fence)
>> +{
> Kerneldoc claims it, but you don't check that the fence is indeed a
> fence_collection. That's usually done by comparing the ops pointer.
>
>> +	return container_of(fence, struct fence_collection, base);
>> +}
>> +
>> +struct fence_collection *fence_collection_init(int num_fences);
>> +void fence_collection_add(struct fence_collection *collection,
>> +			  struct fence *fence);
>> +void fence_collection_put(struct fence_collection *collection);
>> +
>> +#endif /* __LINUX_FENCE_COLLECTION_H */
>> diff --git a/include/linux/fence.h b/include/linux/fence.h
>> index 2b17698..02170dd 100644
>> --- a/include/linux/fence.h
>> +++ b/include/linux/fence.h
>> @@ -30,6 +30,8 @@
>>   #include <linux/printk.h>
>>   #include <linux/rcupdate.h>
>>   
>> +#define FENCE_NO_CONTEXT 0

Might be that how amdgpu uses the fence context and sequence number is a 
bit questionable, but this will completely break it.

So we need a fix for amdgpu as well before this can go upstream.

Regards,
Christian.

>> +
>>   struct fence;
>>   struct fence_ops;
>>   struct fence_cb;
>> -- 
>> 2.5.5
>>
Daniel Vetter April 15, 2016, 11:44 a.m. UTC | #3
On Fri, Apr 15, 2016 at 11:03 AM, Christian König
<christian.koenig@amd.com> wrote:
> Might be that how amdgpu uses the fence context and sequence number is a bit
> questionable, but this will completely break it.

You mean it tries to qualesce fences in the same context down to just
the last one? That's how it's supposed to be done, and
fence_collections do break this somewhat. Without fixing up
fence_is_later and friends. Sounds like amdgpu is a good use case to
make sure the changes in semantics in these functions result in
sensible code. In a way a fence_collection is a fence where the
timeline never matches with any other timeline (since it's a
combiation).

And yeah I think fence_collection should probably compress down the
fences to 1 per timeline. But then that's just an implementation
detail we can fix later on.
-Daniel
Gustavo Padovan April 15, 2016, 6:27 p.m. UTC | #4
2016-04-15 Christian König <christian.koenig@amd.com>:

> Am 15.04.2016 um 10:02 schrieb Daniel Vetter:
> >On Thu, Apr 14, 2016 at 06:29:34PM -0700, Gustavo Padovan wrote:
> >>From: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
> >>
> >>struct fence_collection inherits from struct fence and carries a
> >>collection of fences that needs to be waited together.
> >>
> >>It is useful to translate a sync_file to a fence to remove the complexity
> >>of dealing with sync_files from DRM drivers. So even if there are many
> >>fences in the sync_file that needs to waited for a commit to happen
> >>drivers would only worry about a standard struct fence.That means that no
> >>changes needed to any driver besides supporting fences.
> >>
> >>fence_collection's fence doesn't belong to any timeline context.
> >>
> >>Signed-off-by: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
> >>---
> >>  drivers/dma-buf/Makefile           |   2 +-
> >>  drivers/dma-buf/fence-collection.c | 138 +++++++++++++++++++++++++++++++++++++
> >>  drivers/dma-buf/fence.c            |   2 +-
> >>  include/linux/fence-collection.h   |  56 +++++++++++++++
> >>  include/linux/fence.h              |   2 +
> >>  5 files changed, 198 insertions(+), 2 deletions(-)
> >>  create mode 100644 drivers/dma-buf/fence-collection.c
> >>  create mode 100644 include/linux/fence-collection.h
> >>
> >>diff --git a/drivers/dma-buf/Makefile b/drivers/dma-buf/Makefile
> >>index 43325a1..30b8464 100644
> >>--- a/drivers/dma-buf/Makefile
> >>+++ b/drivers/dma-buf/Makefile
> >>@@ -1,3 +1,3 @@
> >>-obj-y := dma-buf.o fence.o reservation.o seqno-fence.o
> >>+obj-y := dma-buf.o fence.o reservation.o seqno-fence.o fence-collection.o
> >>  obj-$(CONFIG_SYNC_FILE)		+= sync_file.o sync_timeline.o sync_debug.o
> >>  obj-$(CONFIG_SW_SYNC)		+= sw_sync.o
> >>diff --git a/drivers/dma-buf/fence-collection.c b/drivers/dma-buf/fence-collection.c
> >>new file mode 100644
> >>index 0000000..8a4ecb0
> >>--- /dev/null
> >>+++ b/drivers/dma-buf/fence-collection.c
> >>@@ -0,0 +1,138 @@
> >>+/*
> >>+ * fence-collection: aggregate fences to be waited together
> >>+ *
> >>+ * Copyright (C) 2016 Collabora Ltd
> >>+ * Authors:
> >>+ *	Gustavo Padovan <gustavo@padovan.org>
> >>+ *
> >>+ * This program is free software; you can redistribute it and/or modify it
> >>+ * under the terms of the GNU General Public License version 2 as published by
> >>+ * the Free Software Foundation.
> >>+ *
> >>+ * This program is distributed in the hope that it will be useful, but WITHOUT
> >>+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> >>+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> >>+ * more details.
> >>+ */
> >>+
> >>+#include <linux/export.h>
> >>+#include <linux/slab.h>
> >>+#include <linux/fence-collection.h>
> >>+
> >>+static const char *fence_collection_get_driver_name(struct fence *fence)
> >>+{
> >>+	struct fence_collection *collection = to_fence_collection(fence);
> >>+	struct fence *f = collection->fences[0].fence;
> >>+
> >>+	return f->ops->get_driver_name(fence);
> >>+}
> 
> I would rather return some constant name here instead of relying that the
> collection already has a fence added and that all fences are from the same
> driver.

If we merge _init and _add this will not be a problem anymore and we can
return the actual driver name.

> 
> >>+
> >>+static const char *fence_collection_get_timeline_name(struct fence *fence)
> >>+{
> >>+	return "no context";
> >>+}
> >>+
> >>+static bool fence_collection_enable_signaling(struct fence *fence)
> >>+{
> >>+	struct fence_collection *collection = to_fence_collection(fence);
> >>+
> >>+	return atomic_read(&collection->num_pending_fences);
> >>+}
> >>+
> >>+static bool fence_collection_signaled(struct fence *fence)
> >>+{
> >>+	struct fence_collection *collection = to_fence_collection(fence);
> >>+
> >>+	return (atomic_read(&collection->num_pending_fences) == 0);
> >>+}
> >>+
> >>+static void fence_collection_release(struct fence *fence)
> >>+{
> >>+	struct fence_collection *collection = to_fence_collection(fence);
> >>+	int i;
> >>+
> >>+	for (i = 0 ; i < collection->num_fences ; i++) {
> >>+		fence_remove_callback(collection->fences[i].fence,
> >>+				      &collection->fences[i].cb);
> >>+		fence_put(collection->fences[i].fence);
> >>+	}
> >>+
> >>+	fence_free(fence);
> >>+}
> >>+
> >>+static signed long fence_collection_wait(struct fence *fence, bool intr,
> >>+					 signed long timeout)
> >>+{
> >>+	struct fence_collection *collection = to_fence_collection(fence);
> >>+	int i;
> >>+
> >>+	for (i = 0 ; i < collection->num_fences ; i++) {
> >>+		timeout = fence_wait(collection->fences[i].fence, intr);
> >>+		if (timeout < 0)
> >>+			return timeout;
> >>+	}
> >>+
> >>+	return timeout;
> >>+}
> >>+
> >>+static const struct fence_ops fence_collection_ops = {
> >>+	.get_driver_name = fence_collection_get_driver_name,
> >>+	.get_timeline_name = fence_collection_get_timeline_name,
> >>+	.enable_signaling = fence_collection_enable_signaling,
> >>+	.signaled = fence_collection_signaled,
> >>+	.wait = fence_collection_wait,
> >>+	.release = fence_collection_release,
> >>+};
> >>+
> >>+static void collection_check_cb_func(struct fence *fence, struct fence_cb *cb)
> >>+{
> >>+	struct fence_collection_cb *f_cb;
> >>+	struct fence_collection *collection;
> >>+
> >>+	f_cb = container_of(cb, struct fence_collection_cb, cb);
> >>+	collection = f_cb->collection;
> >>+
> >>+	if (atomic_dec_and_test(&collection->num_pending_fences))
> >>+		fence_signal(&collection->base);
> >>+}
> >>+
> >>+void fence_collection_add(struct fence_collection *collection,
> >>+			  struct fence *fence)
> >>+{
> >>+	int n = collection->num_fences;
> >>+
> >>+	collection->fences[n].collection = collection;
> >>+	collection->fences[n].fence = fence;
> >>+
> >>+	if (fence_add_callback(fence, &collection->fences[n].cb,
> >>+				 collection_check_cb_func))
> >>+		return;
> >>+
> >>+	fence_get(fence);
> >>+
> >>+	collection->num_fences++;
> >>+	atomic_inc(&collection->num_pending_fences);
> >>+}
> >For the interface I think we should not split it into _init and _add - it
> >shouldn't be allowed to change a collection once it's created. So probably
> >cleaner if we add an array of fence pointers to _init.
> 
> Amdgpu also has an implementation for a fence collection which uses a a
> hashtable to keep the fences grouped by context (e.g. only the latest fence
> is keept for each context). See amdgpu_sync.c for reference.
> 
> We should either make the collection similar in a way that you can add as
> many fences as you want (like the amdgpu implementation) or make it static
> and only add a fixed number of fences right from the beginning.
> 
> I can certainly see use cases for both, but if you want to stick with a
> static approach you should probably call the new object fence_array instead
> of fence_collection and do as Daniel suggested.

Maybe we can go for something in between. Have fence_collection_init()
need at least two fences to create the fence_collection. Then
fence_collection_add() would add more dinamically.

> 
> >Other nitpick: Adding the callback should (I think) only be done in
> >->enable_signalling.
> 
> Yeah, I was about to complain on that as well.
> 
> Enabling signaling can have a huge overhead for some fence implementations.
> So it should only be used when needed

Right, that makes sense.

> 
> >
> >Finally: Have you looked into stitching together a few unit tests for
> >fence_collection?

Not yet. It is something I plan to work soon.

> >
> >Fence collections also break the assumption that every fence is on a
> >timeline. fence_later and fence_is_later need to be adjusted. We also need
> >a special collection context to filter these out. This means
> >fence_collection isn't perfectly opaque abstraction.

I'll fix this.

> >>+
> >>+struct fence_collection *fence_collection_init(int num_fences)
> >>+{
> >>+	struct fence_collection *collection;
> >>+
> >>+	collection = kzalloc(offsetof(struct fence_collection,
> >>+				      fences[num_fences]), GFP_KERNEL);
> >>+	if (!collection)
> >>+		return NULL;
> >>+
> >>+	spin_lock_init(&collection->lock);
> >>+	fence_init(&collection->base, &fence_collection_ops, &collection->lock,
> >>+		   FENCE_NO_CONTEXT, 0);
> >>+
> >>+	return collection;
> >>+}
> >>+EXPORT_SYMBOL(fence_collection_init);
> >>+
> >>+void fence_collection_put(struct fence_collection *collection)
> >>+{
> >>+	fence_put(&collection->base);
> >Not sure a specialized _put function is useful, I'd leave it out.

I've added this to let the user only deal with fence_collection, but 
I'm fine leaving it out too.

	Gustavo
Gustavo Padovan April 15, 2016, 6:29 p.m. UTC | #5
2016-04-15 Daniel Vetter <daniel@ffwll.ch>:

> On Fri, Apr 15, 2016 at 11:03 AM, Christian König
> <christian.koenig@amd.com> wrote:
> > Might be that how amdgpu uses the fence context and sequence number is a bit
> > questionable, but this will completely break it.
> 
> You mean it tries to qualesce fences in the same context down to just
> the last one? That's how it's supposed to be done, and
> fence_collections do break this somewhat. Without fixing up
> fence_is_later and friends. Sounds like amdgpu is a good use case to
> make sure the changes in semantics in these functions result in
> sensible code. In a way a fence_collection is a fence where the
> timeline never matches with any other timeline (since it's a
> combiation).
> 
> And yeah I think fence_collection should probably compress down the
> fences to 1 per timeline. But then that's just an implementation
> detail we can fix later on.

You mean asking for a new context for every collection?

	Gustavo
Daniel Vetter April 15, 2016, 7:23 p.m. UTC | #6
On Fri, Apr 15, 2016 at 11:29:34AM -0700, Gustavo Padovan wrote:
> 2016-04-15 Daniel Vetter <daniel@ffwll.ch>:
> 
> > On Fri, Apr 15, 2016 at 11:03 AM, Christian König
> > <christian.koenig@amd.com> wrote:
> > > Might be that how amdgpu uses the fence context and sequence number is a bit
> > > questionable, but this will completely break it.
> > 
> > You mean it tries to qualesce fences in the same context down to just
> > the last one? That's how it's supposed to be done, and
> > fence_collections do break this somewhat. Without fixing up
> > fence_is_later and friends. Sounds like amdgpu is a good use case to
> > make sure the changes in semantics in these functions result in
> > sensible code. In a way a fence_collection is a fence where the
> > timeline never matches with any other timeline (since it's a
> > combiation).
> > 
> > And yeah I think fence_collection should probably compress down the
> > fences to 1 per timeline. But then that's just an implementation
> > detail we can fix later on.
> 
> You mean asking for a new context for every collection?

That would be one solution, but I fear it's a bit expensive. Having a
special-case context for collections might be the better approach.
-Daniel
Daniel Vetter April 15, 2016, 7:25 p.m. UTC | #7
On Fri, Apr 15, 2016 at 11:27:50AM -0700, Gustavo Padovan wrote:
> 2016-04-15 Christian König <christian.koenig@amd.com>:
> > Amdgpu also has an implementation for a fence collection which uses a a
> > hashtable to keep the fences grouped by context (e.g. only the latest fence
> > is keept for each context). See amdgpu_sync.c for reference.
> > 
> > We should either make the collection similar in a way that you can add as
> > many fences as you want (like the amdgpu implementation) or make it static
> > and only add a fixed number of fences right from the beginning.
> > 
> > I can certainly see use cases for both, but if you want to stick with a
> > static approach you should probably call the new object fence_array instead
> > of fence_collection and do as Daniel suggested.
> 
> Maybe we can go for something in between. Have fence_collection_init()
> need at least two fences to create the fence_collection. Then
> fence_collection_add() would add more dinamically.

The problem with adding fences later on is that it makes it trivial to add
deadlocks and loops. Just add the fence collection to itself, boom. From
that pov it's an unsafe api, and hence something to avoid.
-Daniel
Christian König May 18, 2016, 7:07 a.m. UTC | #8
Am 15.04.2016 um 21:25 schrieb Daniel Vetter:
> On Fri, Apr 15, 2016 at 11:27:50AM -0700, Gustavo Padovan wrote:
>> 2016-04-15 Christian König <christian.koenig@amd.com>:
>>> Amdgpu also has an implementation for a fence collection which uses a a
>>> hashtable to keep the fences grouped by context (e.g. only the latest fence
>>> is keept for each context). See amdgpu_sync.c for reference.
>>>
>>> We should either make the collection similar in a way that you can add as
>>> many fences as you want (like the amdgpu implementation) or make it static
>>> and only add a fixed number of fences right from the beginning.
>>>
>>> I can certainly see use cases for both, but if you want to stick with a
>>> static approach you should probably call the new object fence_array instead
>>> of fence_collection and do as Daniel suggested.
>> Maybe we can go for something in between. Have fence_collection_init()
>> need at least two fences to create the fence_collection. Then
>> fence_collection_add() would add more dinamically.
> The problem with adding fences later on is that it makes it trivial to add
> deadlocks and loops. Just add the fence collection to itself, boom. From
> that pov it's an unsafe api, and hence something to avoid.
> -Daniel

Any conclusion on this? Did any version of the patch made it upstream?

I'm in the need of an array based fence collection right now as well. 
Any objection that I just take the patch proposed here and fix the 
comments or are you still else working on this right now?

Regards,
Christian.
Gustavo Padovan May 18, 2016, 2:30 p.m. UTC | #9
Hi Christian,

2016-05-18 Christian König <christian.koenig@amd.com>:

> Am 15.04.2016 um 21:25 schrieb Daniel Vetter:
> > On Fri, Apr 15, 2016 at 11:27:50AM -0700, Gustavo Padovan wrote:
> > > 2016-04-15 Christian König <christian.koenig@amd.com>:
> > > > Amdgpu also has an implementation for a fence collection which uses a a
> > > > hashtable to keep the fences grouped by context (e.g. only the latest fence
> > > > is keept for each context). See amdgpu_sync.c for reference.
> > > > 
> > > > We should either make the collection similar in a way that you can add as
> > > > many fences as you want (like the amdgpu implementation) or make it static
> > > > and only add a fixed number of fences right from the beginning.
> > > > 
> > > > I can certainly see use cases for both, but if you want to stick with a
> > > > static approach you should probably call the new object fence_array instead
> > > > of fence_collection and do as Daniel suggested.
> > > Maybe we can go for something in between. Have fence_collection_init()
> > > need at least two fences to create the fence_collection. Then
> > > fence_collection_add() would add more dinamically.
> > The problem with adding fences later on is that it makes it trivial to add
> > deadlocks and loops. Just add the fence collection to itself, boom. From
> > that pov it's an unsafe api, and hence something to avoid.
> > -Daniel
> 
> Any conclusion on this? Did any version of the patch made it upstream?
> 
> I'm in the need of an array based fence collection right now as well. Any
> objection that I just take the patch proposed here and fix the comments or
> are you still else working on this right now?

I have a new version of this patch that I didn't send upstream yet
because it is part of a bigger patchset. But I can split it and send
what I have for fence_collection later today.

	Gustavo
diff mbox

Patch

diff --git a/drivers/dma-buf/Makefile b/drivers/dma-buf/Makefile
index 43325a1..30b8464 100644
--- a/drivers/dma-buf/Makefile
+++ b/drivers/dma-buf/Makefile
@@ -1,3 +1,3 @@ 
-obj-y := dma-buf.o fence.o reservation.o seqno-fence.o
+obj-y := dma-buf.o fence.o reservation.o seqno-fence.o fence-collection.o
 obj-$(CONFIG_SYNC_FILE)		+= sync_file.o sync_timeline.o sync_debug.o
 obj-$(CONFIG_SW_SYNC)		+= sw_sync.o
diff --git a/drivers/dma-buf/fence-collection.c b/drivers/dma-buf/fence-collection.c
new file mode 100644
index 0000000..8a4ecb0
--- /dev/null
+++ b/drivers/dma-buf/fence-collection.c
@@ -0,0 +1,138 @@ 
+/*
+ * fence-collection: aggregate fences to be waited together
+ *
+ * Copyright (C) 2016 Collabora Ltd
+ * Authors:
+ *	Gustavo Padovan <gustavo@padovan.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/export.h>
+#include <linux/slab.h>
+#include <linux/fence-collection.h>
+
+static const char *fence_collection_get_driver_name(struct fence *fence)
+{
+	struct fence_collection *collection = to_fence_collection(fence);
+	struct fence *f = collection->fences[0].fence;
+
+	return f->ops->get_driver_name(fence);
+}
+
+static const char *fence_collection_get_timeline_name(struct fence *fence)
+{
+	return "no context";
+}
+
+static bool fence_collection_enable_signaling(struct fence *fence)
+{
+	struct fence_collection *collection = to_fence_collection(fence);
+
+	return atomic_read(&collection->num_pending_fences);
+}
+
+static bool fence_collection_signaled(struct fence *fence)
+{
+	struct fence_collection *collection = to_fence_collection(fence);
+
+	return (atomic_read(&collection->num_pending_fences) == 0);
+}
+
+static void fence_collection_release(struct fence *fence)
+{
+	struct fence_collection *collection = to_fence_collection(fence);
+	int i;
+
+	for (i = 0 ; i < collection->num_fences ; i++) {
+		fence_remove_callback(collection->fences[i].fence,
+				      &collection->fences[i].cb);
+		fence_put(collection->fences[i].fence);
+	}
+
+	fence_free(fence);
+}
+
+static signed long fence_collection_wait(struct fence *fence, bool intr,
+					 signed long timeout)
+{
+	struct fence_collection *collection = to_fence_collection(fence);
+	int i;
+
+	for (i = 0 ; i < collection->num_fences ; i++) {
+		timeout = fence_wait(collection->fences[i].fence, intr);
+		if (timeout < 0)
+			return timeout;
+	}
+
+	return timeout;
+}
+
+static const struct fence_ops fence_collection_ops = {
+	.get_driver_name = fence_collection_get_driver_name,
+	.get_timeline_name = fence_collection_get_timeline_name,
+	.enable_signaling = fence_collection_enable_signaling,
+	.signaled = fence_collection_signaled,
+	.wait = fence_collection_wait,
+	.release = fence_collection_release,
+};
+
+static void collection_check_cb_func(struct fence *fence, struct fence_cb *cb)
+{
+	struct fence_collection_cb *f_cb;
+	struct fence_collection *collection;
+
+	f_cb = container_of(cb, struct fence_collection_cb, cb);
+	collection = f_cb->collection;
+
+	if (atomic_dec_and_test(&collection->num_pending_fences))
+		fence_signal(&collection->base);
+}
+
+void fence_collection_add(struct fence_collection *collection,
+			  struct fence *fence)
+{
+	int n = collection->num_fences;
+
+	collection->fences[n].collection = collection;
+	collection->fences[n].fence = fence;
+
+	if (fence_add_callback(fence, &collection->fences[n].cb,
+				 collection_check_cb_func))
+		return;
+
+	fence_get(fence);
+
+	collection->num_fences++;
+	atomic_inc(&collection->num_pending_fences);
+}
+
+struct fence_collection *fence_collection_init(int num_fences)
+{
+	struct fence_collection *collection;
+
+	collection = kzalloc(offsetof(struct fence_collection,
+				      fences[num_fences]), GFP_KERNEL);
+	if (!collection)
+		return NULL;
+
+	spin_lock_init(&collection->lock);
+	fence_init(&collection->base, &fence_collection_ops, &collection->lock,
+		   FENCE_NO_CONTEXT, 0);
+
+	return collection;
+}
+EXPORT_SYMBOL(fence_collection_init);
+
+void fence_collection_put(struct fence_collection *collection)
+{
+	fence_put(&collection->base);
+}
+EXPORT_SYMBOL(fence_collection_put);
diff --git a/drivers/dma-buf/fence.c b/drivers/dma-buf/fence.c
index 7b05dbe..486e95c 100644
--- a/drivers/dma-buf/fence.c
+++ b/drivers/dma-buf/fence.c
@@ -35,7 +35,7 @@  EXPORT_TRACEPOINT_SYMBOL(fence_emit);
  * context or not. One device can have multiple separate contexts,
  * and they're used if some engine can run independently of another.
  */
-static atomic_t fence_context_counter = ATOMIC_INIT(0);
+static atomic_t fence_context_counter = ATOMIC_INIT(1);
 
 /**
  * fence_context_alloc - allocate an array of fence contexts
diff --git a/include/linux/fence-collection.h b/include/linux/fence-collection.h
new file mode 100644
index 0000000..a798925
--- /dev/null
+++ b/include/linux/fence-collection.h
@@ -0,0 +1,56 @@ 
+/*
+ * fence-collection: aggregates fence to be waited together
+ *
+ * Copyright (C) 2016 Collabora Ltd
+ * Authors:
+ *	Gustavo Padovan <gustavo@padovan.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef __LINUX_FENCE_COLLECTION_H
+#define __LINUX_FENCE_COLLECTION_H
+
+#include <linux/fence.h>
+
+struct fence_collection_cb {
+	struct fence_cb cb;
+	struct fence *fence;
+	struct fence_collection *collection;
+};
+
+struct fence_collection {
+	struct fence base;
+
+	spinlock_t lock;
+	struct fence_cb fence_cb;
+	atomic_t num_pending_fences;
+	int num_fences;
+	struct fence_collection_cb fences[];
+};
+
+/**
+ * to_fence_collection - cast a fence to a fence_collection
+ * @fence: fence to cast to a fence_collection
+ *
+ * Returns NULL if the fence is not a fence_collection,
+ * or the fence_collection otherwise.
+ */
+static inline struct fence_collection * to_fence_collection(struct fence *fence)
+{
+	return container_of(fence, struct fence_collection, base);
+}
+
+struct fence_collection *fence_collection_init(int num_fences);
+void fence_collection_add(struct fence_collection *collection,
+			  struct fence *fence);
+void fence_collection_put(struct fence_collection *collection);
+
+#endif /* __LINUX_FENCE_COLLECTION_H */
diff --git a/include/linux/fence.h b/include/linux/fence.h
index 2b17698..02170dd 100644
--- a/include/linux/fence.h
+++ b/include/linux/fence.h
@@ -30,6 +30,8 @@ 
 #include <linux/printk.h>
 #include <linux/rcupdate.h>
 
+#define FENCE_NO_CONTEXT 0
+
 struct fence;
 struct fence_ops;
 struct fence_cb;