[RFC] drm/syncobj: add IOCTL to register an eventfd for a timeline

Message ID: 20221009144001.161124-1-contact@emersion.fr (mailing list archive)
State: New, archived
Series: [RFC] drm/syncobj: add IOCTL to register an eventfd for a timeline

Commit Message

Simon Ser Oct. 9, 2022, 2:40 p.m. UTC
Introduce a new DRM_IOCTL_SYNCOBJ_TIMELINE_REGISTER_EVENTFD IOCTL
which signals an eventfd when a timeline point completes.

This is useful for Wayland compositors to handle wait-before-submit.
Wayland clients can send a timeline point to the compositor
before the point has materialized, and the compositor can then
wait for the point to materialize via this new IOCTL.

The existing DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT IOCTL is not suitable
because it blocks. Compositors want to integrate the wait with
their poll(2)-based event loop.

Signed-off-by: Simon Ser <contact@emersion.fr>
Cc: Jason Ekstrand <jason@jlekstrand.net>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: Christian König <christian.koenig@amd.com>
Cc: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Cc: Daniel Stone <daniel@fooishbar.org>
Cc: Pekka Paalanen <ppaalanen@gmail.com>
Cc: James Jones <jajones@nvidia.com>
---
 drivers/gpu/drm/drm_internal.h |   3 +
 drivers/gpu/drm/drm_ioctl.c    |   3 +
 drivers/gpu/drm/drm_syncobj.c  | 113 +++++++++++++++++++++++++++++++--
 include/drm/drm_syncobj.h      |   6 +-
 include/uapi/drm/drm.h         |  15 +++++
 5 files changed, 133 insertions(+), 7 deletions(-)
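
For context, here is a minimal sketch of the compositor-side flow this enables. It assumes the uapi proposed in this RFC (the struct and IOCTL names may still change per the review below); the helper name, includes and error handling are illustrative, not part of the series:

#include <stdint.h>
#include <sys/eventfd.h>
#include <unistd.h>
#include <xf86drm.h>	/* drmIoctl() */
#include <drm.h>	/* uapi header as patched by this series */

/* Register an eventfd for a timeline point and return it, so the
 * compositor can add it to its poll(2) set with POLLIN. */
static int register_point(int drm_fd, uint32_t syncobj_handle, uint64_t point)
{
	int ev_fd = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
	if (ev_fd < 0)
		return -1;

	struct drm_syncobj_timeline_register_eventfd args = {
		.handle = syncobj_handle,
		.flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE,
		.point = point,
		.fd = ev_fd,
		.pad = 0,
	};
	if (drmIoctl(drm_fd, DRM_IOCTL_SYNCOBJ_TIMELINE_REGISTER_EVENTFD, &args) != 0) {
		close(ev_fd);
		return -1;
	}
	return ev_fd;
}

Once poll(2) reports the eventfd readable, the compositor reads the 8-byte counter to clear it and submits the work that depends on the now-materialized point.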

Comments

Christian König Oct. 9, 2022, 6 p.m. UTC | #1
Am 09.10.22 um 16:40 schrieb Simon Ser:
> Introduce a new DRM_IOCTL_SYNCOBJ_TIMELINE_REGISTER_EVENTFD IOCTL
> which signals an eventfd when a timeline point completes.

I was entertaining the same thought for quite a while, but I would even
go a step further and actually always base the wait-before-signal
functionality of the drm_syncobj on the eventfd functionality. That
would save us quite a bit of complexity I think.

As a general note I think the name 
DRM_IOCTL_SYNCOBJ_TIMELINE_REGISTER_EVENTFD is just too long; just make
that DRM_IOCTL_SYNCOBJ_EVENTFD. Same for the function names as well.

In addition to that, I think we should also always have graceful
handling for binary syncobjs. So please try to avoid making this special
for the timeline case (the timeline case should of course still be 
supported).

Regards,
Christian.


>
> This is useful for Wayland compositors to handle wait-before-submit.
> Wayland clients can send a timeline point to the compositor
> before the point has materialized, and the compositor can then
> wait for the point to materialize via this new IOCTL.
>
> The existing DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT IOCTL is not suitable
> because it blocks. Compositors want to integrate the wait with
> their poll(2)-based event loop.
>
> Signed-off-by: Simon Ser <contact@emersion.fr>
> Cc: Jason Ekstrand <jason@jlekstrand.net>
> Cc: Daniel Vetter <daniel@ffwll.ch>
> Cc: Christian König <christian.koenig@amd.com>
> Cc: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
> Cc: Daniel Stone <daniel@fooishbar.org>
> Cc: Pekka Paalanen <ppaalanen@gmail.com>
> Cc: James Jones <jajones@nvidia.com>
> ---
>   drivers/gpu/drm/drm_internal.h |   3 +
>   drivers/gpu/drm/drm_ioctl.c    |   3 +
>   drivers/gpu/drm/drm_syncobj.c  | 113 +++++++++++++++++++++++++++++++--
>   include/drm/drm_syncobj.h      |   6 +-
>   include/uapi/drm/drm.h         |  15 +++++
>   5 files changed, 133 insertions(+), 7 deletions(-)

...
Pekka Paalanen Oct. 10, 2022, 8:19 a.m. UTC | #2
On Sun, 09 Oct 2022 14:40:14 +0000
Simon Ser <contact@emersion.fr> wrote:

> Introduce a new DRM_IOCTL_SYNCOBJ_TIMELINE_REGISTER_EVENTFD IOCTL
> which signals an eventfd when a timeline point completes.
> 
> This is useful for Wayland compositors to handle wait-before-submit.
> Wayland clients can send a timeline point to the compositor
> before the point has materialized, and the compositor can then
> wait for the point to materialize via this new IOCTL.
> 
> The existing DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT IOCTL is not suitable
> because it blocks. Compositors want to integrate the wait with
> their poll(2)-based event loop.
> 
> Signed-off-by: Simon Ser <contact@emersion.fr>
> Cc: Jason Ekstrand <jason@jlekstrand.net>
> Cc: Daniel Vetter <daniel@ffwll.ch>
> Cc: Christian König <christian.koenig@amd.com>
> Cc: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
> Cc: Daniel Stone <daniel@fooishbar.org>
> Cc: Pekka Paalanen <ppaalanen@gmail.com>
> Cc: James Jones <jajones@nvidia.com>
> ---
>  drivers/gpu/drm/drm_internal.h |   3 +
>  drivers/gpu/drm/drm_ioctl.c    |   3 +
>  drivers/gpu/drm/drm_syncobj.c  | 113 +++++++++++++++++++++++++++++++--
>  include/drm/drm_syncobj.h      |   6 +-
>  include/uapi/drm/drm.h         |  15 +++++
>  5 files changed, 133 insertions(+), 7 deletions(-)

...

> diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
> index 642808520d92..359e21414196 100644
> --- a/include/uapi/drm/drm.h
> +++ b/include/uapi/drm/drm.h
> @@ -909,6 +909,20 @@ struct drm_syncobj_timeline_wait {
>  	__u32 pad;
>  };
>  

Hi,

I'm completely clueless about this API.

> +/**
> + * struct drm_syncobj_timeline_register_eventfd
> + *
> + * Register an eventfd to be signalled when a timeline point completes. The
> + * eventfd counter will be incremented by one.

Sounds nice.

Since the action is to increment the counter by one, does it mean it
will be possible to wait for a bunch of completions and have the
eventfd poll return only when they have all signaled?

> + */
> +struct drm_syncobj_timeline_register_eventfd {
> +	__u32 handle;

Handle of what?

> +	__u32 flags;

What flags are allowed? Must be zero for now?

> +	__u64 point;

Is this some Vulkan thingy?

> +	__s32 fd;

I guess the userspace needs to create an eventfd first, and pass it as
the argument here? This is not creating a new eventfd itself?

> +	__u32 pad;

Must be zero?


Thanks,
pq

> +};
> +
>  
>  struct drm_syncobj_array {
>  	__u64 handles;
> @@ -1095,6 +1109,7 @@ extern "C" {
>  #define DRM_IOCTL_SYNCOBJ_QUERY		DRM_IOWR(0xCB, struct drm_syncobj_timeline_array)
>  #define DRM_IOCTL_SYNCOBJ_TRANSFER	DRM_IOWR(0xCC, struct drm_syncobj_transfer)
>  #define DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL	DRM_IOWR(0xCD, struct drm_syncobj_timeline_array)
> +#define DRM_IOCTL_SYNCOBJ_TIMELINE_REGISTER_EVENTFD	DRM_IOWR(0xCE, struct drm_syncobj_timeline_register_eventfd)
>  
>  /**
>   * DRM_IOCTL_MODE_GETFB2 - Get framebuffer metadata.
Simon Ser Oct. 10, 2022, 9:13 a.m. UTC | #3
On Sunday, October 9th, 2022 at 20:00, Christian König <christian.koenig@amd.com> wrote:

> Am 09.10.22 um 16:40 schrieb Simon Ser:
> 
> > Introduce a new DRM_IOCTL_SYNCOBJ_TIMELINE_REGISTER_EVENTFD IOCTL
> > which signals an eventfd when a timeline point completes.
> 
> I was entertaining the same thought for quite a while, but I would even
> go a step further and actually always base the wait-before-signal
> functionality of the drm_syncobj on the eventfd functionality. That
> would save us quite a bit of complexity I think.

Hm what do you mean exactly? I'm not sure I'm following.

> As a general note I think the name
> DRM_IOCTL_SYNCOBJ_TIMELINE_REGISTER_EVENTFD is just too long; just make
> that DRM_IOCTL_SYNCOBJ_EVENTFD. Same for the function names as well.

Agreed.

> In addition to that, I think we should also always have graceful
> handling for binary syncobjs. So please try to avoid making this special
> for the timeline case (the timeline case should of course still be
> supported).

This makes sense to me.
Simon Ser Oct. 10, 2022, 9:20 a.m. UTC | #4
On Monday, October 10th, 2022 at 10:19, Pekka Paalanen <ppaalanen@gmail.com> wrote:

> I'm completely clueless about this API.

No worries!

> > +/**
> > + * struct drm_syncobj_timeline_register_eventfd
> > + *
> > + * Register an eventfd to be signalled when a timeline point completes. The
> > + * eventfd counter will be incremented by one.
> 
> Sounds nice.
> 
> Since the action is to increment the counter by one, does it mean it
> will be possible to wait for a bunch of completions and have the
> eventfd poll return only when they have all signaled?

It is possible to perform the IOCTL multiple times with the same eventfd, but
eventfd semantics would wake up user-space each time any timeline point
completes.
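
To make that concrete, here is a sketch under the semantics described above (the helper below is made up for illustration): several registrations can share one eventfd, each completed point adds 1 to the counter, and a single read(2) returns and clears the accumulated count without telling you which points completed.

#include <stdint.h>
#include <unistd.h>

/* Call after poll(2) reports POLLIN on the shared eventfd. */
static uint64_t drain_completions(int ev_fd)
{
	uint64_t count = 0;

	/* A read(2) on an eventfd returns the counter and resets it to 0,
	 * i.e. the number of points completed since the last read. */
	if (read(ev_fd, &count, sizeof(count)) != sizeof(count))
		return 0;
	return count;
}

User-space that needs to know which points completed still has to track its own pending registrations (or query the timeline); the counter only carries a count.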

> > + */
> > +struct drm_syncobj_timeline_register_eventfd {
> > +	__u32 handle;
> 
> Handle of what?

drm_syncobj handle

> > +	__u32 flags;
> 
> What flags are allowed? Must be zero for now?

Same flags as the wait IOCTL.

Must be WAIT_AVAILABLE for now, but I'll implement the zero case as well (see
TODO).

> > +	__u64 point;
> 
> Is this some Vulkan thingy?

It's a drm_syncobj timeline thing. The timeline contains multiple sync points.
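
For the curious, a timeline syncobj carries a chain of fences indexed by 64-bit points, and the existing DRM_IOCTL_SYNCOBJ_QUERY reads back the last signaled point. A rough sketch, not part of this patch (the helper name is made up):

#include <stdint.h>
#include <xf86drm.h>
#include <drm.h>

/* Returns the last signaled point of a timeline syncobj (0 if none). */
static uint64_t query_last_signaled(int drm_fd, uint32_t handle)
{
	uint64_t point = 0;
	struct drm_syncobj_timeline_array query = {
		.handles = (uint64_t)(uintptr_t)&handle,
		.points = (uint64_t)(uintptr_t)&point,
		.count_handles = 1,
	};

	drmIoctl(drm_fd, DRM_IOCTL_SYNCOBJ_QUERY, &query);
	return point;
}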

> > +	__s32 fd;
> 
> I guess the userspace needs to create an eventfd first, and pass it as
> the argument here? This is not creating a new eventfd itself?

Correct

> > +	__u32 pad;
> 
> Must be zero?

Indeed.

I'll spell out these requirements explicitly in the next version.
Christian König Oct. 11, 2022, 12:10 p.m. UTC | #5
Am 10.10.22 um 11:13 schrieb Simon Ser:
> On Sunday, October 9th, 2022 at 20:00, Christian König <christian.koenig@amd.com> wrote:
>
>> Am 09.10.22 um 16:40 schrieb Simon Ser:
>>
>>> Introduce a new DRM_IOCTL_SYNCOBJ_TIMELINE_REGISTER_EVENTFD IOCTL
>>> which signals an eventfd when a timeline point completes.
>> I was entertaining the same thought for quite a while, but I would even
>> go a step further and actually always base the wait-before-signal
>> functionality of the drm_syncobj on the eventfd functionality. That
>> would save us quite a bit of complexity I think.
> Hm what do you mean exactly? I'm not sure I'm following.

Essentially we have the syncobj_wait_entry structure which is added to 
the list whenever somebody starts to wait for a sequence which hasn't 
materialized yet.

Instead of extending this maybe just completely nuke the 
syncobj_wait_entry handling and replace it with your implementation here.

Absolutely not a must have, but might be easier to maintain in the long 
term.

Regards,
Christian.

>
>> As a general note I think the name
>> DRM_IOCTL_SYNCOBJ_TIMELINE_REGISTER_EVENTFD is just too long; just make
>> that DRM_IOCTL_SYNCOBJ_EVENTFD. Same for the function names as well.
> Agreed.
>
>> In addition to that, I think we should also always have graceful
>> handling for binary syncobjs. So please try to avoid making this special
>> for the timeline case (the timeline case should of course still be
>> supported).
> This makes sense to me.
Simon Ser Oct. 12, 2022, 10:25 a.m. UTC | #6
On Tuesday, October 11th, 2022 at 14:10, Christian König <christian.koenig@amd.com> wrote:

> Am 10.10.22 um 11:13 schrieb Simon Ser:
> > On Sunday, October 9th, 2022 at 20:00, Christian König <christian.koenig@amd.com> wrote:
> >
> >> Am 09.10.22 um 16:40 schrieb Simon Ser:
> >>
> >>> Introduce a new DRM_IOCTL_SYNCOBJ_TIMELINE_REGISTER_EVENTFD IOCTL
> >>> which signals an eventfd when a timeline point completes.
> >> I was entertaining the same thought for quite a while, but I would even
> >> go a step further and actually always base the wait-before-signal
> >> functionality of the drm_syncobj on the eventfd functionality. That
> >> would save us quite a bit of complexity I think.
> > Hm what do you mean exactly? I'm not sure I'm following.
> 
> Essentially we have the syncobj_wait_entry structure which is added to
> the list whenever somebody starts to wait for a sequence which hasn't
> materialized yet.
> 
> Instead of extending this maybe just completely nuke the
> syncobj_wait_entry handling and replace it with your implementation here.

Hm, not sure I understand how that would work. We don't have an eventfd in the
WAIT IOCTL...

We could merge the two structs and figure out which callback to invoke based
on which fields are filled, but I figured two separate structs would make
for a cleaner split.
Christian König Oct. 12, 2022, 11 a.m. UTC | #7
Am 12.10.22 um 12:25 schrieb Simon Ser:
> On Tuesday, October 11th, 2022 at 14:10, Christian König <christian.koenig@amd.com> wrote:
>
>> Am 10.10.22 um 11:13 schrieb Simon Ser:
>>> On Sunday, October 9th, 2022 at 20:00, Christian König <christian.koenig@amd.com> wrote:
>>>
>>>> Am 09.10.22 um 16:40 schrieb Simon Ser:
>>>>
>>>>> Introduce a new DRM_IOCTL_SYNCOBJ_TIMELINE_REGISTER_EVENTFD IOCTL
>>>>> which signals an eventfd when a timeline point completes.
>>>> I was entertaining the same thought for quite a while, but I would even
>>>> go a step further and actually always base the wait-before-signal
>>>> functionality of the drm_syncobj on the eventfd functionality. That
>>>> would save us quite a bit of complexity I think.
>>> Hm what do you mean exactly? I'm not sure I'm following.
>> Essentially we have the syncobj_wait_entry structure which is added to
>> the list whenever somebody starts to wait for a sequence which hasn't
>> materialized yet.
>>
>> Instead of extending this maybe just completely nuke the
>> syncobj_wait_entry handling and replace it with your implementation here.
> Hm, not sure I understand how that would work. We don't have an eventfd in the
> WAIT IOCTL...
>
> We could merge the two structs and figure out which callback to invoke based
> on which fields are filled, but I figured two separate structs would make
> for a cleaner split.

In this case feel free to go ahead with this approach.

Just make sure that you always add the eventfd structure to the right 
list. For example the list_add in drm_syncobj_add_eventfd() doesn't seem 
to be correct offhand.

Regards,
Christian.
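
For reference, the fix Christian is pointing at is presumably just putting the entry on the list the signaling paths actually walk. A sketch against this patch, not a tested change:

static void
drm_syncobj_add_eventfd(struct drm_syncobj *syncobj,
			struct syncobj_eventfd_entry *entry)
{
	spin_lock(&syncobj->lock);
	/* eventfd entries live on their own list, which
	 * drm_syncobj_add_point() and drm_syncobj_replace_fence() walk */
	list_add_tail(&entry->node, &syncobj->ev_fd_list);
	/* check whether the point has already materialized */
	syncobj_eventfd_entry_func(syncobj, entry);
	spin_unlock(&syncobj->lock);
}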

Patch

diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
index 1fbbc19f1ac0..4244e929b7f9 100644
--- a/drivers/gpu/drm/drm_internal.h
+++ b/drivers/gpu/drm/drm_internal.h
@@ -242,6 +242,9 @@  int drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
 			   struct drm_file *file_private);
 int drm_syncobj_timeline_wait_ioctl(struct drm_device *dev, void *data,
 				    struct drm_file *file_private);
+int drm_syncobj_timeline_register_eventfd_ioctl(struct drm_device *dev,
+						void *data,
+						struct drm_file *file_private);
 int drm_syncobj_reset_ioctl(struct drm_device *dev, void *data,
 			    struct drm_file *file_private);
 int drm_syncobj_signal_ioctl(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index ca2a6e6101dc..dcd18dba28b7 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -702,6 +702,9 @@  static const struct drm_ioctl_desc drm_ioctls[] = {
 		      DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT, drm_syncobj_timeline_wait_ioctl,
 		      DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TIMELINE_REGISTER_EVENTFD,
+		      drm_syncobj_timeline_register_eventfd_ioctl,
+		      DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_RESET, drm_syncobj_reset_ioctl,
 		      DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_SIGNAL, drm_syncobj_signal_ioctl,
diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index 0c2be8360525..401d09b56cf1 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -185,6 +185,7 @@ 
 
 #include <linux/anon_inodes.h>
 #include <linux/dma-fence-unwrap.h>
+#include <linux/eventfd.h>
 #include <linux/file.h>
 #include <linux/fs.h>
 #include <linux/sched/signal.h>
@@ -212,6 +213,17 @@  struct syncobj_wait_entry {
 static void syncobj_wait_syncobj_func(struct drm_syncobj *syncobj,
 				      struct syncobj_wait_entry *wait);
 
+struct syncobj_eventfd_entry {
+	struct list_head node;
+	struct dma_fence_cb fence_cb;
+	struct eventfd_ctx *ev_fd_ctx;
+	u64 point;
+};
+
+static void
+syncobj_eventfd_entry_func(struct drm_syncobj *syncobj,
+			   struct syncobj_eventfd_entry *entry);
+
 /**
  * drm_syncobj_find - lookup and reference a sync object.
  * @file_private: drm file private pointer
@@ -274,6 +286,25 @@  static void drm_syncobj_remove_wait(struct drm_syncobj *syncobj,
 	spin_unlock(&syncobj->lock);
 }
 
+static void
+syncobj_eventfd_entry_free(struct syncobj_eventfd_entry *entry)
+{
+	eventfd_ctx_put(entry->ev_fd_ctx);
+	/* This happens inside the syncobj lock */
+	list_del(&entry->node);
+	kfree(entry);
+}
+
+static void
+drm_syncobj_add_eventfd(struct drm_syncobj *syncobj,
+			struct syncobj_eventfd_entry *entry)
+{
+	spin_lock(&syncobj->lock);
+	list_add_tail(&entry->node, &syncobj->cb_list);
+	syncobj_eventfd_entry_func(syncobj, entry);
+	spin_unlock(&syncobj->lock);
+}
+
 /**
  * drm_syncobj_add_point - add new timeline point to the syncobj
  * @syncobj: sync object to add timeline point do
@@ -288,7 +319,8 @@  void drm_syncobj_add_point(struct drm_syncobj *syncobj,
 			   struct dma_fence *fence,
 			   uint64_t point)
 {
-	struct syncobj_wait_entry *cur, *tmp;
+	struct syncobj_wait_entry *wait_cur, *wait_tmp;
+	struct syncobj_eventfd_entry *ev_fd_cur, *ev_fd_tmp;
 	struct dma_fence *prev;
 
 	dma_fence_get(fence);
@@ -302,8 +334,10 @@  void drm_syncobj_add_point(struct drm_syncobj *syncobj,
 	dma_fence_chain_init(chain, prev, fence, point);
 	rcu_assign_pointer(syncobj->fence, &chain->base);
 
-	list_for_each_entry_safe(cur, tmp, &syncobj->cb_list, node)
-		syncobj_wait_syncobj_func(syncobj, cur);
+	list_for_each_entry_safe(wait_cur, wait_tmp, &syncobj->cb_list, node)
+		syncobj_wait_syncobj_func(syncobj, wait_cur);
+	list_for_each_entry_safe(ev_fd_cur, ev_fd_tmp, &syncobj->ev_fd_list, node)
+		syncobj_eventfd_entry_func(syncobj, ev_fd_cur);
 	spin_unlock(&syncobj->lock);
 
 	/* Walk the chain once to trigger garbage collection */
@@ -323,7 +357,8 @@  void drm_syncobj_replace_fence(struct drm_syncobj *syncobj,
 			       struct dma_fence *fence)
 {
 	struct dma_fence *old_fence;
-	struct syncobj_wait_entry *cur, *tmp;
+	struct syncobj_wait_entry *wait_cur, *wait_tmp;
+	struct syncobj_eventfd_entry *ev_fd_cur, *ev_fd_tmp;
 
 	if (fence)
 		dma_fence_get(fence);
@@ -335,8 +370,10 @@  void drm_syncobj_replace_fence(struct drm_syncobj *syncobj,
 	rcu_assign_pointer(syncobj->fence, fence);
 
 	if (fence != old_fence) {
-		list_for_each_entry_safe(cur, tmp, &syncobj->cb_list, node)
-			syncobj_wait_syncobj_func(syncobj, cur);
+		list_for_each_entry_safe(wait_cur, wait_tmp, &syncobj->cb_list, node)
+			syncobj_wait_syncobj_func(syncobj, wait_cur);
+		list_for_each_entry_safe(ev_fd_cur, ev_fd_tmp, &syncobj->ev_fd_list, node)
+			syncobj_eventfd_entry_func(syncobj, ev_fd_cur);
 	}
 
 	spin_unlock(&syncobj->lock);
@@ -472,7 +509,13 @@  void drm_syncobj_free(struct kref *kref)
 	struct drm_syncobj *syncobj = container_of(kref,
 						   struct drm_syncobj,
 						   refcount);
+	struct syncobj_eventfd_entry *ev_fd_cur, *ev_fd_tmp;
+
 	drm_syncobj_replace_fence(syncobj, NULL);
+
+	list_for_each_entry_safe(ev_fd_cur, ev_fd_tmp, &syncobj->ev_fd_list, node)
+		syncobj_eventfd_entry_free(ev_fd_cur);
+
 	kfree(syncobj);
 }
 EXPORT_SYMBOL(drm_syncobj_free);
@@ -501,6 +544,7 @@  int drm_syncobj_create(struct drm_syncobj **out_syncobj, uint32_t flags,
 
 	kref_init(&syncobj->refcount);
 	INIT_LIST_HEAD(&syncobj->cb_list);
+	INIT_LIST_HEAD(&syncobj->ev_fd_list);
 	spin_lock_init(&syncobj->lock);
 
 	if (flags & DRM_SYNCOBJ_CREATE_SIGNALED) {
@@ -1304,6 +1348,63 @@  drm_syncobj_timeline_wait_ioctl(struct drm_device *dev, void *data,
 	return ret;
 }
 
+static void
+syncobj_eventfd_entry_func(struct drm_syncobj *syncobj,
+			   struct syncobj_eventfd_entry *entry)
+{
+	int ret;
+	struct dma_fence *fence;
+
+	/* This happens inside the syncobj lock */
+	fence = dma_fence_get(rcu_dereference_protected(syncobj->fence, 1));
+	ret = dma_fence_chain_find_seqno(&fence, entry->point);
+	dma_fence_put(fence);
+
+	if (ret != 0)
+		return;
+
+	eventfd_signal(entry->ev_fd_ctx, 1);
+	syncobj_eventfd_entry_free(entry);
+}
+
+int
+drm_syncobj_timeline_register_eventfd_ioctl(struct drm_device *dev,
+					    void *data,
+					    struct drm_file *file_private)
+{
+	struct drm_syncobj_timeline_register_eventfd *args = data;
+	struct drm_syncobj *syncobj;
+	struct eventfd_ctx *ev_fd_ctx;
+	struct syncobj_eventfd_entry *entry;
+
+	if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ_TIMELINE))
+		return -EOPNOTSUPP;
+
+	/* TODO: support other flags? */
+	if (args->flags != DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE)
+		return -EINVAL;
+
+	syncobj = drm_syncobj_find(file_private, args->handle);
+	if (!syncobj)
+		return -ENOENT;
+
+	ev_fd_ctx = eventfd_ctx_fdget(args->fd);
+	if (IS_ERR(ev_fd_ctx))
+		return PTR_ERR(ev_fd_ctx);
+
+	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry) {
+		eventfd_ctx_put(ev_fd_ctx);
+		return -ENOMEM;
+	}
+	entry->ev_fd_ctx = ev_fd_ctx;
+	entry->point = args->point;
+
+	drm_syncobj_add_eventfd(syncobj, entry);
+	drm_syncobj_put(syncobj);
+
+	return 0;
+}
 
 int
 drm_syncobj_reset_ioctl(struct drm_device *dev, void *data,
diff --git a/include/drm/drm_syncobj.h b/include/drm/drm_syncobj.h
index 6cf7243a1dc5..b40052132e52 100644
--- a/include/drm/drm_syncobj.h
+++ b/include/drm/drm_syncobj.h
@@ -54,7 +54,11 @@  struct drm_syncobj {
 	 */
 	struct list_head cb_list;
 	/**
-	 * @lock: Protects &cb_list and write-locks &fence.
+	 * @ev_fd_list: List of registered eventfd.
+	 */
+	struct list_head ev_fd_list;
+	/**
+	 * @lock: Protects &cb_list and &ev_fd_list, and write-locks &fence.
 	 */
 	spinlock_t lock;
 	/**
diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
index 642808520d92..359e21414196 100644
--- a/include/uapi/drm/drm.h
+++ b/include/uapi/drm/drm.h
@@ -909,6 +909,20 @@  struct drm_syncobj_timeline_wait {
 	__u32 pad;
 };
 
+/**
+ * struct drm_syncobj_timeline_register_eventfd
+ *
+ * Register an eventfd to be signalled when a timeline point completes. The
+ * eventfd counter will be incremented by one.
+ */
+struct drm_syncobj_timeline_register_eventfd {
+	__u32 handle;
+	__u32 flags;
+	__u64 point;
+	__s32 fd;
+	__u32 pad;
+};
+
 
 struct drm_syncobj_array {
 	__u64 handles;
@@ -1095,6 +1109,7 @@  extern "C" {
 #define DRM_IOCTL_SYNCOBJ_QUERY		DRM_IOWR(0xCB, struct drm_syncobj_timeline_array)
 #define DRM_IOCTL_SYNCOBJ_TRANSFER	DRM_IOWR(0xCC, struct drm_syncobj_transfer)
 #define DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL	DRM_IOWR(0xCD, struct drm_syncobj_timeline_array)
+#define DRM_IOCTL_SYNCOBJ_TIMELINE_REGISTER_EVENTFD	DRM_IOWR(0xCE, struct drm_syncobj_timeline_register_eventfd)
 
 /**
  * DRM_IOCTL_MODE_GETFB2 - Get framebuffer metadata.