
drm/syncobj: Allow wait for submit and signal behavior (v2)

Message ID 1502298054-12260-1-git-send-email-jason.ekstrand@intel.com (mailing list archive)
State New, archived

Commit Message

Jason Ekstrand Aug. 9, 2017, 5 p.m. UTC
Vulkan VkFence semantics require that the application be able to perform
a CPU wait on work which may not yet have been submitted.  This is
perfectly safe because the CPU wait has a timeout which will get
triggered eventually if no work is ever submitted.  This behavior is
advantageous for multi-threaded workloads because, so long as all of the
threads agree on what fences to use up-front, you don't have the extra
cross-thread synchronization cost of thread A telling thread B that it
has submitted its dependent work and thread B is now free to wait.

Within a single process, this can be implemented in the userspace driver
by doing exactly the same kind of tracking the app would have to do
using POSIX condition variables or similar.  However, in order for this
to work cross-process (as is required by VK_KHR_external_fence), we need
to handle this in the kernel.

This commit adds a WAIT_FOR_SUBMIT flag to DRM_IOCTL_SYNCOBJ_WAIT which
instructs the IOCTL to wait for the syncobj to have a non-null fence and
then wait on the fence.  Combined with DRM_IOCTL_SYNCOBJ_RESET, you can
easily get the Vulkan behavior.

v2:
 - Fix a bug in the invalid syncobj error path
 - Unify the wait-all and wait-any cases

Signed-off-by: Jason Ekstrand <jason@jlekstrand.net>
Cc: Dave Airlie <airlied@redhat.com>
---

I realized today (this is what happens when you sleep) that it takes almost
no work to make the wait-any path also handle wait-all.  By unifying the
two, I deleted over a hundred lines of code from the implementation.

 drivers/gpu/drm/drm_syncobj.c | 243 ++++++++++++++++++++++++++++++++++--------
 include/uapi/drm/drm.h        |   1 +
 2 files changed, 199 insertions(+), 45 deletions(-)
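
As an illustration (not part of the patch), a userspace caller of the new
flag might look like the sketch below.  It assumes the struct
drm_syncobj_wait layout added earlier in this series (handles pointer,
absolute timeout, handle count, flags); the helper name is made up and
error handling is trimmed:

/* Hedged sketch: CPU-wait on a syncobj whose fence may not have been
 * submitted yet.  drm_fd and handle come from elsewhere in the app;
 * the uapi header path may vary by system.
 */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/drm.h>

static int wait_with_submit(int drm_fd, uint32_t handle, int64_t abs_ns)
{
        struct drm_syncobj_wait wait;

        memset(&wait, 0, sizeof(wait));
        wait.handles = (uintptr_t)&handle;  /* u64 pointer to handle array */
        wait.count_handles = 1;
        wait.timeout_nsec = abs_ns;         /* absolute timeout */
        wait.flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT;

        /* Blocks until a fence is installed in the syncobj and that fence
         * signals, or fails (e.g. ETIME) once the timeout expires. */
        return ioctl(drm_fd, DRM_IOCTL_SYNCOBJ_WAIT, &wait);
}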

Comments

Chris Wilson Aug. 9, 2017, 5:57 p.m. UTC | #1
Quoting Jason Ekstrand (2017-08-09 18:00:54)
> Vulkan VkFence semantics require that the application be able to perform
> a CPU wait on work which may not yet have been submitted.  This is
> perfectly safe because the CPU wait has a timeout which will get
> triggered eventually if no work is ever submitted.  This behavior is
> advantageous for multi-threaded workloads because, so long as all of the
> threads agree on what fences to use up-front, you don't have the extra
> cross-thread synchronization cost of thread A telling thread B that it
> has submitted its dependent work and thread B is now free to wait.
> 
> Within a single process, this can be implemented in the userspace driver
> by doing exactly the same kind of tracking the app would have to do
> using POSIX condition variables or similar.  However, in order for this
> to work cross-process (as is required by VK_KHR_external_fence), we need
> to handle this in the kernel.
> 
> This commit adds a WAIT_FOR_SUBMIT flag to DRM_IOCTL_SYNCOBJ_WAIT which
> instructs the IOCTL to wait for the syncobj to have a non-null fence and
> then wait on the fence.  Combined with DRM_IOCTL_SYNCOBJ_RESET, you can
> easily get the Vulkan behavior.
> 
> v2:
>  - Fix a bug in the invalid syncobj error path
>  - Unify the wait-all and wait-any cases
> 
> Signed-off-by: Jason Ekstrand <jason@jlekstrand.net>
> Cc: Dave Airlie <airlied@redhat.com>
> ---
> 
> I realized today (this is what happens when you sleep) that it takes almost
> no work to make the wait-any path also handle wait-all.  By unifying the
> two, I deleted over a hundred lines of code from the implementation.
> 
>  drivers/gpu/drm/drm_syncobj.c | 243 ++++++++++++++++++++++++++++++++++--------
>  include/uapi/drm/drm.h        |   1 +
>  2 files changed, 199 insertions(+), 45 deletions(-)
> 
> diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
> index 510dfc2..5e7f654 100644
> --- a/drivers/gpu/drm/drm_syncobj.c
> +++ b/drivers/gpu/drm/drm_syncobj.c
> @@ -51,6 +51,7 @@
>  #include <linux/fs.h>
>  #include <linux/anon_inodes.h>
>  #include <linux/sync_file.h>
> +#include <linux/sched/signal.h>
>  
>  #include "drm_internal.h"
>  #include <drm/drm_syncobj.h>
> @@ -104,6 +105,27 @@ static void drm_syncobj_add_callback_locked(struct drm_syncobj *syncobj,

This is a bit of the puzzle that is missing.

>         list_add_tail(&cb->node, &syncobj->cb_list);
>  }
>  
> +static int drm_syncobj_fence_get_or_add_callback(struct drm_syncobj *syncobj,
> +                                                struct dma_fence **fence,
> +                                                struct drm_syncobj_cb *cb,
> +                                                drm_syncobj_func_t func)
> +{
> +       int ret;
> +
> +       spin_lock(&syncobj->lock);
> +       if (syncobj->fence) {
> +               *fence = dma_fence_get(syncobj->fence);
> +               ret = 1;
> +       } else {
> +               *fence = NULL;
> +               drm_syncobj_add_callback_locked(syncobj, cb, func);
> +               ret = 0;
> +       }
> +       spin_unlock(&syncobj->lock);
> +
> +       return ret;
> +}
> +
>  /**
>   * drm_syncobj_add_callback - adds a callback to syncobj::cb_list
>   * @syncobj: Sync object to which to add the callback
> @@ -526,6 +548,159 @@ drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data,
>                                         &args->handle);
>  }
>  
> +static int drm_syncobj_signaled(struct drm_syncobj *syncobj,
> +                               uint32_t wait_flags)
> +{
> +       struct dma_fence *fence;
> +       int ret;
> +
> +       fence = drm_syncobj_fence_get(syncobj);
> +       if (!fence) {
> +               if (wait_flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT)
> +                       return 0;
> +               else
> +                       return -EINVAL;
> +       }
> +
> +       ret = dma_fence_is_signaled(fence) ? 1 : 0;
> +
> +       dma_fence_put(fence);
> +
> +       return ret;
> +}
> +
> +struct syncobj_wait_entry {
> +       struct task_struct *task;
> +       struct dma_fence *fence;
> +       struct dma_fence_cb fence_cb;
> +       struct drm_syncobj_cb syncobj_cb;
> +};
> +
> +static void syncobj_wait_fence_func(struct dma_fence *fence,
> +                                   struct dma_fence_cb *cb)
> +{
> +       struct syncobj_wait_entry *wait =
> +               container_of(cb, struct syncobj_wait_entry, fence_cb);
> +
> +       wake_up_process(wait->task);
> +}
> +
> +static void syncobj_wait_syncobj_func(struct drm_syncobj *syncobj,
> +                                     struct drm_syncobj_cb *cb)
> +{
> +       struct syncobj_wait_entry *wait =
> +               container_of(cb, struct syncobj_wait_entry, syncobj_cb);
> +
> +       /* This happens inside the syncobj lock */
> +       wait->fence = dma_fence_get(syncobj->fence);
> +       wake_up_process(wait->task);
> +}
> +
> +static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
> +                                                 uint32_t count,
> +                                                 uint32_t flags,
> +                                                 signed long timeout,
> +                                                 uint32_t *idx)
> +{
> +       struct syncobj_wait_entry *entries;
> +       struct dma_fence *fence;
> +       signed long ret;
> +       uint32_t signaled_count, i;
> +
> +       if (timeout == 0) {
> +               signaled_count = 0;
> +               for (i = 0; i < count; ++i) {
> +                       ret = drm_syncobj_signaled(syncobjs[i], flags);
> +                       if (ret < 0)
> +                               return ret;
> +                       if (ret == 0)
> +                               continue;
> +                       if (signaled_count == 0 && idx)
> +                               *idx = i;
> +                       signaled_count++;
> +               }
> +
> +               if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL)
> +                       return signaled_count == count ? 1 : 0;
> +               else
> +                       return signaled_count > 0 ? 1 : 0;
> +       }
> +
> +       entries = kcalloc(count, sizeof(*entries), GFP_KERNEL);
> +       if (!entries)
> +               return -ENOMEM;
> +
> +       for (i = 0; i < count; ++i) {
> +               entries[i].task = current;
> +
> +               if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) {
> +                       drm_syncobj_fence_get_or_add_callback(syncobjs[i],
> +                                                             &entries[i].fence,
> +                                                             &entries[i].syncobj_cb,
> +                                                             syncobj_wait_syncobj_func);
> +               } else {
> +                       entries[i].fence = drm_syncobj_fence_get(syncobjs[i]);
> +                       if (!entries[i].fence) {
> +                               ret = -EINVAL;
> +                               goto err_cleanup_entries;
> +                       }

So we are taking a snapshot here. It looks like this could have been
done using a dma_fence_array + dma_fence_proxy for capturing the future
fence.

> +       ret = timeout;
> +       while (ret > 0) {
> +               signaled_count = 0;
> +               for (i = 0; i < count; ++i) {
> +                       fence = entries[i].fence;
> +                       if (!fence)
> +                               continue;
> +
> +                       if (dma_fence_is_signaled(fence) ||
> +                           (!entries[i].fence_cb.func &&
> +                            dma_fence_add_callback(fence,
> +                                                   &entries[i].fence_cb,
> +                                                   syncobj_wait_fence_func))) {
> +                               /* The fence has been signaled */
> +                               if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL) {
> +                                       signaled_count++;
> +                               } else {
> +                                       if (idx)
> +                                               *idx = i;
> +                                       goto done_waiting;
> +                               }
> +                       }
> +               }
> +
> +               if ((flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL) &&
> +                   signaled_count == count)
> +                       goto done_waiting;
> +
> +               set_current_state(TASK_INTERRUPTIBLE);

Setting TASK_INTERRUPTIBLE should be the first step in the wait-loop,
before doing any checks, so that if any state changes whilst you are in
the middle of those checks, the schedule_timeout is a nop and you can
check again.
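
That canonical shape, as a minimal sketch (the condition and names are
illustrative):

ret = timeout;
while (ret > 0) {
        /* Mark the task *before* checking the condition, so a wake-up
         * racing with the checks makes schedule_timeout() return
         * immediately instead of the wake-up being lost. */
        set_current_state(TASK_INTERRUPTIBLE);

        if (done_waiting())             /* illustrative check */
                break;

        if (signal_pending(current)) {
                ret = -ERESTARTSYS;
                break;
        }

        ret = schedule_timeout(ret);
}
__set_current_state(TASK_RUNNING);
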
-Chris
Christian König Aug. 9, 2017, 6:25 p.m. UTC | #2
Am 09.08.2017 um 19:57 schrieb Chris Wilson:
> Quoting Jason Ekstrand (2017-08-09 18:00:54)
>> Vulkan VkFence semantics require that the application be able to perform
>> a CPU wait on work which may not yet have been submitted.  This is
>> perfectly safe because the CPU wait has a timeout which will get
>> triggered eventually if no work is ever submitted.  This behavior is
>> advantageous for multi-threaded workloads because, so long as all of the
>> threads agree on what fences to use up-front, you don't have the extra
>> cross-thread synchronization cost of thread A telling thread B that it
>> has submitted its dependent work and thread B is now free to wait.
>>
>> Within a single process, this can be implemented in the userspace driver
>> by doing exactly the same kind of tracking the app would have to do
>> using POSIX condition variables or similar.  However, in order for this
>> to work cross-process (as is required by VK_KHR_external_fence), we need
>> to handle this in the kernel.
>>
>> This commit adds a WAIT_FOR_SUBMIT flag to DRM_IOCTL_SYNCOBJ_WAIT which
>> instructs the IOCTL to wait for the syncobj to have a non-null fence and
>> then wait on the fence.  Combined with DRM_IOCTL_SYNCOBJ_RESET, you can
>> easily get the Vulkan behavior.
>>
>> v2:
>>   - Fix a bug in the invalid syncobj error path
>>   - Unify the wait-all and wait-any cases
>>
>> Signed-off-by: Jason Ekstrand <jason@jlekstrand.net>
>> Cc: Dave Airlie <airlied@redhat.com>
>> ---
>>
>> I realized today (this is what happens when you sleep) that it takes almost
>> no work to make the wait-any path also handle wait-all.  By unifying the
>> two, I deleted over a hundred lines of code from the implementation.
>>
>>   drivers/gpu/drm/drm_syncobj.c | 243 ++++++++++++++++++++++++++++++++++--------
>>   include/uapi/drm/drm.h        |   1 +
>>   2 files changed, 199 insertions(+), 45 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
>> index 510dfc2..5e7f654 100644
>> --- a/drivers/gpu/drm/drm_syncobj.c
>> +++ b/drivers/gpu/drm/drm_syncobj.c
>> @@ -51,6 +51,7 @@
>>   #include <linux/fs.h>
>>   #include <linux/anon_inodes.h>
>>   #include <linux/sync_file.h>
>> +#include <linux/sched/signal.h>
>>   
>>   #include "drm_internal.h"
>>   #include <drm/drm_syncobj.h>
>> @@ -104,6 +105,27 @@ static void drm_syncobj_add_callback_locked(struct drm_syncobj *syncobj,
> This is a bit of the puzzle that is missing.
>
>>          list_add_tail(&cb->node, &syncobj->cb_list);
>>   }
>>   
>> +static int drm_syncobj_fence_get_or_add_callback(struct drm_syncobj *syncobj,
>> +                                                struct dma_fence **fence,
>> +                                                struct drm_syncobj_cb *cb,
>> +                                                drm_syncobj_func_t func)
>> +{
>> +       int ret;
>> +
>> +       spin_lock(&syncobj->lock);
>> +       if (syncobj->fence) {
>> +               *fence = dma_fence_get(syncobj->fence);
>> +               ret = 1;
>> +       } else {
>> +               *fence = NULL;
>> +               drm_syncobj_add_callback_locked(syncobj, cb, func);
>> +               ret = 0;
>> +       }
>> +       spin_unlock(&syncobj->lock);
>> +
>> +       return ret;
>> +}
>> +
>>   /**
>>    * drm_syncobj_add_callback - adds a callback to syncobj::cb_list
>>    * @syncobj: Sync object to which to add the callback
>> @@ -526,6 +548,159 @@ drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data,
>>                                          &args->handle);
>>   }
>>   
>> +static int drm_syncobj_signaled(struct drm_syncobj *syncobj,
>> +                               uint32_t wait_flags)
>> +{
>> +       struct dma_fence *fence;
>> +       int ret;
>> +
>> +       fence = drm_syncobj_fence_get(syncobj);
>> +       if (!fence) {
>> +               if (wait_flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT)
>> +                       return 0;
>> +               else
>> +                       return -EINVAL;
>> +       }
>> +
>> +       ret = dma_fence_is_signaled(fence) ? 1 : 0;
>> +
>> +       dma_fence_put(fence);
>> +
>> +       return ret;
>> +}
>> +
>> +struct syncobj_wait_entry {
>> +       struct task_struct *task;
>> +       struct dma_fence *fence;
>> +       struct dma_fence_cb fence_cb;
>> +       struct drm_syncobj_cb syncobj_cb;
>> +};
>> +
>> +static void syncobj_wait_fence_func(struct dma_fence *fence,
>> +                                   struct dma_fence_cb *cb)
>> +{
>> +       struct syncobj_wait_entry *wait =
>> +               container_of(cb, struct syncobj_wait_entry, fence_cb);
>> +
>> +       wake_up_process(wait->task);
>> +}
>> +
>> +static void syncobj_wait_syncobj_func(struct drm_syncobj *syncobj,
>> +                                     struct drm_syncobj_cb *cb)
>> +{
>> +       struct syncobj_wait_entry *wait =
>> +               container_of(cb, struct syncobj_wait_entry, syncobj_cb);
>> +
>> +       /* This happens inside the syncobj lock */
>> +       wait->fence = dma_fence_get(syncobj->fence);
>> +       wake_up_process(wait->task);
>> +}
>> +
>> +static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
>> +                                                 uint32_t count,
>> +                                                 uint32_t flags,
>> +                                                 signed long timeout,
>> +                                                 uint32_t *idx)
>> +{
>> +       struct syncobj_wait_entry *entries;
>> +       struct dma_fence *fence;
>> +       signed long ret;
>> +       uint32_t signaled_count, i;
>> +
>> +       if (timeout == 0) {
>> +               signaled_count = 0;
>> +               for (i = 0; i < count; ++i) {
>> +                       ret = drm_syncobj_signaled(syncobjs[i], flags);
>> +                       if (ret < 0)
>> +                               return ret;
>> +                       if (ret == 0)
>> +                               continue;
>> +                       if (signaled_count == 0 && idx)
>> +                               *idx = i;
>> +                       signaled_count++;
>> +               }
>> +
>> +               if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL)
>> +                       return signaled_count == count ? 1 : 0;
>> +               else
>> +                       return signaled_count > 0 ? 1 : 0;
>> +       }
>> +
>> +       entries = kcalloc(count, sizeof(*entries), GFP_KERNEL);
>> +       if (!entries)
>> +               return -ENOMEM;
>> +
>> +       for (i = 0; i < count; ++i) {
>> +               entries[i].task = current;
>> +
>> +               if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) {
>> +                       drm_syncobj_fence_get_or_add_callback(syncobjs[i],
>> +                                                             &entries[i].fence,
>> +                                                             &entries[i].syncobj_cb,
>> +                                                             syncobj_wait_syncobj_func);
>> +               } else {
>> +                       entries[i].fence = drm_syncobj_fence_get(syncobjs[i]);
>> +                       if (!entries[i].fence) {
>> +                               ret = -EINVAL;
>> +                               goto err_cleanup_entries;
>> +                       }
> So we are taking a snapshot here. It looks like this could have been
> done using a dma_fence_array + dma_fence_proxy for capturing the future
> fence.
>
>> +       ret = timeout;
>> +       while (ret > 0) {
>> +               signaled_count = 0;
>> +               for (i = 0; i < count; ++i) {
>> +                       fence = entries[i].fence;
>> +                       if (!fence)
>> +                               continue;
>> +
>> +                       if (dma_fence_is_signaled(fence) ||
>> +                           (!entries[i].fence_cb.func &&
>> +                            dma_fence_add_callback(fence,
>> +                                                   &entries[i].fence_cb,
>> +                                                   syncobj_wait_fence_func))) {
>> +                               /* The fence has been signaled */
>> +                               if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL) {
>> +                                       signaled_count++;
>> +                               } else {
>> +                                       if (idx)
>> +                                               *idx = i;
>> +                                       goto done_waiting;
>> +                               }
>> +                       }
>> +               }
>> +
>> +               if ((flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL) &&
>> +                   signaled_count == count)
>> +                       goto done_waiting;
>> +
>> +               set_current_state(TASK_INTERRUPTIBLE);
> Setting TASK_INTERRUPTIBLE should be the first step in the wait-loop,
> before doing any checks, so that if any state changes whilst you are in
> the middle of those checks, the schedule_timeout is a nop and you can
> check again.

Yeah, completely agree.

I would rather drop the approach with the custom wait and try to use 
wait_event_interruptible here.

As far as I can see that should make the whole patch much cleaner in 
general.

Regards,
Christian.

> -Chris
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel
Jason Ekstrand Aug. 9, 2017, 9:09 p.m. UTC | #3
On Wed, Aug 9, 2017 at 11:25 AM, Christian König <deathsimple@vodafone.de>
wrote:

> Am 09.08.2017 um 19:57 schrieb Chris Wilson:
>
>> Quoting Jason Ekstrand (2017-08-09 18:00:54)
>>
>>> Vulkan VkFence semantics require that the application be able to perform
>>> a CPU wait on work which may not yet have been submitted.  This is
>>> perfectly safe because the CPU wait has a timeout which will get
>>> triggered eventually if no work is ever submitted.  This behavior is
>>> advantageous for multi-threaded workloads because, so long as all of the
>>> threads agree on what fences to use up-front, you don't have the extra
>>> cross-thread synchronization cost of thread A telling thread B that it
>>> has submitted its dependent work and thread B is now free to wait.
>>>
>>> Within a single process, this can be implemented in the userspace driver
>>> by doing exactly the same kind of tracking the app would have to do
>>> using POSIX condition variables or similar.  However, in order for this
>>> to work cross-process (as is required by VK_KHR_external_fence), we need
>>> to handle this in the kernel.
>>>
>>> This commit adds a WAIT_FOR_SUBMIT flag to DRM_IOCTL_SYNCOBJ_WAIT which
>>> instructs the IOCTL to wait for the syncobj to have a non-null fence and
>>> then wait on the fence.  Combined with DRM_IOCTL_SYNCOBJ_RESET, you can
>>> easily get the Vulkan behavior.
>>>
>>> v2:
>>>   - Fix a bug in the invalid syncobj error path
>>>   - Unify the wait-all and wait-any cases
>>>
>>> Signed-off-by: Jason Ekstrand <jason@jlekstrand.net>
>>> Cc: Dave Airlie <airlied@redhat.com>
>>> ---
>>>
>>> I realized today (this is what happens when you sleep) that it takes almost
>>> no work to make the wait-any path also handle wait-all.  By unifying the
>>> two, I deleted over a hundred lines of code from the implementation.
>>>
>>>   drivers/gpu/drm/drm_syncobj.c | 243 ++++++++++++++++++++++++++++++++++--------
>>>   include/uapi/drm/drm.h        |   1 +
>>>   2 files changed, 199 insertions(+), 45 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
>>> index 510dfc2..5e7f654 100644
>>> --- a/drivers/gpu/drm/drm_syncobj.c
>>> +++ b/drivers/gpu/drm/drm_syncobj.c
>>> @@ -51,6 +51,7 @@
>>>   #include <linux/fs.h>
>>>   #include <linux/anon_inodes.h>
>>>   #include <linux/sync_file.h>
>>> +#include <linux/sched/signal.h>
>>>     #include "drm_internal.h"
>>>   #include <drm/drm_syncobj.h>
>>> @@ -104,6 +105,27 @@ static void drm_syncobj_add_callback_locked(struct drm_syncobj *syncobj,
>>>
>> This is a bit of the puzzle that is missing.
>>
>
?  It's in the previous patch.


>>>          list_add_tail(&cb->node, &syncobj->cb_list);
>>>   }
>>>   +static int drm_syncobj_fence_get_or_add_callback(struct drm_syncobj *syncobj,
>>> +                                                struct dma_fence **fence,
>>> +                                                struct drm_syncobj_cb *cb,
>>> +                                                drm_syncobj_func_t func)
>>> +{
>>> +       int ret;
>>> +
>>> +       spin_lock(&syncobj->lock);
>>> +       if (syncobj->fence) {
>>> +               *fence = dma_fence_get(syncobj->fence);
>>> +               ret = 1;
>>> +       } else {
>>> +               *fence = NULL;
>>> +               drm_syncobj_add_callback_locked(syncobj, cb, func);
>>> +               ret = 0;
>>> +       }
>>> +       spin_unlock(&syncobj->lock);
>>> +
>>> +       return ret;
>>> +}
>>> +
>>>   /**
>>>    * drm_syncobj_add_callback - adds a callback to syncobj::cb_list
>>>    * @syncobj: Sync object to which to add the callback
>>> @@ -526,6 +548,159 @@ drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data,
>>>                                          &args->handle);
>>>   }
>>>   +static int drm_syncobj_signaled(struct drm_syncobj *syncobj,
>>> +                               uint32_t wait_flags)
>>> +{
>>> +       struct dma_fence *fence;
>>> +       int ret;
>>> +
>>> +       fence = drm_syncobj_fence_get(syncobj);
>>> +       if (!fence) {
>>> +               if (wait_flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT)
>>> +                       return 0;
>>> +               else
>>> +                       return -EINVAL;
>>> +       }
>>> +
>>> +       ret = dma_fence_is_signaled(fence) ? 1 : 0;
>>> +
>>> +       dma_fence_put(fence);
>>> +
>>> +       return ret;
>>> +}
>>> +
>>> +struct syncobj_wait_entry {
>>> +       struct task_struct *task;
>>> +       struct dma_fence *fence;
>>> +       struct dma_fence_cb fence_cb;
>>> +       struct drm_syncobj_cb syncobj_cb;
>>> +};
>>> +
>>> +static void syncobj_wait_fence_func(struct dma_fence *fence,
>>> +                                   struct dma_fence_cb *cb)
>>> +{
>>> +       struct syncobj_wait_entry *wait =
>>> +               container_of(cb, struct syncobj_wait_entry, fence_cb);
>>> +
>>> +       wake_up_process(wait->task);
>>> +}
>>> +
>>> +static void syncobj_wait_syncobj_func(struct drm_syncobj *syncobj,
>>> +                                     struct drm_syncobj_cb *cb)
>>> +{
>>> +       struct syncobj_wait_entry *wait =
>>> +               container_of(cb, struct syncobj_wait_entry, syncobj_cb);
>>> +
>>> +       /* This happens inside the syncobj lock */
>>> +       wait->fence = dma_fence_get(syncobj->fence);
>>> +       wake_up_process(wait->task);
>>> +}
>>> +
>>> +static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
>>> +                                                 uint32_t count,
>>> +                                                 uint32_t flags,
>>> +                                                 signed long timeout,
>>> +                                                 uint32_t *idx)
>>> +{
>>> +       struct syncobj_wait_entry *entries;
>>> +       struct dma_fence *fence;
>>> +       signed long ret;
>>> +       uint32_t signaled_count, i;
>>> +
>>> +       if (timeout == 0) {
>>> +               signaled_count = 0;
>>> +               for (i = 0; i < count; ++i) {
>>> +                       ret = drm_syncobj_signaled(syncobjs[i], flags);
>>> +                       if (ret < 0)
>>> +                               return ret;
>>> +                       if (ret == 0)
>>> +                               continue;
>>> +                       if (signaled_count == 0 && idx)
>>> +                               *idx = i;
>>> +                       signaled_count++;
>>> +               }
>>> +
>>> +               if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL)
>>> +                       return signaled_count == count ? 1 : 0;
>>> +               else
>>> +                       return signaled_count > 0 ? 1 : 0;
>>> +       }
>>> +
>>> +       entries = kcalloc(count, sizeof(*entries), GFP_KERNEL);
>>> +       if (!entries)
>>> +               return -ENOMEM;
>>> +
>>> +       for (i = 0; i < count; ++i) {
>>> +               entries[i].task = current;
>>> +
>>> +               if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) {
>>> +                       drm_syncobj_fence_get_or_add_callback(syncobjs[i],
>>> +                                                             &entries[i].fence,
>>> +                                                             &entries[i].syncobj_cb,
>>> +                                                             syncobj_wait_syncobj_func);
>>> +               } else {
>>> +                       entries[i].fence = drm_syncobj_fence_get(syncobjs[i]);
>>> +                       if (!entries[i].fence) {
>>> +                               ret = -EINVAL;
>>> +                               goto err_cleanup_entries;
>>> +                       }
>>>
>> So we are taking a snapshot here. It looks like this could have been
>> done using a dma_fence_array + dma_fence_proxy for capturing the future
>> fence.
>>
>
I'm not sure what you mean.  Is that something in the future fence series
that I should be looking at?


>>> +       ret = timeout;
>>> +       while (ret > 0) {
>>> +               signaled_count = 0;
>>> +               for (i = 0; i < count; ++i) {
>>> +                       fence = entries[i].fence;
>>> +                       if (!fence)
>>> +                               continue;
>>> +
>>> +                       if (dma_fence_is_signaled(fence) ||
>>> +                           (!entries[i].fence_cb.func &&
>>> +                            dma_fence_add_callback(fence,
>>> +                                                   &entries[i].fence_cb,
>>> +                                                   syncobj_wait_fence_func))) {
>>> +                               /* The fence has been signaled */
>>> +                               if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL) {
>>> +                                       signaled_count++;
>>> +                               } else {
>>> +                                       if (idx)
>>> +                                               *idx = i;
>>> +                                       goto done_waiting;
>>> +                               }
>>> +                       }
>>> +               }
>>> +
>>> +               if ((flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL) &&
>>> +                   signaled_count == count)
>>> +                       goto done_waiting;
>>> +
>>> +               set_current_state(TASK_INTERRUPTIBLE);
>>>
>> Setting TASK_INTERRUPTIBLE should be the first step in the wait-loop,
>> before doing any checks, so that if any state changes whilst you are in
>> the middle of those checks, the schedule_timeout is a nop and you can
>> check again.
>>
>
> Yeah, completely agree.
>
> I would rather drop the approach with the custom wait and try to use
> wait_event_interruptible here.
>
> As far as I can see that should make the whole patch much cleaner in
> general.
>

Sounds good to me.

--Jason
Chris Wilson Aug. 9, 2017, 9:31 p.m. UTC | #4
Quoting Jason Ekstrand (2017-08-09 18:00:54)
> +static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
> +                                                 uint32_t count,
> +                                                 uint32_t flags,
> +                                                 signed long timeout,
> +                                                 uint32_t *idx)
> +{
> +       struct syncobj_wait_entry *entries;
> +       struct dma_fence *fence;
> +       signed long ret;
> +       uint32_t signaled_count, i;
> +
> +       if (timeout == 0) {
> +               signaled_count = 0;
> +               for (i = 0; i < count; ++i) {
> +                       ret = drm_syncobj_signaled(syncobjs[i], flags);
> +                       if (ret < 0)
> +                               return ret;
> +                       if (ret == 0)
> +                               continue;
> +                       if (signaled_count == 0 && idx)
> +                               *idx = i;
> +                       signaled_count++;
> +               }
> +
> +               if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL)
> +                       return signaled_count == count ? 1 : 0;
> +               else
> +                       return signaled_count > 0 ? 1 : 0;

There's a very annoying laxness in the dma_fence API here, in that
backends are not required to automatically report when a fence is
signaled prior to fence->ops->enable_signaling() being called.
So here if we fail to match signaled_count, we need to fall through and
try a 0 timeout wait!
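
In other words, a conforming quick check has to look something like this
sketch (the helper is hypothetical, not code from the patch):

static bool fence_signaled_slow(struct dma_fence *fence)
{
        /* The fast flag check can miss fences whose backend only
         * reports completion after ->enable_signaling() has run. */
        if (dma_fence_is_signaled(fence))
                return true;

        /* A zero-timeout wait forces the backend to evaluate the fence;
         * per the contract in the commit cited below, a return > 0
         * means the fence is in fact signaled. */
        return dma_fence_wait_timeout(fence, false, 0) > 0;
}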

Christian, dma_fence_wait_any_timeout() has this same bug you told me off
for, e.g. commit 698c0f7ff216 ("dma-buf/fence: revert "don't wait when
specified timeout is zero" (v2)")!
-Chris
Jason Ekstrand Aug. 9, 2017, 9:54 p.m. UTC | #5
On Wed, Aug 9, 2017 at 2:31 PM, Chris Wilson <chris@chris-wilson.co.uk>
wrote:

> Quoting Jason Ekstrand (2017-08-09 18:00:54)
> > +static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
> > +                                                 uint32_t count,
> > +                                                 uint32_t flags,
> > +                                                 signed long timeout,
> > +                                                 uint32_t *idx)
> > +{
> > +       struct syncobj_wait_entry *entries;
> > +       struct dma_fence *fence;
> > +       signed long ret;
> > +       uint32_t signaled_count, i;
> > +
> > +       if (timeout == 0) {
> > +               signaled_count = 0;
> > +               for (i = 0; i < count; ++i) {
> > +                       ret = drm_syncobj_signaled(syncobjs[i], flags);
> > +                       if (ret < 0)
> > +                               return ret;
> > +                       if (ret == 0)
> > +                               continue;
> > +                       if (signaled_count == 0 && idx)
> > +                               *idx = i;
> > +                       signaled_count++;
> > +               }
> > +
> > +               if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL)
> > +                       return signaled_count == count ? 1 : 0;
> > +               else
> > +                       return signaled_count > 0 ? 1 : 0;
>
> There's a very annoying laxness in the dma_fence API here, in that
> backends are not required to automatically report when a fence is
> signaled prior to fence->ops->enable_signaling() being called.
> So here if we fail to match signaled_count, we need to fall through and
> try a 0 timeout wait!
>

That is very annoying!  I'll see how bad the fall-through is...


> Christian, dma_fence_wait_any_timeout() has this same bug you told me off
> for, e.g. commit 698c0f7ff216 ("dma-buf/fence: revert "don't wait when
> specified timeout is zero" (v2)")!
> -Chris
>
Chris Wilson Aug. 9, 2017, 10:41 p.m. UTC | #6
Quoting Chris Wilson (2017-08-09 18:57:44)
> So we are taking a snapshot here. It looks like this could have been
> done using a dma_fence_array + dma_fence_proxy for capturing the future
> fence.

A quick sketch of this idea looks like:

 void drm_syncobj_replace_fence(struct drm_syncobj *syncobj,
                               struct dma_fence *fence)
 {
-       struct dma_fence *old_fence;
+       unsigned long flags;
 
-       if (fence)
-               dma_fence_get(fence);
-       old_fence = xchg(&syncobj->fence, fence);
-
-       dma_fence_put(old_fence);
+       spin_lock_irqsave(&syncobj->lock, flags);
+       dma_fence_replace_proxy(&syncobj->fence, fence);
+       spin_unlock_irqrestore(&syncobj->lock, flags);
 }

+int
+drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
+                      struct drm_file *file_private)
+{
+       struct drm_syncobj_wait *args = data;
+       struct dma_fence **fences;
+       struct dma_fence_array *array;
+       unsigned long timeout;
+       unsigned int count;
+       u32 *handles;
+       int ret = 0;
+       u32 i;
+
+       BUILD_BUG_ON(sizeof(*fences) < sizeof(*handles));
+
+       if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
+               return -ENODEV;
+
+       if (args->flags != 0 && args->flags != DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL)
+               return -EINVAL;
+
+       count = args->count_handles;
+       if (!count)
+               return -EINVAL;
+
+       /* Get the handles from userspace */
+       fences = kmalloc_array(count,
+                              sizeof(struct dma_fence *),
+                              __GFP_NOWARN | GFP_KERNEL);
+       if (!fences)
+               return -ENOMEM;
+
+       handles = (void *)fences + count * (sizeof(*fences) - sizeof(*handles));
+       if (copy_from_user(handles,
+                          u64_to_user_ptr(args->handles),
+                          sizeof(u32) * count)) {
+               ret = -EFAULT;
+               goto err;
+       }
+
+       for (i = 0; i < count; i++) {
+               struct drm_syncobj *s;
+
+               ret = -ENOENT;
+               s = drm_syncobj_find(file_private, handles[i]);
+               if (s) {
+                       ret = 0;
+                       spin_lock_irq(&s->lock);
+                       if (!s->fence) {
+                               if (args->flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT)
+                                       s->fence = dma_fence_create_proxy();
+                               else
+                                       ret = -EINVAL;
+                       }
+                       if (s->fence)
+                               fences[i] = dma_fence_get(s->fence);
+                       spin_unlock_irq(&s->lock);
+               }
+               if (ret) {
+                       count = i;
+                       goto err_fences;
+               }
+       }
+
+       array = dma_fence_array_create(count, fences, 0, 0,
+                                      !(args->flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL));
+       if (!array) {
+               ret = -ENOMEM;
+               goto err_fences;
+       }
+
+       timeout = drm_timeout_abs_to_jiffies(args->timeout_nsec);
+       timeout = dma_fence_wait_timeout(&array->base, true, timeout);
+       args->first_signaled = array->first_signaled;
+       dma_fence_put(&array->base);
+
+       return timeout < 0 ? timeout : 0;
+
+err_fences:
+       for (i = 0; i < count; i++)
+               dma_fence_put(fences[i]);
+err:
+       kfree(fences);
+       return ret;
+}

The key advantage is that this translates the ioctl into a dma-fence-array
which already has to deal with the mess, sharing the burden. (But it
does require a trivial patch to dma-fence-array to record the first
signaled fence.)

However, this installs the proxy into syncobj->fence with the result
that any concurrent wait also becomes a WAIT_FOR_SUBMIT. The behaviour
of drm_syncobj is then quite inconsistent: sometimes it will wait for a
future fence, sometimes it will report an error.

Furthermore, do we want future-fences in the execbuf API? etc.
-Chris
Jason Ekstrand Aug. 9, 2017, 11:53 p.m. UTC | #7
On Wed, Aug 9, 2017 at 3:41 PM, Chris Wilson <chris@chris-wilson.co.uk>
wrote:

> Quoting Chris Wilson (2017-08-09 18:57:44)
> > So we are taking a snapshot here. It looks like this could have been
> > done using a dma_fence_array + dma_fence_proxy for capturing the future
> > fence.
>
> A quick sketch of this idea looks like:
>
>  void drm_syncobj_replace_fence(struct drm_syncobj *syncobj,
>                                struct dma_fence *fence)
>  {
> -       struct dma_fence *old_fence;
> +       unsigned long flags;
>
> -       if (fence)
> -               dma_fence_get(fence);
> -       old_fence = xchg(&syncobj->fence, fence);
> -
> -       dma_fence_put(old_fence);
> +       spin_lock_irqsave(&syncobj->lock, flags);
> +       dma_fence_replace_proxy(&syncobj->fence, fence);
> +       spin_unlock_irqrestore(&syncobj->lock, flags);
>  }
>
> +int
> +drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
> +                      struct drm_file *file_private)
> +{
> +       struct drm_syncobj_wait *args = data;
> +       struct dma_fence **fences;
> +       struct dma_fence_array *array;
> +       unsigned long timeout;
> +       unsigned int count;
> +       u32 *handles;
> +       int ret = 0;
> +       u32 i;
> +
> +       BUILD_BUG_ON(sizeof(*fences) < sizeof(*handles));
> +
> +       if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
> +               return -ENODEV;
> +
> +       if (args->flags != 0 && args->flags != DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL)
> +               return -EINVAL;
> +
> +       count = args->count_handles;
> +       if (!count)
> +               return -EINVAL;
> +
> +       /* Get the handles from userspace */
> +       fences = kmalloc_array(count,
> +                              sizeof(struct dma_fence *),
> +                              __GFP_NOWARN | GFP_KERNEL);
> +       if (!fences)
> +               return -ENOMEM;
> +
> +       handles = (void *)fences + count * (sizeof(*fences) - sizeof(*handles));
> +       if (copy_from_user(handles,
> +                          u64_to_user_ptr(args->handles),
> +                          sizeof(u32) * count)) {
> +               ret = -EFAULT;
> +               goto err;
> +       }
> +
> +       for (i = 0; i < count; i++) {
> +               struct drm_syncobj *s;
> +
> +               ret = -ENOENT;
> +               s = drm_syncobj_find(file_private, handles[i]);
> +               if (s) {
> +                       ret = 0;
> +                       spin_lock_irq(&s->lock);
> +                       if (!s->fence) {
> +                               if (args->flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT)
> +                                       s->fence = dma_fence_create_proxy();
> +                               else
> +                                       ret = -EINVAL;
> +                       }
> +                       if (s->fence)
> +                               fences[i] = dma_fence_get(s->fence);
> +                       spin_unlock_irq(&s->lock);
> +               }
> +               if (ret) {
> +                       count = i;
> +                       goto err_fences;
> +               }
> +       }
> +
> +       array = dma_fence_array_create(count, fences, 0, 0,
> +                                      !(args->flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL));
> +       if (!array) {
> +               ret = -ENOMEM;
> +               goto err_fences;
> +       }
> +
> +       timeout = drm_timeout_abs_to_jiffies(args->timeout_nsec);
> +       timeout = dma_fence_wait_timeout(&array->base, true, timeout);
> +       args->first_signaled = array->first_signaled;
> +       dma_fence_put(&array->base);
> +
> +       return timeout < 0 ? timeout : 0;
> +
> +err_fences:
> +       for (i = 0; i < count; i++)
> +               dma_fence_put(fences[i]);
> +err:
> +       kfree(fences);
> +       return ret;
> +}
>
> The key advantage is that this translates the ioctl into a dma-fence-array
> which already has to deal with the mess, sharing the burden. (But it
> does require a trivial patch to dma-fence-array to record the first
> signaled fence.)
>
> However, this installs the proxy into syncobj->fence with the result
> that any concurrent wait also becomes a WAIT_FOR_SUBMIT. The behaviour
> of drm_syncobj is then quite inconsistent: sometimes it will wait for a
> future fence, sometimes it will report an error.
>

Yeah, that's not good.  I thought about a variety of solutions to try and
re-use more core dma_fence code.  Ultimately I chose the current one
because it takes a snapshot of the syncobjs and then, from that point
forward, it's consistent with its snapshot.  Nothing I was able to come up
with based on core dma_fence wait routines does that.

--Jason
Chris Wilson Aug. 10, 2017, 11 a.m. UTC | #8
Quoting Jason Ekstrand (2017-08-10 00:53:00)
> On Wed, Aug 9, 2017 at 3:41 PM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
>     However, this installs the proxy into syncobj->fence with the result
>     that any concurrent wait also becomes a WAIT_FOR_SUBMIT. The behaviour
>     of drm_syncobj is then quite inconsistent: sometimes it will wait for a
>     future fence, sometimes it will report an error.
> 
> 
> Yeah, that's not good.  I thought about a variety of solutions to try and
> re-use more core dma_fence code.  Ultimately I chose the current one because it
> takes a snapshot of the syncobjs and then, from that point forward, it's
> consistent with its snapshot.  Nothing I was able to come up with based on core
> dma_fence wait routines does that.

So if we choose to keep the proxy local to the fence-array and not replace
syncobj->fence, we can still reduce this to a plain dma-fence-array +
wait.

Pertinent details:

+static void syncobj_notify(struct drm_syncobj *syncobj, struct dma_fence *fence)
+{
+       struct drm_syncobj_cb *cb, *cn;
+       unsigned long flags;
+
+       /* This is just an opencoded waitqueue; FIXME! */
+       spin_lock_irqsave(&syncobj->lock, flags);
+       list_for_each_entry_safe(cb, cn, &syncobj->cb_list, link)
+               cb->func(cb, fence);
+       INIT_LIST_HEAD(&syncobj->cb_list);
+       spin_unlock_irqrestore(&syncobj->lock, flags);
+}
+
 /**
  * drm_syncobj_replace_fence - replace fence in a sync object.
  * @syncobj: Sync object to replace fence in
@@ -89,7 +109,10 @@ void drm_syncobj_replace_fence(struct drm_syncobj *syncobj,
 
        if (fence)
                dma_fence_get(fence);
+
        old_fence = xchg(&syncobj->fence, fence);
+       if (!old_fence && fence)
+               syncobj_notify(syncobj, fence);
 
        dma_fence_put(old_fence);
 }
@@ -124,6 +147,9 @@ void drm_syncobj_free(struct kref *kref)
        struct drm_syncobj *syncobj = container_of(kref,
                                                   struct drm_syncobj,
                                                   refcount);
+
+       syncobj_notify(syncobj, NULL);
+
        dma_fence_put(syncobj->fence);
        kfree(syncobj);
 }
@@ -140,6 +166,8 @@ static int drm_syncobj_create(struct drm_file *file_private,
                return -ENOMEM;
 
        kref_init(&syncobj->refcount);
+       spin_lock_init(&syncobj->lock);
+       INIT_LIST_HEAD(&syncobj->cb_list);
 
        idr_preload(GFP_KERNEL);
        spin_lock(&file_private->syncobj_table_lock);

+struct future_fence {
+       struct drm_syncobj_cb base;
+       struct dma_fence **slot;
+};
+
+static void future_fence_cb(struct drm_syncobj_cb *cb, struct dma_fence *fence)
+{
+       struct future_fence *ff = container_of(cb, typeof(*ff), base);
+
+       if (fence)
+               dma_fence_replace_proxy(ff->slot, fence);
+       else
+               dma_fence_signal(*ff->slot);
+
+       kfree(ff);
+}
+
+int
+drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
+                      struct drm_file *file_private)
+{
+       struct drm_syncobj_wait *args = data;
+       struct dma_fence_array *array;
+       struct dma_fence **fences;
+       unsigned int count, i;
+       long timeout;
+       u32 *handles;
+       int ret;
+
+       BUILD_BUG_ON(sizeof(*fences) < sizeof(*handles));
+
+       if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
+               return -ENODEV;
+
+       if (args->flags & ~(DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL |
+                           DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT))
+               return -EINVAL;
+
+       count = args->count_handles;
+       if (!count)
+               return -EINVAL;
+
+       /* Get the handles from userspace */
+       fences = kmalloc_array(count,
+                              sizeof(struct dma_fence *),
+                              __GFP_NOWARN | GFP_KERNEL);
+       if (!fences)
+               return -ENOMEM;
+
+       handles = (void *)fences + count * (sizeof(*fences) - sizeof(*handles));
+       if (copy_from_user(handles,
+                          u64_to_user_ptr(args->handles),
+                          sizeof(u32) * count)) {
+               ret = -EFAULT;
+               goto err;
+       }
+
+       for (i = 0; i < count; i++) {
+               struct drm_syncobj *s;
+
+               ret = -ENOENT;
+               s = drm_syncobj_find(file_private, handles[i]);
+               if (s) {
+                       ret = 0;
+                       spin_lock_irq(&s->lock);
+                       if (s->fence) {
+                               fences[i] = dma_fence_get(s->fence);
+                       } else if (args->flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) {
+                               struct future_fence *cb;
+
+                               cb = kmalloc(sizeof(*cb), GFP_KERNEL);
+                               fences[i] = dma_fence_create_proxy();
+                               if (cb && fences[i]) {
+                                       cb->slot = &fences[i];
+                                       cb->base.func = future_fence_cb;
+                                       list_add(&cb->base.link, &s->cb_list);
+                               } else {
+                                       kfree(cb);
+                                       dma_fence_put(fences[i]);
+                                       ret = -ENOMEM;
+                               }
+                       } else {
+                               ret = -EINVAL;
+                       }
+                       spin_unlock_irq(&s->lock);
+               }
+               if (ret) {
+                       count = i;
+                       goto err_fences;
+               }
+       }
+
+       array = dma_fence_array_create(count, fences, 0, 0,
+                                      !(args->flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL));
+       if (!array) {
+               ret = -ENOMEM;
+               goto err_fences;
+       }
+
+       timeout = drm_timeout_abs_to_jiffies(args->timeout_nsec);
+       timeout = dma_fence_wait_timeout(&array->base, true, timeout);
+       args->first_signaled = array->first_signaled;
+       dma_fence_put(&array->base);
+
+       return timeout < 0 ? timeout : timeout == 0 ? -ETIME : 0; /* Gah. */
+
+err_fences:
+       for (i = 0; i < count; i++)
+               dma_fence_put(fences[i]);
+err:
+       kfree(fences);
+       return ret;
+}
Christian König Aug. 10, 2017, 12:26 p.m. UTC | #9
Am 10.08.2017 um 01:53 schrieb Jason Ekstrand:
> On Wed, Aug 9, 2017 at 3:41 PM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
>
>     Quoting Chris Wilson (2017-08-09 18:57:44)
>     > So we are taking a snapshot here. It looks like this could have been
>     > done using a dma_fence_array + dma_fence_proxy for capturing the future
>     > fence.
>
>     A quick sketch of this idea looks like:
>
>      void drm_syncobj_replace_fence(struct drm_syncobj *syncobj,
>                                    struct dma_fence *fence)
>      {
>     -       struct dma_fence *old_fence;
>     +       unsigned long flags;
>
>     -       if (fence)
>     -               dma_fence_get(fence);
>     -       old_fence = xchg(&syncobj->fence, fence);
>     -
>     -       dma_fence_put(old_fence);
>     +       spin_lock_irqsave(&syncobj->lock, flags);
>     +       dma_fence_replace_proxy(&syncobj->fence, fence);
>     +       spin_unlock_irqrestore(&syncobj->lock, flags);
>      }
>
>     +int
>     +drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
>     +                      struct drm_file *file_private)
>     +{
>     +       struct drm_syncobj_wait *args = data;
>     +       struct dma_fence **fences;
>     +       struct dma_fence_array *array;
>     +       unsigned long timeout;
>     +       unsigned int count;
>     +       u32 *handles;
>     +       int ret = 0;
>     +       u32 i;
>     +
>     +       BUILD_BUG_ON(sizeof(*fences) < sizeof(*handles));
>     +
>     +       if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
>     +               return -ENODEV;
>     +
>     +       if (args->flags != 0 && args->flags != DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL)
>     +               return -EINVAL;
>     +
>     +       count = args->count_handles;
>     +       if (!count)
>     +               return -EINVAL;
>     +
>     +       /* Get the handles from userspace */
>     +       fences = kmalloc_array(count,
>     +                              sizeof(struct dma_fence *),
>     +                              __GFP_NOWARN | GFP_KERNEL);
>     +       if (!fences)
>     +               return -ENOMEM;
>     +
>     +       handles = (void *)fences + count * (sizeof(*fences) - sizeof(*handles));
>     +       if (copy_from_user(handles,
>     +                          u64_to_user_ptr(args->handles),
>     +                          sizeof(u32) * count)) {
>     +               ret = -EFAULT;
>     +               goto err;
>     +       }
>     +
>     +       for (i = 0; i < count; i++) {
>     +               struct drm_syncobj *s;
>     +
>     +               ret = -ENOENT;
>     +               s = drm_syncobj_find(file_private, handles[i]);
>     +               if (s) {
>     +                       ret = 0;
>     +                       spin_lock_irq(&s->lock);
>     +                       if (!s->fence) {
>     +                               if (args->flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT)
>     +                                       s->fence = dma_fence_create_proxy();
>     +                               else
>     +                                       ret = -EINVAL;
>     +                       }
>     +                       if (s->fence)
>     +                               fences[i] = dma_fence_get(s->fence);
>     +                       spin_unlock_irq(&s->lock);
>     +               }
>     +               if (ret) {
>     +                       count = i;
>     +                       goto err_fences;
>     +               }
>     +       }
>     +
>     +       array = dma_fence_array_create(count, fences, 0, 0,
>     +                                      !(args->flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL));
>     +       if (!array) {
>     +               ret = -ENOMEM;
>     +               goto err_fences;
>     +       }
>     +
>     +       timeout = drm_timeout_abs_to_jiffies(args->timeout_nsec);
>     +       timeout = dma_fence_wait_timeout(&array->base, true, timeout);
>     +       args->first_signaled = array->first_signaled;
>     +       dma_fence_put(&array->base);
>     +
>     +       return timeout < 0 ? timeout : 0;
>     +
>     +err_fences:
>     +       for (i = 0; i < count; i++)
>     +               dma_fence_put(fences[i]);
>     +err:
>     +       kfree(fences);
>     +       return ret;
>     +}
>
>     The key advantage is that this translates the ioctl into a dma-fence-array
>     which already has to deal with the mess, sharing the burden. (But it
>     does require a trivial patch to dma-fence-array to record the first
>     signaled fence.)
>
>     However, this installs the proxy into syncobj->fence with the result
>     that any concurrent wait also becomes a WAIT_FOR_SUBMIT. The behaviour
>     of drm_syncobj is then quite inconsistent: sometimes it will wait for a
>     future fence, sometimes it will report an error.
>
>
> Yeah, that's not good.  I thought about a variety of solutions to try 
> and re-use more core dma_fence code.  Ultimately I chose the current 
> one because it takes a snapshot of the syncobjs and then, from that 
> point forward, it's consistent with its snapshot.  Nothing I was able 
> to come up with based on core dma_fence wait routines does that.

As Chris pointed out, that's really not a good idea.

Most of the time we need the behavior of reporting an error, and only
when the flag is given should we wait until some fence shows up.

In general I suggest that we only support this use case in the form of a 
wait_event_interruptible() on setting the first fence on an object.

Waiting on the first one of multiple objects wouldn't be possible (you 
would always wait till all objects have fences), but I think that this 
is acceptable.

The big advantage of the wait_event_interruptible() interface is that
your condition checking and process handling are bulletproof; from what
I have seen so far, your code is a bit buggy in that direction.
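
A minimal sketch of that shape (assuming a wait_queue_head_t were added
to struct drm_syncobj and woken whenever drm_syncobj_replace_fence()
installs a fence; neither exists in the patch as posted):

static long drm_syncobj_wait_for_first_fence(struct drm_syncobj *syncobj,
                                             signed long timeout)
{
        /* wait_event_interruptible_timeout() does the
         * set_current_state()/check/schedule dance internally:
         * > 0 on success, 0 on timeout, -ERESTARTSYS on a signal. */
        return wait_event_interruptible_timeout(syncobj->wq,
                                                READ_ONCE(syncobj->fence),
                                                timeout);
}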

Christian.

>
> --Jason
>
>
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel
Christian König Aug. 10, 2017, 12:26 p.m. UTC | #10
Am 09.08.2017 um 23:31 schrieb Chris Wilson:
> Quoting Jason Ekstrand (2017-08-09 18:00:54)
>> +static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
>> +                                                 uint32_t count,
>> +                                                 uint32_t flags,
>> +                                                 signed long timeout,
>> +                                                 uint32_t *idx)
>> +{
>> +       struct syncobj_wait_entry *entries;
>> +       struct dma_fence *fence;
>> +       signed long ret;
>> +       uint32_t signaled_count, i;
>> +
>> +       if (timeout == 0) {
>> +               signaled_count = 0;
>> +               for (i = 0; i < count; ++i) {
>> +                       ret = drm_syncobj_signaled(syncobjs[i], flags);
>> +                       if (ret < 0)
>> +                               return ret;
>> +                       if (ret == 0)
>> +                               continue;
>> +                       if (signaled_count == 0 && idx)
>> +                               *idx = i;
>> +                       signaled_count++;
>> +               }
>> +
>> +               if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL)
>> +                       return signaled_count == count ? 1 : 0;
>> +               else
>> +                       return signaled_count > 0 ? 1 : 0;
> There's a very annoying laxness in the dma_fence API here, in that
> backends are not required to automatically report when a fence is
> signaled prior to fence->ops->enable_signaling() being called.
> So here if we fail to match signaled_count, we need to fall through and
> try a 0 timeout wait!
>
> Christian, dma_fence_wait_any_timeout() has this same bug you told me off
> for, e.g. commit 698c0f7ff216 ("dma-buf/fence: revert "don't wait when
> specified timeout is zero" (v2)")!

Thanks for pointing this out, going to take care of this issue.

Christian.

> -Chris
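(A sketch of the fallthrough Chris describes above; an assumption about
the shape of the fix, not a posted patch. The idea: if the quick check
comes up short, poke each fence with a zero-timeout wait, which enables
signaling and re-checks the state.)

	if (signaled_count != count) {
		signaled_count = 0;
		for (i = 0; i < count; ++i) {
			/* dma_fence_wait_timeout() enables signaling, so
			 * it catches fences that signaled before anyone
			 * asked.  WAIT_FOR_SUBMIT handling elided here.
			 */
			fence = drm_syncobj_fence_get(syncobjs[i]);
			if (fence &&
			    dma_fence_wait_timeout(fence, true, 0) > 0)
				signaled_count++;
			dma_fence_put(fence);	/* NULL-safe */
		}
	}
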
Jason Ekstrand Aug. 10, 2017, 2:32 p.m. UTC | #11
On Thu, Aug 10, 2017 at 5:26 AM, Christian König <deathsimple@vodafone.de>
wrote:

> On 10.08.2017 at 01:53, Jason Ekstrand wrote:
>
> On Wed, Aug 9, 2017 at 3:41 PM, Chris Wilson <chris@chris-wilson.co.uk>
> wrote:
>
>> Quoting Chris Wilson (2017-08-09 18:57:44)
>> > So we are taking a snapshot here. It looks like this could have been
>> > done using a dma_fence_array + dma_fence_proxy for capturing the future
>> > fence.
>>
>> A quick sketch of this idea looks like:
>>
>>  void drm_syncobj_replace_fence(struct drm_syncobj *syncobj,
>>                                struct dma_fence *fence)
>>  {
>> -       struct dma_fence *old_fence;
>> +       unsigned long flags;
>>
>> -       if (fence)
>> -               dma_fence_get(fence);
>> -       old_fence = xchg(&syncobj->fence, fence);
>> -
>> -       dma_fence_put(old_fence);
>> +       spin_lock_irqsave(&syncobj->lock, flags);
>> +       dma_fence_replace_proxy(&syncobj->fence, fence);
>> +       spin_unlock_irqrestore(&syncobj->lock, flags);
>>  }
>>
>> +int
>> +drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
>> +                      struct drm_file *file_private)
>> +{
>> +       struct drm_syncobj_wait *args = data;
>> +       struct dma_fence **fences;
>> +       struct dma_fence_array *array;
>> +       unsigned long timeout;
>> +       unsigned int count;
>> +       u32 *handles;
>> +       int ret = 0;
>> +       u32 i;
>> +
>> +       BUILD_BUG_ON(sizeof(*fences) < sizeof(*handles));
>> +
>> +       if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
>> +               return -ENODEV;
>> +
>> +       if (args->flags != 0 && args->flags != DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL)
>> +               return -EINVAL;
>> +
>> +       count = args->count_handles;
>> +       if (!count)
>> +               return -EINVAL;
>> +
>> +       /* Get the handles from userspace */
>> +       fences = kmalloc_array(count,
>> +                              sizeof(struct dma_fence *),
>> +                              __GFP_NOWARN | GFP_KERNEL);
>> +       if (!fences)
>> +               return -ENOMEM;
>> +
>> +       handles = (void *)fences + count * (sizeof(*fences) - sizeof(*handles));
>> +       if (copy_from_user(handles,
>> +                          u64_to_user_ptr(args->handles),
>> +                          sizeof(u32) * count)) {
>> +               ret = -EFAULT;
>> +               goto err;
>> +       }
>> +
>> +       for (i = 0; i < count; i++) {
>> +               struct drm_syncobj *s;
>> +
>> +               ret = -ENOENT;
>> +               s = drm_syncobj_find(file_private, handles[i]);
>> +               if (s) {
>> +                       ret = 0;
>> +                       spin_lock_irq(&s->lock);
>> +                       if (!s->fence) {
>> +                               if (args->flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT)
>> +                                       s->fence = dma_fence_create_proxy();
>> +                               else
>> +                                       ret = -EINVAL;
>> +                       }
>> +                       if (s->fence)
>> +                               fences[i] = dma_fence_get(s->fence);
>> +                       spin_unlock_irq(&s->lock);
>> +               }
>> +               if (ret) {
>> +                       count = i;
>> +                       goto err_fences;
>> +               }
>> +       }
>> +
>> +       array = dma_fence_array_create(count, fences, 0, 0,
>> +                                      !(args->flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL));
>> +       if (!array) {
>> +               ret = -ENOMEM;
>> +               goto err_fences;
>> +       }
>> +
>> +       timeout = drm_timeout_abs_to_jiffies(args->timeout_nsec);
>> +       timeout = dma_fence_wait_timeout(&array->base, true, timeout);
>> +       args->first_signaled = array->first_signaled;
>> +       dma_fence_put(&array->base);
>> +
>> +       return timeout < 0 ? timeout : 0;
>> +
>> +err_fences:
>> +       for (i = 0; i < count; i++)
>> +               dma_fence_put(fences[i]);
>> +err:
>> +       kfree(fences);
>> +       return ret;
>> +}
>>
>> The key advantage is that this translates the ioctl into a dma-fence-array
>> which already has to deal with the mess, sharing the burden. (But it
>> does require a trivial patch to dma-fence-array to record the first
>> signaled fence.)
>>
>> However, this installs the proxy into syncobj->fence with the result
>> that any concurrent wait also becomes a WAIT_FOR_SUBMIT. The behaviour
>> of drm_syncobj is then quite inconsistent; sometimes it will wait for a
>> future fence, sometimes it will report an error.
>>
>
> Yeah, that's not good.  I thought about a variety of solutions to try and
> re-use more core dma_fence code.  Ultimately I chose the current one
> because it takes a snapshot of the syncobjs and then, from that point
> forward, it's consistent with its snapshot.  Nothing I was able to come up
> with based on core dma_fence wait routines does that.
>
>
> As Chris pointed out, that's really not a good idea.
>

What isn't a good idea?


> Most of the time we need the behavior of reporting an error, and only
> when the flag is given should we wait until some fence shows up.
>
> In general I suggest that we only support this use case in the form of a
> wait_event_interruptible() on setting the first fence on an object.
>
> Waiting on the first one of multiple objects wouldn't be possible (you
> would always wait till all objects have fences), but I think that this is
> acceptable.
>

Not really.  If you're doing a wait-any, then you want it to return as soon
as you have a signaled fence even if half your sync objects never get
fences.  At least that's what's required for implementing vkWaitForFences.
The idea is that, if the WAIT_FOR_SUBMIT flag is set, then a syncobj
without a fence and a syncobj with an untriggered fence are identical as
far as waiting is concerned:  You wait until it has a signaled fence.
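
In userspace terms (illustrative only; the variable names are made up,
while the ioctl, flag and struct fields come from this patch),
vkWaitForFences(waitAll = VK_FALSE) maps to something like:

	struct drm_syncobj_wait wait = {
		.handles = (__u64)(uintptr_t)syncobj_handles,
		.timeout_nsec = abs_timeout_nsec,	/* absolute time */
		.count_handles = fence_count,
		/* No WAIT_ALL: return on the first signaled fence, even
		 * if some of the other syncobjs never get a fence.
		 */
		.flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
	};
	int ret = drmIoctl(fd, DRM_IOCTL_SYNCOBJ_WAIT, &wait);
	/* On success, wait.first_signaled is the index of the first
	 * signaled handle.
	 */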


> The big advantage of the wait_event_interruptible() interface is that your
> condition checking and process handling is bulletproof; from what I have
> seen so far, your code is a bit buggy in that direction.
>

In v3, I used wait_event_interruptible.  Does it have those same bugs?

--Jason


> Christian.
>
>
> --Jason
>
>
Christian König Aug. 10, 2017, 2:41 p.m. UTC | #12
On 10.08.2017 at 16:32, Jason Ekstrand wrote:
> On Thu, Aug 10, 2017 at 5:26 AM, Christian König
> <deathsimple@vodafone.de> wrote:
>
>     On 10.08.2017 at 01:53, Jason Ekstrand wrote:
>>     On Wed, Aug 9, 2017 at 3:41 PM, Chris Wilson
>>     <chris@chris-wilson.co.uk> wrote:
>>
>>         Quoting Chris Wilson (2017-08-09 18:57:44)
>>         > So we are taking a snapshot here. It looks like this could
>>         > have been done using a dma_fence_array + dma_fence_proxy for
>>         > capturing the future fence.
>>
>>         [sketch snipped]
>>
>>         However, this installs the proxy into syncobj->fence with the
>>         result that any concurrent wait also becomes a WAIT_FOR_SUBMIT.
>>         The behaviour of drm_syncobj is then quite inconsistent;
>>         sometimes it will wait for a future fence, sometimes it will
>>         report an error.
>>
>>
>>     Yeah, that's not good. I thought about a variety of solutions to
>>     try and re-use more core dma_fence code. Ultimately I chose the
>>     current one because it takes a snapshot of the syncobjs and then,
>>     from that point forward, it's consistent with its snapshot. 
>>     Nothing I was able to come up with based on core dma_fence wait
>>     routines does that.
>
>     As Chris pointed out, that's really not a good idea.
>
>
> What isn't a good idea?

> However, this installs the proxy into syncobj->fence with the result
> that any concurrent wait also becomes a WAIT_FOR_SUBMIT.

Installing that proxy. I'm always happy if someone reuses the code 
(after all, I wrote a good part of it), but I would rather we try to 
make this as clean as possible.

>     Most of the time we need the behavior of reporting an error, and
>     only when the flag is given should we wait until some fence shows up.
>
>     In general I suggest that we only support this use case in the
>     form of a wait_event_interruptible() on setting the first fence on
>     an object.
>
>     Waiting on the first one of multiple objects wouldn't be possible
>     (you would always wait till all objects have fences), but I think
>     that this is acceptable.
>
>
> Not really.  If you're doing a wait-any, then you want it to return as 
> soon as you have a signaled fence even if half your sync objects never 
> get fences.  At least that's what's required for implementing 
> vkWaitForFences.  The idea is that, if the WAIT_FOR_SUBMIT flag is 
> set, then a syncobj without a fence and a syncobj with an untriggered 
> fence are identical as far as waiting is concerned:  You wait until it 
> has a signaled fence.

Crap, that makes the whole thing much harder to implement.

>     The big advantage of the wait_event_interruptible() interface is
>     that your condition checking and process handling is bulletproof;
>     from what I have seen so far, your code is a bit buggy in that
>     direction.
>
>
> In v3, I used wait_event_interruptible.  Does it have those same bugs?

I haven't seen that yet, but if you now use wait_event_interruptible() 
the problem should be gone.

Regards,
Christian.

>
> --Jason
>
>     Christian.
>
>>
>>     --Jason
>>
>>
Jason Ekstrand Aug. 10, 2017, 2:42 p.m. UTC | #13
On Thu, Aug 10, 2017 at 4:00 AM, Chris Wilson <chris@chris-wilson.co.uk>
wrote:

> Quoting Jason Ekstrand (2017-08-10 00:53:00)
> > On Wed, Aug 9, 2017 at 3:41 PM, Chris Wilson <chris@chris-wilson.co.uk>
> > wrote:
> >     However, this installs the proxy into syncobj->fence with the result
> >     that any concurrent wait also becomes a WAIT_FOR_SUBMIT. The behaviour
> >     of drm_syncobj is then quite inconsistent; sometimes it will wait for a
> >     future fence, sometimes it will report an error.
> >
> >
> > Yeah, that's not good.  I thought about a variety of solutions to try and
> > re-use more core dma_fence code.  Ultimately I chose the current one
> > because it takes a snapshot of the syncobjs and then, from that point
> > forward, it's consistent with its snapshot.  Nothing I was able to come up
> > with based on core dma_fence wait routines does that.
>
> So if we choose to keep the proxy local to the fence-array and not replace
> syncobj->fence, we can still reduce this to a plain dma-fence-array +
> wait.
>
> Pertinent details:
>
> +static void syncobj_notify(struct drm_syncobj *syncobj, struct dma_fence *fence)
> +{
> +       struct drm_syncobj_cb *cb, *cn;
> +       unsigned long flags;
> +
> +       /* This is just an opencoded waitqueue; FIXME! */
> +       spin_lock_irqsave(&syncobj->lock, flags);
> +       list_for_each_entry_safe(cb, cn, &syncobj->cb_list, link)
> +               cb->func(cb, fence);
> +       INIT_LIST_HEAD(&syncobj->cb_list);
> +       spin_unlock_irqrestore(&syncobj->lock, flags);
> +}
> +
>  /**
>   * drm_syncobj_replace_fence - replace fence in a sync object.
>   * @syncobj: Sync object to replace fence in
> @@ -89,7 +109,10 @@ void drm_syncobj_replace_fence(struct drm_syncobj *syncobj,
>
>         if (fence)
>                 dma_fence_get(fence);
> +
>         old_fence = xchg(&syncobj->fence, fence);
> +       if (!old_fence && fence)
> +               syncobj_notify(syncobj, fence);
>
>         dma_fence_put(old_fence);
>  }
> @@ -124,6 +147,9 @@ void drm_syncobj_free(struct kref *kref)
>         struct drm_syncobj *syncobj = container_of(kref,
>                                                    struct drm_syncobj,
>                                                    refcount);
> +
> +       syncobj_notify(syncobj, NULL);
> +
>         dma_fence_put(syncobj->fence);
>         kfree(syncobj);
>  }
> @@ -140,6 +166,8 @@ static int drm_syncobj_create(struct drm_file *file_private,
>                 return -ENOMEM;
>
>         kref_init(&syncobj->refcount);
> +       spin_lock_init(&syncobj->lock);
> +       INIT_LIST_HEAD(&syncobj->cb_list);
>
>         idr_preload(GFP_KERNEL);
>         spin_lock(&file_private->syncobj_table_lock);
>
> +struct future_fence {
> +       struct drm_syncobj_cb base;
> +       struct dma_fence **slot;
> +};
> +
> +static void future_fence_cb(struct drm_syncobj_cb *cb, struct dma_fence *fence)
> +{
> +       struct future_fence *ff = container_of(cb, typeof(*ff), base);
> +
> +       if (fence)
> +               dma_fence_replace_proxy(ff->slot, fence);
>

Where does this come from?  I can't find it on dri-devel and "proxy"
doesn't show up anywhere in the dma-buf sources.  What does it do?


> +       else
> +               dma_fence_signal(*ff->slot);
> +
> +       kfree(ff);
> +}
> +
> +int
> +drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
> +                      struct drm_file *file_private)
> +{
> +       struct drm_syncobj_wait *args = data;
> +       struct dma_fence_array *array;
> +       struct dma_fence **fences;
> +       unsigned int count, i;
> +       long timeout;
> +       u32 *handles;
> +       int ret;
> +
> +       BUILD_BUG_ON(sizeof(*fences) < sizeof(*handles));
> +
> +       if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
> +               return -ENODEV;
> +
> +       if (args->flags & ~(DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL |
> +                           DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT))
> +               return -EINVAL;
> +
> +       count = args->count_handles;
> +       if (!count)
> +               return -EINVAL;
> +
> +       /* Get the handles from userspace */
> +       fences = kmalloc_array(count,
> +                              sizeof(struct dma_fence *),
> +                              __GFP_NOWARN | GFP_KERNEL);
> +       if (!fences)
> +               return -ENOMEM;
> +
> +       handles = (void *)fences + count * (sizeof(*fences) - sizeof(*handles));
> +       if (copy_from_user(handles,
> +                          u64_to_user_ptr(args->handles),
> +                          sizeof(u32) * count)) {
> +               ret = -EFAULT;
> +               goto err;
> +       }
> +
> +       for (i = 0; i < count; i++) {
> +               struct drm_syncobj *s;
> +
> +               ret = -ENOENT;
> +               s = drm_syncobj_find(file_private, handles[i]);
> +               if (s) {
> +                       ret = 0;
> +                       spin_lock_irq(&s->lock);
> +                       if (s->fence) {
> +                               fences[i] = dma_fence_get(s->fence);
> +                       } else if (args->flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) {
> +                               struct future_fence *cb;
> +
> +                               cb = kmalloc(sizeof(*cb), GFP_KERNEL);
> +                               fences[i] = dma_fence_create_proxy();
> +                               if (cb && fences[i]) {
> +                                       cb->slot = &fences[i];
> +                                       cb->base.func = future_fence_cb;
> +                                       list_add(&cb->base.link, &s->cb_list);
> +                               } else {
> +                                       kfree(cb);
> +                                       dma_fence_put(fences[i]);
> +                                       ret = -ENOMEM;
> +                               }
> +                       } else {
> +                               ret = -EINVAL;
> +                       }
> +                       spin_unlock_irq(&s->lock);
> +               }
> +               if (ret) {
> +                       count = i;
> +                       goto err_fences;
> +               }
> +       }
> +
> +       array = dma_fence_array_create(count, fences, 0, 0,
> +                                      !(args->flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL));
> +       if (!array) {
> +               ret = -ENOMEM;
> +               goto err_fences;
> +       }
> +
> +       timeout = drm_timeout_abs_to_jiffies(args->timeout_nsec);
> +       timeout = dma_fence_wait_timeout(&array->base, true, timeout);
> +       args->first_signaled = array->first_signaled;
> +       dma_fence_put(&array->base);
> +
> +       return timeout < 0 ? timeout : timeout == 0 ? -ETIME : 0; /* Gah. */
> +
> +err_fences:
> +       for (i = 0; i < count; i++)
> +               dma_fence_put(fences[i]);
> +err:
> +       kfree(fences);
> +       return ret;
> +}
>
>
diff mbox

Patch

diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index 510dfc2..5e7f654 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -51,6 +51,7 @@ 
 #include <linux/fs.h>
 #include <linux/anon_inodes.h>
 #include <linux/sync_file.h>
+#include <linux/sched/signal.h>
 
 #include "drm_internal.h"
 #include <drm/drm_syncobj.h>
@@ -104,6 +105,27 @@  static void drm_syncobj_add_callback_locked(struct drm_syncobj *syncobj,
 	list_add_tail(&cb->node, &syncobj->cb_list);
 }
 
+static int drm_syncobj_fence_get_or_add_callback(struct drm_syncobj *syncobj,
+						 struct dma_fence **fence,
+						 struct drm_syncobj_cb *cb,
+						 drm_syncobj_func_t func)
+{
+	int ret;
+
+	spin_lock(&syncobj->lock);
+	if (syncobj->fence) {
+		*fence = dma_fence_get(syncobj->fence);
+		ret = 1;
+	} else {
+		*fence = NULL;
+		drm_syncobj_add_callback_locked(syncobj, cb, func);
+		ret = 0;
+	}
+	spin_unlock(&syncobj->lock);
+
+	return ret;
+}
+
 /**
  * drm_syncobj_add_callback - adds a callback to syncobj::cb_list
  * @syncobj: Sync object to which to add the callback
@@ -526,6 +548,159 @@  drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data,
 					&args->handle);
 }
 
+static int drm_syncobj_signaled(struct drm_syncobj *syncobj,
+				uint32_t wait_flags)
+{
+	struct dma_fence *fence;
+	int ret;
+
+	fence = drm_syncobj_fence_get(syncobj);
+	if (!fence) {
+		if (wait_flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT)
+			return 0;
+		else
+			return -EINVAL;
+	}
+
+	ret = dma_fence_is_signaled(fence) ? 1 : 0;
+
+	dma_fence_put(fence);
+
+	return ret;
+}
+
+struct syncobj_wait_entry {
+	struct task_struct *task;
+	struct dma_fence *fence;
+	struct dma_fence_cb fence_cb;
+	struct drm_syncobj_cb syncobj_cb;
+};
+
+static void syncobj_wait_fence_func(struct dma_fence *fence,
+				    struct dma_fence_cb *cb)
+{
+	struct syncobj_wait_entry *wait =
+		container_of(cb, struct syncobj_wait_entry, fence_cb);
+
+	wake_up_process(wait->task);
+}
+
+static void syncobj_wait_syncobj_func(struct drm_syncobj *syncobj,
+				      struct drm_syncobj_cb *cb)
+{
+	struct syncobj_wait_entry *wait =
+		container_of(cb, struct syncobj_wait_entry, syncobj_cb);
+
+	/* This happens inside the syncobj lock */
+	wait->fence = dma_fence_get(syncobj->fence);
+	wake_up_process(wait->task);
+}
+
+static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
+						  uint32_t count,
+						  uint32_t flags,
+						  signed long timeout,
+						  uint32_t *idx)
+{
+	struct syncobj_wait_entry *entries;
+	struct dma_fence *fence;
+	signed long ret;
+	uint32_t signaled_count, i;
+
+	if (timeout == 0) {
+		signaled_count = 0;
+		for (i = 0; i < count; ++i) {
+			ret = drm_syncobj_signaled(syncobjs[i], flags);
+			if (ret < 0)
+				return ret;
+			if (ret == 0)
+				continue;
+			if (signaled_count == 0 && idx)
+				*idx = i;
+			signaled_count++;
+		}
+
+		if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL)
+			return signaled_count == count ? 1 : 0;
+		else
+			return signaled_count > 0 ? 1 : 0;
+	}
+
+	entries = kcalloc(count, sizeof(*entries), GFP_KERNEL);
+	if (!entries)
+		return -ENOMEM;
+
+	for (i = 0; i < count; ++i) {
+		entries[i].task = current;
+
+		if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) {
+			drm_syncobj_fence_get_or_add_callback(syncobjs[i],
+							      &entries[i].fence,
+							      &entries[i].syncobj_cb,
+							      syncobj_wait_syncobj_func);
+		} else {
+			entries[i].fence = drm_syncobj_fence_get(syncobjs[i]);
+			if (!entries[i].fence) {
+				ret = -EINVAL;
+				goto err_cleanup_entries;
+			}
+		}
+	}
+
+	ret = timeout;
+	while (ret > 0) {
+		signaled_count = 0;
+		for (i = 0; i < count; ++i) {
+			fence = entries[i].fence;
+			if (!fence)
+				continue;
+
+			if (dma_fence_is_signaled(fence) ||
+			    (!entries[i].fence_cb.func &&
+			     dma_fence_add_callback(fence,
+						    &entries[i].fence_cb,
+						    syncobj_wait_fence_func))) {
+				/* The fence has been signaled */
+				if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL) {
+					signaled_count++;
+				} else {
+					if (idx)
+						*idx = i;
+					goto done_waiting;
+				}
+			}
+		}
+
+		if ((flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL) &&
+		    signaled_count == count)
+			goto done_waiting;
+
+		set_current_state(TASK_INTERRUPTIBLE);
+
+		ret = schedule_timeout(ret);
+
+		if (ret > 0 && signal_pending(current))
+			ret = -ERESTARTSYS;
+	}
+
+done_waiting:
+	__set_current_state(TASK_RUNNING);
+
+err_cleanup_entries:
+	for (i = 0; i < count; ++i) {
+		if (entries[i].syncobj_cb.func)
+			drm_syncobj_remove_callback(syncobjs[i],
+						    &entries[i].syncobj_cb);
+		if (entries[i].fence_cb.func)
+			dma_fence_remove_callback(entries[i].fence,
+						  &entries[i].fence_cb);
+		dma_fence_put(entries[i].fence);
+	}
+	kfree(entries);
+
+	return ret;
+}
+
 /**
  * drm_timeout_abs_to_jiffies - calculate jiffies timeout from absolute value
  *
@@ -558,43 +733,19 @@  static signed long drm_timeout_abs_to_jiffies(int64_t timeout_nsec)
 	return timeout_jiffies64 + 1;
 }
 
-static int drm_syncobj_wait_fences(struct drm_device *dev,
-				   struct drm_file *file_private,
-				   struct drm_syncobj_wait *wait,
-				   struct dma_fence **fences)
+static int drm_syncobj_array_wait(struct drm_device *dev,
+				  struct drm_file *file_private,
+				  struct drm_syncobj_wait *wait,
+				  struct drm_syncobj **syncobjs)
 {
 	signed long timeout = drm_timeout_abs_to_jiffies(wait->timeout_nsec);
 	signed long ret = 0;
 	uint32_t first = ~0;
 
-	if (wait->flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL) {
-		uint32_t i;
-		for (i = 0; i < wait->count_handles; i++) {
-			ret = dma_fence_wait_timeout(fences[i], true, timeout);
-
-			/* Various dma_fence wait callbacks will return
-			 * ENOENT to indicate that the fence has already
-			 * been signaled.  We need to sanitize this to 0 so
-			 * we don't return early and the client doesn't see
-			 * an unexpected error.
-			 */
-			if (ret == -ENOENT)
-				ret = 0;
-
-			if (ret < 0)
-				return ret;
-			if (ret == 0)
-				break;
-			timeout = ret;
-		}
-		first = 0;
-	} else {
-		ret = dma_fence_wait_any_timeout(fences,
-						 wait->count_handles,
-						 true, timeout,
-						 &first);
-	}
-
+	ret = drm_syncobj_array_wait_timeout(syncobjs,
+					     wait->count_handles,
+					     wait->flags,
+					     timeout, &first);
 	if (ret < 0)
 		return ret;
 
@@ -610,14 +761,15 @@  drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
 {
 	struct drm_syncobj_wait *args = data;
 	uint32_t *handles;
-	struct dma_fence **fences;
+	struct drm_syncobj **syncobjs;
 	int ret = 0;
 	uint32_t i;
 
 	if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
 		return -ENODEV;
 
-	if (args->flags != 0 && args->flags != DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL)
+	if (args->flags & ~(DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL |
+			    DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT))
 		return -EINVAL;
 
 	if (args->count_handles == 0)
@@ -636,27 +788,28 @@  drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
 		goto err_free_handles;
 	}
 
-	fences = kcalloc(args->count_handles,
-			 sizeof(struct dma_fence *), GFP_KERNEL);
-	if (!fences) {
+	syncobjs = kcalloc(args->count_handles,
+			   sizeof(struct drm_syncobj *), GFP_KERNEL);
+	if (!syncobjs) {
 		ret = -ENOMEM;
 		goto err_free_handles;
 	}
 
 	for (i = 0; i < args->count_handles; i++) {
-		ret = drm_syncobj_find_fence(file_private, handles[i],
-					     &fences[i]);
-		if (ret)
+		syncobjs[i] = drm_syncobj_find(file_private, handles[i]);
+		if (!syncobjs[i]) {
+			ret = -ENOENT;
 			goto err_free_fence_array;
+		}
 	}
 
-	ret = drm_syncobj_wait_fences(dev, file_private,
-				      args, fences);
+	ret = drm_syncobj_array_wait(dev, file_private,
+				     args, syncobjs);
 
 err_free_fence_array:
-	for (i = 0; i < args->count_handles; i++)
-		dma_fence_put(fences[i]);
-	kfree(fences);
+	while (i-- > 0)
+		drm_syncobj_put(syncobjs[i]);
+	kfree(syncobjs);
 err_free_handles:
 	kfree(handles);
 
diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
index 4b301b4..f8ec8fe 100644
--- a/include/uapi/drm/drm.h
+++ b/include/uapi/drm/drm.h
@@ -719,6 +719,7 @@  struct drm_syncobj_handle {
 };
 
 #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0)
+#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1)
 struct drm_syncobj_wait {
 	__u64 handles;
 	/* absolute timeout */