[06/10] drm/syncobj: use the timeline point in drm_syncobj_find_fence v3

Message ID 20181211103449.25899-6-david1.zhou@amd.com (mailing list archive)
State New, archived
Series [01/10] dma-buf: add new dma_fence_chain container v4

Commit Message

Chunming Zhou Dec. 11, 2018, 10:34 a.m. UTC
From: Christian König <ckoenig.leichtzumerken@gmail.com>

Implement finding the right timeline point in drm_syncobj_find_fence.

v2: return -EINVAL when the point is not submitted yet.
v3: fix reference counting bug, add flags handling as well

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/drm_syncobj.c | 43 ++++++++++++++++++++++++++++++++---
 1 file changed, 40 insertions(+), 3 deletions(-)

Comments

Chris Wilson Dec. 13, 2018, 11:37 a.m. UTC | #1
Quoting Chunming Zhou (2018-12-11 10:34:45)
> From: Christian König <ckoenig.leichtzumerken@gmail.com>
> 
> Implement finding the right timeline point in drm_syncobj_find_fence.
> 
> v2: return -EINVAL when the point is not submitted yet.
> v3: fix reference counting bug, add flags handling as well
> 
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/drm_syncobj.c | 43 ++++++++++++++++++++++++++++++++---
>  1 file changed, 40 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
> index 76ce13dafc4d..d964b348ecba 100644
> --- a/drivers/gpu/drm/drm_syncobj.c
> +++ b/drivers/gpu/drm/drm_syncobj.c
> @@ -231,16 +231,53 @@ int drm_syncobj_find_fence(struct drm_file *file_private,
>                            struct dma_fence **fence)
>  {
>         struct drm_syncobj *syncobj = drm_syncobj_find(file_private, handle);
> -       int ret = 0;
> +       struct syncobj_wait_entry wait;
> +       int ret;
>  
>         if (!syncobj)
>                 return -ENOENT;
>  
>         *fence = drm_syncobj_fence_get(syncobj);
> -       if (!*fence) {
> +       drm_syncobj_put(syncobj);
> +
> +       if (*fence) {
> +               ret = dma_fence_chain_find_seqno(fence, point);
> +               if (!ret)
> +                       return 0;
> +               dma_fence_put(*fence);
> +       } else {
>                 ret = -EINVAL;
>         }
> -       drm_syncobj_put(syncobj);
> +
> +       if (!(flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT))
> +               return ret;
> +
> +       memset(&wait, 0, sizeof(wait));
> +       wait.task = current;
> +       wait.point = point;
> +       drm_syncobj_fence_add_wait(syncobj, &wait);

> +
> +       do {
> +               set_current_state(TASK_INTERRUPTIBLE);
> +               if (wait.fence) {
> +                       ret = 0;
> +                       break;
> +               }
> +
> +               if (signal_pending(current)) {
> +                       ret = -ERESTARTSYS;
> +                       break;
> +               }
> +
> +               schedule();
> +       } while (1);

I've previously used a dma_fence_proxy so that we could do nonblocking
waits on future submits. That would be preferable (a requirement for
our stupid BKL-driven code).
-Chris
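
(A "proxy" fence here means a placeholder dma_fence that is handed out immediately and only signalled later, once the real fence materializes, so callers never have to block waiting for the submit. dma_fence_proxy is not an upstream helper, so the following is only an illustrative sketch of the concept, assuming a kernel where the enable_signaling and wait ops are optional; it is not Chris's actual implementation.)

#include <linux/dma-fence.h>
#include <linux/spinlock.h>

struct proxy_fence {
	struct dma_fence base;
	spinlock_t lock;
	struct dma_fence_cb cb;
};

static const char *proxy_name(struct dma_fence *f)
{
	return "proxy";
}

static const struct dma_fence_ops proxy_fence_ops = {
	.get_driver_name	= proxy_name,
	.get_timeline_name	= proxy_name,
};

/* Hand out a fence before the real one exists. */
static void proxy_fence_init(struct proxy_fence *p, u64 context, u64 seqno)
{
	spin_lock_init(&p->lock);
	dma_fence_init(&p->base, &proxy_fence_ops, &p->lock, context, seqno);
}

static void proxy_real_signaled(struct dma_fence *real, struct dma_fence_cb *cb)
{
	struct proxy_fence *p = container_of(cb, struct proxy_fence, cb);

	dma_fence_signal(&p->base);
}

/* Called by the producer once the real fence finally materializes. */
static void proxy_fence_set_real(struct proxy_fence *p, struct dma_fence *real)
{
	if (dma_fence_add_callback(real, &p->cb, proxy_real_signaled))
		dma_fence_signal(&p->base); /* real fence already signaled */
}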
Christian König Dec. 13, 2018, 12:11 p.m. UTC | #2
Am 13.12.18 um 12:37 schrieb Chris Wilson:
> Quoting Chunming Zhou (2018-12-11 10:34:45)
>> [...]
> I've previously used a dma_fence_proxy so that we could do nonblocking
> waits on future submits. That would be preferrable (a requirement for
> our stupid BKL-driven code).

That is exactly what I would definitely NAK.

I would rather say we should come up with a wait_multiple_events() macro 
and completely nuke the custom implementation of this in:
1. dma_fence_default_wait and dma_fence_wait_any_timeout
2. the radeon fence implementation
3. the nouveau fence implementation
4. the syncobj code

Because all of them do exactly the same thing. The dma_fence implementation 
unfortunately came up with a custom event handling mechanism instead of 
extending the core Linux wait_event() system.

This in turn led to a lot of duplicated handling.

Christian.

> -Chris
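
(To make the duplication point concrete: the hand-rolled schedule() loop in the patch is structurally the classic wait_event pattern. Assuming the signalling side had a wait_queue_head_t to wake, which the syncobj code does not currently expose, so this is purely illustrative, the same wait collapses to:)

	/* syncobj_wq is a hypothetical wait queue woken by the signalling side. */
	ret = wait_event_interruptible(syncobj_wq, READ_ONCE(wait.fence));
	/* 0 once wait.fence has materialized, -ERESTARTSYS when interrupted. */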
Chris Wilson Dec. 13, 2018, 12:21 p.m. UTC | #3
Quoting Koenig, Christian (2018-12-13 12:11:10)
> Am 13.12.18 um 12:37 schrieb Chris Wilson:
> > Quoting Chunming Zhou (2018-12-11 10:34:45)
> >> [...]
> > I've previously used a dma_fence_proxy so that we could do nonblocking
> > waits on future submits. That would be preferrable (a requirement for
> > our stupid BKL-driven code).
> 
> That is exactly what I would definitely NAK.
> 
> I would rather say we should come up with a wait_multiple_events() macro 
> and completely nuke the custom implementation of this in:
> 1. dma_fence_default_wait and dma_fence_wait_any_timeout
> 2. the radeon fence implementation
> 3. the nouveau fence implementation
> 4. the syncobj code
> 
> Cause all of them do exactly the same. The dma_fence implementation 
> unfortunately came up with a custom event handling mechanism instead of 
> extending the core Linux wait_event() system.

I don't want a blocking wait at all.
-Chris
Christian König Dec. 13, 2018, 12:24 p.m. UTC | #4
Am 13.12.18 um 13:21 schrieb Chris Wilson:
> Quoting Koenig, Christian (2018-12-13 12:11:10)
>> Am 13.12.18 um 12:37 schrieb Chris Wilson:
>>> Quoting Chunming Zhou (2018-12-11 10:34:45)
>>>> [...]
>>> I've previously used a dma_fence_proxy so that we could do nonblocking
>>> waits on future submits. That would be preferrable (a requirement for
>>> our stupid BKL-driven code).
>> That is exactly what I would definitely NAK.
>>
>> I would rather say we should come up with a wait_multiple_events() macro
>> and completely nuke the custom implementation of this in:
>> 1. dma_fence_default_wait and dma_fence_wait_any_timeout
>> 2. the radeon fence implementation
>> 3. the nouveau fence implementation
>> 4. the syncobj code
>>
>> Cause all of them do exactly the same. The dma_fence implementation
>> unfortunately came up with a custom event handling mechanism instead of
>> extending the core Linux wait_event() system.
> I don't want a blocking wait at all.

Ok I wasn't clear enough :) That is exactly what I would NAK!

The wait must be blocking, otherwise you would allow wait-before-signal.

Christian.

> -Chris
Daniel Vetter Dec. 13, 2018, 4:01 p.m. UTC | #5
On Thu, Dec 13, 2018 at 12:24:57PM +0000, Koenig, Christian wrote:
> Am 13.12.18 um 13:21 schrieb Chris Wilson:
> > Quoting Koenig, Christian (2018-12-13 12:11:10)
> >> Am 13.12.18 um 12:37 schrieb Chris Wilson:
> >>> Quoting Chunming Zhou (2018-12-11 10:34:45)
> >>>> [...]
> >>> I've previously used a dma_fence_proxy so that we could do nonblocking
> >>> waits on future submits. That would be preferrable (a requirement for
> >>> our stupid BKL-driven code).
> >> That is exactly what I would definitely NAK.
> >>
> >> I would rather say we should come up with a wait_multiple_events() macro
> >> and completely nuke the custom implementation of this in:
> >> 1. dma_fence_default_wait and dma_fence_wait_any_timeout
> >> 2. the radeon fence implementation
> >> 3. the nouveau fence implementation
> >> 4. the syncobj code
> >>
> >> Cause all of them do exactly the same. The dma_fence implementation
> >> unfortunately came up with a custom event handling mechanism instead of
> >> extending the core Linux wait_event() system.
> > I don't want a blocking wait at all.
> 
> Ok I wasn't clear enough :) That is exactly what I would NAK!
> 
> The wait must be blocking or otherwise you would allow wait-before-signal.

Well the current implementation is pulling a rather big trick on readers
in this regard: It looks like a dma_fence, it's implemented as one even,
heck you even open-code a dma_fence_wait here.

Except the semantics are completely different.

So aside from the discussion of whether we really want to fully chain them, I
think it just doesn't make sense to implement the "wait for fence submit"
as a dma_fence wait. I'd outright remove that part from the uapi and
force the wait. The current radv/anv plans I discussed with Jason were that
we'd have a separate submit thread, and hence unconditionally blocking
until the fence has materialized is the right thing to do. Even allowing
that option, either through a flag or by making these things look like
dma_fences (they are _not_), just tricks folks into misunderstanding what's
going on. Code sharing just because the code looks similar is imo a really
bad idea when the semantics are entirely different (that was also the
reason behind not reusing all the cpu event stuff for dma_fence; they're
not normal cpu events).
-Daniel

> 
> Christian.
> 
> > -Chris
> 
Christian König Dec. 13, 2018, 4:47 p.m. UTC | #6
Am 13.12.18 um 17:01 schrieb Daniel Vetter:
> On Thu, Dec 13, 2018 at 12:24:57PM +0000, Koenig, Christian wrote:
>> Am 13.12.18 um 13:21 schrieb Chris Wilson:
>>> Quoting Koenig, Christian (2018-12-13 12:11:10)
>>>> Am 13.12.18 um 12:37 schrieb Chris Wilson:
>>>>> Quoting Chunming Zhou (2018-12-11 10:34:45)
>>>>>> [...]
>>>>> I've previously used a dma_fence_proxy so that we could do nonblocking
>>>>> waits on future submits. That would be preferrable (a requirement for
>>>>> our stupid BKL-driven code).
>>>> That is exactly what I would definitely NAK.
>>>>
>>>> I would rather say we should come up with a wait_multiple_events() macro
>>>> and completely nuke the custom implementation of this in:
>>>> 1. dma_fence_default_wait and dma_fence_wait_any_timeout
>>>> 2. the radeon fence implementation
>>>> 3. the nouveau fence implementation
>>>> 4. the syncobj code
>>>>
>>>> Cause all of them do exactly the same. The dma_fence implementation
>>>> unfortunately came up with a custom event handling mechanism instead of
>>>> extending the core Linux wait_event() system.
>>> I don't want a blocking wait at all.
>> Ok I wasn't clear enough :) That is exactly what I would NAK!
>>
>> The wait must be blocking or otherwise you would allow wait-before-signal.
> Well the current implementation is pulling a rather big trick on readers
> in this regard: It looks like a dma_fence, it's implemented as one even,
> heck you even open-code a dma_fence_wait here.
>
> Except the semantics are completely different.
>
> So aside from the discussion whether we really want to fully chain them I
> think it just doesn't make sense to implement the "wait for fence submit"
> as a dma_fence wait. And I'd outright remove that part from the uapi, and
> force the wait. The current radv/anv plans I discussed with Jason was that
> we'd have a separate submit thread, and hence unconditionally blocking
> until the fence has materialized is the right thing to do. Even allowing
> that option, either through a flag, or making these things look like
> dma_fences (they are _not_) just tricks folks into misunderstanding what's
> going on.

Good, that sounds strongly like something I can agree on as well.

> Code sharing just because the code looks similar is imo a really
> bad idea, when the semantics are entirely different (that was also the
> reason behind not reusing all the cpu event stuff for dma_fence, they're
> not normal cpu events).

Ok, the last sentence is what I don't understand.

What exactly is the semantic difference between the dma_fence_wait and 
the wait_event interface?

I mean the wait_event interface was introduced to prevent drivers from 
open-coding an event interface and getting it wrong all the time.

So a good part of the bugs we have seen around waiting for dma-fences 
are exactly the class of bugs wait_event was invented to prevent in the first place.

The only big thing I can see missing in the wait_event interface is 
waiting for many events at the same time, but that should be a rather 
easy addition.

Regards,
Christian.

> -Daniel
>
>> Christian.
>>
>>> -Chris
Daniel Vetter Dec. 13, 2018, 5:26 p.m. UTC | #7
On Thu, Dec 13, 2018 at 5:47 PM Koenig, Christian
<Christian.Koenig@amd.com> wrote:
>
> Am 13.12.18 um 17:01 schrieb Daniel Vetter:
> > On Thu, Dec 13, 2018 at 12:24:57PM +0000, Koenig, Christian wrote:
> >> Am 13.12.18 um 13:21 schrieb Chris Wilson:
> >>> Quoting Koenig, Christian (2018-12-13 12:11:10)
> >>>> Am 13.12.18 um 12:37 schrieb Chris Wilson:
> >>>>> Quoting Chunming Zhou (2018-12-11 10:34:45)
> >>>>>> [...]
> >>>>> I've previously used a dma_fence_proxy so that we could do nonblocking
> >>>>> waits on future submits. That would be preferrable (a requirement for
> >>>>> our stupid BKL-driven code).
> >>>> That is exactly what I would definitely NAK.
> >>>>
> >>>> I would rather say we should come up with a wait_multiple_events() macro
> >>>> and completely nuke the custom implementation of this in:
> >>>> 1. dma_fence_default_wait and dma_fence_wait_any_timeout
> >>>> 2. the radeon fence implementation
> >>>> 3. the nouveau fence implementation
> >>>> 4. the syncobj code
> >>>>
> >>>> Cause all of them do exactly the same. The dma_fence implementation
> >>>> unfortunately came up with a custom event handling mechanism instead of
> >>>> extending the core Linux wait_event() system.
> >>> I don't want a blocking wait at all.
> >> Ok I wasn't clear enough :) That is exactly what I would NAK!
> >>
> >> The wait must be blocking or otherwise you would allow wait-before-signal.
> > Well the current implementation is pulling a rather big trick on readers
> > in this regard: It looks like a dma_fence, it's implemented as one even,
> > heck you even open-code a dma_fence_wait here.
> >
> > Except the semantics are completely different.
> >
> > So aside from the discussion whether we really want to fully chain them I
> > think it just doesn't make sense to implement the "wait for fence submit"
> > as a dma_fence wait. And I'd outright remove that part from the uapi, and
> > force the wait. The current radv/anv plans I discussed with Jason was that
> > we'd have a separate submit thread, and hence unconditionally blocking
> > until the fence has materialized is the right thing to do. Even allowing
> > that option, either through a flag, or making these things look like
> > dma_fences (they are _not_) just tricks folks into misunderstanding what's
> > going on.
>
> Good, that sounds strongly like something I can agree on as well.
>
> > Code sharing just because the code looks similar is imo a really
> > bad idea, when the semantics are entirely different (that was also the
> > reason behind not reusing all the cpu event stuff for dma_fence, they're
> > not normal cpu events).
>
> Ok, the last sentence is what I don't understand.
>
> What exactly is the semantic difference between the dma_fence_wait and
> the wait_event interface?
>
> I mean the wait_event interface was introduced to prevent drivers from
> openly coding an event interface and getting it wrong all the time.
>
> So a good part of the bugs we have seen around waiting for dma-fences
> are exactly why wait_event was invented in the first place.
>
> The only big thing I can see missing in the wait_event interface is
> waiting for many events at the same time, but that should be a rather
> easy addition.

So this bikeshed was years ago, maybe I should type a patch to
document it, but as far as I remember the big difference is:

- wait_event and friends generally Just Work. It can go wrong of
course, but the usual pattern is that the waker side does an
unconditional wake_up_all, and hence all the waiter needs to do is add
themselves to the waiter list.

- dma_fence otoh is entirely different: we wanted to support all kinds
of signalling modes, including having interrupts disabled by default
(not sure whether we actually still achieve this with all the cpu-side
scheduling the big drivers do). Which means the waker does not
unconditionally call wake_up_all, at least not in a timely fashion, and
waiters need to call dma_fence_enable_signalling before they can add
themselves to the waiter list and call schedule().

The other big difference is how you check for the classic wakeup races
where the event happens between when you checked for it and when you
go to sleep. Because hw is involved, the rules are again a bit
different, and they're different between drivers because hw is
incoherent/broken in all kinds of ways. So there are also really tricky
things going on between adding the waiter to the waiter list and
dma_fence_enable_signalling. For pure cpu events you can ignore this
and bake the few necessary barriers into the various macros; dma_fence
needs more.

Adding Maarten, maybe there was more. I definitely remember huge and
very long discussions about all this.
-Daniel
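
(Roughly, the waiter-side difference looks like this in code. The sketch below is a simplified version of the dma_fence_default_wait() pattern, with no timeout handling and no signaled fast path; registering the callback is what lazily enables signalling on the fence, the step a plain wait_event() waiter never needs because its waker always does an unconditional wake_up_all().)

#include <linux/dma-fence.h>
#include <linux/sched/signal.h>

struct wait_cb {
	struct dma_fence_cb base;
	struct task_struct *task;
};

static void wait_cb_wake(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	struct wait_cb *wcb = container_of(cb, struct wait_cb, base);

	wake_up_process(wcb->task);
}

/* Simplified sketch of the dma_fence waiter side. */
static long wait_fence_sketch(struct dma_fence *fence)
{
	struct wait_cb cb = { .task = current };
	long ret = 0;

	/* Adding the callback implicitly enables signalling on the fence. */
	if (dma_fence_add_callback(fence, &cb.base, wait_cb_wake))
		return 0; /* already signaled */

	for (;;) {
		set_current_state(TASK_INTERRUPTIBLE);
		if (dma_fence_is_signaled(fence))
			break;
		if (signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}
		schedule();
	}
	__set_current_state(TASK_RUNNING);
	dma_fence_remove_callback(fence, &cb.base);
	return ret;
}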
Christian König Dec. 13, 2018, 6:55 p.m. UTC | #8
Am 13.12.18 um 18:26 schrieb Daniel Vetter:
>>> Code sharing just because the code looks similar is imo a really
>>> bad idea, when the semantics are entirely different (that was also the
>>> reason behind not reusing all the cpu event stuff for dma_fence, they're
>>> not normal cpu events).
>> Ok, the last sentence is what I don't understand.
>>
>> What exactly is the semantic difference between the dma_fence_wait and
>> the wait_event interface?
>>
>> I mean the wait_event interface was introduced to prevent drivers from
>> openly coding an event interface and getting it wrong all the time.
>>
>> So a good part of the bugs we have seen around waiting for dma-fences
>> are exactly why wait_event was invented in the first place.
>>
>> The only big thing I can see missing in the wait_event interface is
>> waiting for many events at the same time, but that should be a rather
>> easy addition.
> So this bikeshed was years ago, maybe I should type a patch to
> document it, but as far as I remember the big difference is:
>
> - wait_event and friends generally Just Work. It can go wrong of
> course, but the usual pattern is that the waker-side does and
> uncoditional wake_up_all, and hence all the waiter needs to do is add
> themselves to the waiter list.
>
> - dma_buf otoh is entirely different: We wanted to support all kinds
> fo signalling modes, including having interrupts disabled by default
> (not sure whether we actually achieve this still with all the cpu-side
> scheduling the big drivers do). Which means the waker does not
> unconditionally call wake_up_all, at least not timeline, and waiters
> need to call dma_fence_enable_signalling before they can add
> themselves to the waiter list and call schedule().

Well that is not something I'm questioning because we really need this 
behavior as well.

But all of this can be perfectly implemented on top of wake_up_all.

> The other bit difference is how you check for the classic wakeup races
> where the event happens between when you checked for it and when you
> go to sleep. Because hw is involved, the rules are again a bit
> different, and their different between drivers because hw is
> incoherent/broken in all kinds of ways. So there's also really tricky
> things going on between adding the waiter to the waiter list and
> dma_fence_enable_signalling. For pure cpu events you can ignore this
> and bake the few necessary barriers into the various macros, dma_fence
> needs more.

Ah, yes, I think I know what you mean by that, and I consider this 
a bad idea as well.

Only very few drivers actually need this behavior, and the ones that do 
should be perfectly able to implement it inside the driver code.

The crux is that leaking this behavior into the dma-fence made it 
unnecessarily complicated and resulted in quite a bit of unnecessary 
irq_work and delayed_work usage.

I will take a look at this over the holidays. Shouldn't be too hard to 
fix, and it actually has some value beyond just being a nice cleanup.

Regards,
Christian.

>
> Adding Maarten, maybe there was more. I definitely remember huge&very
> long discussions about all this.
> -Daniel

Patch

diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index 76ce13dafc4d..d964b348ecba 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -231,16 +231,53 @@  int drm_syncobj_find_fence(struct drm_file *file_private,
 			   struct dma_fence **fence)
 {
 	struct drm_syncobj *syncobj = drm_syncobj_find(file_private, handle);
-	int ret = 0;
+	struct syncobj_wait_entry wait;
+	int ret;
 
 	if (!syncobj)
 		return -ENOENT;
 
 	*fence = drm_syncobj_fence_get(syncobj);
-	if (!*fence) {
+	drm_syncobj_put(syncobj);
+
+	if (*fence) {
+		ret = dma_fence_chain_find_seqno(fence, point);
+		if (!ret)
+			return 0;
+		dma_fence_put(*fence);
+	} else {
 		ret = -EINVAL;
 	}
-	drm_syncobj_put(syncobj);
+
+	if (!(flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT))
+		return ret;
+
+	memset(&wait, 0, sizeof(wait));
+	wait.task = current;
+	wait.point = point;
+	drm_syncobj_fence_add_wait(syncobj, &wait);
+
+	do {
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (wait.fence) {
+			ret = 0;
+			break;
+		}
+
+		if (signal_pending(current)) {
+			ret = -ERESTARTSYS;
+			break;
+		}
+
+		schedule();
+	} while (1);
+
+	__set_current_state(TASK_RUNNING);
+	*fence = wait.fence;
+
+	if (wait.node.next)
+		drm_syncobj_remove_wait(syncobj, &wait);
+
 	return ret;
 }
 EXPORT_SYMBOL(drm_syncobj_find_fence);
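
(For context, a minimal caller-side sketch of how a driver would resolve a timeline point with this change. The full prototype is not visible in the hunk above, only the trailing parameter is, so the argument order here is an assumption; flags = 0 means "do not wait for an unsubmitted point", which yields -EINVAL per the v2 note in the commit message.)

	struct dma_fence *fence;
	int r;

	/* Assumed signature: (file_private, handle, point, flags, &fence). */
	r = drm_syncobj_find_fence(file_private, handle, point, 0, &fence);
	if (r)
		return r;

	/* ... use the fence as a dependency for the new job ... */
	dma_fence_put(fence);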