[4/7] anv: Support wait for heterogeneous list of fences [v2]
diff mbox

Message ID 20180615025256.10657-5-keithp@keithp.com
State New
Headers show

Commit Message

Keith Packard June 15, 2018, 2:52 a.m. UTC
Handle the case where the set of fences to wait for is not all of the
same type by either waiting for them sequentially (waitAll), or
polling them until the timer has expired (!waitAll). We hope the
latter case is not common.

While the current code makes sure that it always has fences of only
one type, that will not be true when we add WSI fences. Split out this
refactoring to make merging that clearer.

v2: Adopt Jason Ekstrand's coding conventions

    Declare variables at first use, eliminate extra whitespace between
    types and names. Wrap lines to 80 columns.

    Suggested-by: Jason Ekstrand <jason.ekstrand@intel.com>

Signed-off-by: Keith Packard <keithp@keithp.com>
---
 src/intel/vulkan/anv_queue.c | 105 +++++++++++++++++++++++++++++------
 1 file changed, 88 insertions(+), 17 deletions(-)

Comments

Jason Ekstrand June 19, 2018, 11:26 p.m. UTC | #1
I still don't really like this but I agree that this code really should not
be getting hit very often so it's probably not too bad.  Maybe one day
we'll be able to drop the non-syncobj paths entirely.  Wouldn't that be
nice.  In the mean time, this is probably fine.  I did see one issue below
with time conversions that should be fixed though.

On Thu, Jun 14, 2018 at 7:52 PM, Keith Packard <keithp@keithp.com> wrote:

> Handle the case where the set of fences to wait for is not all of the
> same type by either waiting for them sequentially (waitAll), or
> polling them until the timer has expired (!waitAll). We hope the
> latter case is not common.
>
> While the current code makes sure that it always has fences of only
> one type, that will not be true when we add WSI fences. Split out this
> refactoring to make merging that clearer.
>
> v2: Adopt Jason Ekstrand's coding conventions
>
>     Declare variables at first use, eliminate extra whitespace between
>     types and names. Wrap lines to 80 columns.
>
>     Suggested-by: Jason Ekstrand <jason.ekstrand@intel.com>
>
> Signed-off-by: Keith Packard <keithp@keithp.com>
> ---
>  src/intel/vulkan/anv_queue.c | 105 +++++++++++++++++++++++++++++------
>  1 file changed, 88 insertions(+), 17 deletions(-)
>
> diff --git a/src/intel/vulkan/anv_queue.c b/src/intel/vulkan/anv_queue.c
> index 6fe04a0a19d..8df99c84549 100644
> --- a/src/intel/vulkan/anv_queue.c
> +++ b/src/intel/vulkan/anv_queue.c
> @@ -459,12 +459,32 @@ gettime_ns(void)
>     return (uint64_t)current.tv_sec * NSEC_PER_SEC + current.tv_nsec;
>  }
>
> +static uint64_t anv_get_absolute_timeout(uint64_t timeout)
> +{
> +   if (timeout == 0)
> +      return 0;
> +   uint64_t current_time = gettime_ns();
> +
> +   timeout = MIN2(INT64_MAX - current_time, timeout);
> +
> +   return (current_time + timeout);
> +}
>

This does not have the same behavior as the code it replaces.  In
particular, the old code saturates at INT64_MAX whereas this code does not
do so properly anymore.  If UINT64_MAX gets passed into timeout, I believe
that will be implicitly casted to signed and the MIN will yield -1 which
gets casted back to unsigned and you get UINT64_MAX again.  This is a
problem because the value we pass into the kernel ioctls is signed.  The
behavior of the kernel *should* be infinite when timeout < 0 but, on some
older kernels, -1 is treated as 0 and you get no timeout.

That said, I think I just saw a bug in the old code which I'll point out
below.


> +
> +static int64_t anv_get_relative_timeout(uint64_t abs_timeout)
> +{
> +   uint64_t now = gettime_ns();
> +
> +   if (abs_timeout < now)
> +      return 0;
> +   return abs_timeout - now;
> +}
> +
>  static VkResult
>  anv_wait_for_syncobj_fences(struct anv_device *device,
>                              uint32_t fenceCount,
>                              const VkFence *pFences,
>                              bool waitAll,
> -                            uint64_t _timeout)
> +                            uint64_t abs_timeout_ns)
>  {
>     uint32_t *syncobjs = vk_zalloc(&device->alloc,
>                                    sizeof(*syncobjs) * fenceCount, 8,
> @@ -484,19 +504,6 @@ anv_wait_for_syncobj_fences(struct anv_device
> *device,
>        syncobjs[i] = impl->syncobj;
>     }
>
> -   int64_t abs_timeout_ns = 0;
> -   if (_timeout > 0) {
> -      uint64_t current_ns = gettime_ns();
> -
> -      /* Add but saturate to INT32_MAX */
> -      if (current_ns + _timeout < current_ns)
> -         abs_timeout_ns = INT64_MAX;
> -      else if (current_ns + _timeout > INT64_MAX)
>

I suspect we need to cast INT64_MAX to a uint64_t here to ensure we don't
accidentally get an implicit conversion to signed.


> -         abs_timeout_ns = INT64_MAX;
> -      else
> -         abs_timeout_ns = current_ns + _timeout;
> -   }
> -
>     /* The gem_syncobj_wait ioctl may return early due to an inherent
>      * limitation in the way it computes timeouts.  Loop until we've
> actually
>      * passed the timeout.
> @@ -665,6 +672,67 @@ done:
>     return result;
>  }
>
> +static VkResult
> +anv_wait_for_fences(struct anv_device *device,
> +                    uint32_t fenceCount,
> +                    const VkFence *pFences,
> +                    bool waitAll,
> +                    uint64_t abs_timeout)
> +{
> +   VkResult result = VK_SUCCESS;
> +
> +   if (fenceCount <= 1 || waitAll) {
> +      for (uint32_t i = 0; i < fenceCount; i++) {
> +         ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
> +         switch (fence->permanent.type) {
> +         case ANV_FENCE_TYPE_BO:
> +            result = anv_wait_for_bo_fences(
> +               device, 1, &pFences[i], true,
> +               anv_get_relative_timeout(abs_timeout));
> +            break;
> +         case ANV_FENCE_TYPE_SYNCOBJ:
> +            result = anv_wait_for_syncobj_fences(device, 1, &pFences[i],
> +                                                 true, abs_timeout);
> +            break;
> +         case ANV_FENCE_TYPE_NONE:
> +            result = VK_SUCCESS;
> +            break;
> +         }
> +         if (result != VK_SUCCESS)
> +            return result;
> +      }
> +   } else {
> +      while (gettime_ns() < abs_timeout) {
> +         for (uint32_t i = 0; i < fenceCount; i++) {
> +            if (anv_wait_for_fences(device, 1, &pFences[i], true, 0) ==
> VK_SUCCESS)
> +               return VK_SUCCESS;
> +         }
> +      }
> +      result = VK_TIMEOUT;
> +   }
> +   return result;
> +}
> +
> +static bool anv_all_fences_syncobj(uint32_t fenceCount, const VkFence
> *pFences)
> +{
> +   for (uint32_t i = 0; i < fenceCount; ++i) {
> +      ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
> +      if (fence->permanent.type != ANV_FENCE_TYPE_SYNCOBJ)
> +         return false;
> +   }
> +   return true;
> +}
> +
> +static bool anv_all_fences_bo(uint32_t fenceCount, const VkFence *pFences)
> +{
> +   for (uint32_t i = 0; i < fenceCount; ++i) {
> +      ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
> +      if (fence->permanent.type != ANV_FENCE_TYPE_BO)
> +         return false;
> +   }
> +   return true;
> +}
> +
>  VkResult anv_WaitForFences(
>      VkDevice                                    _device,
>      uint32_t                                    fenceCount,
> @@ -677,12 +745,15 @@ VkResult anv_WaitForFences(
>     if (unlikely(device->lost))
>        return VK_ERROR_DEVICE_LOST;
>
> -   if (device->instance->physicalDevice.has_syncobj_wait) {
> +   if (anv_all_fences_syncobj(fenceCount, pFences)) {
>        return anv_wait_for_syncobj_fences(device, fenceCount, pFences,
> -                                         waitAll, timeout);
> -   } else {
> +                                         waitAll,
> anv_get_absolute_timeout(timeout));
> +   } else if (anv_all_fences_bo(fenceCount, pFences)) {
>        return anv_wait_for_bo_fences(device, fenceCount, pFences,
>                                      waitAll, timeout);
> +   } else {
> +      return anv_wait_for_fences(device, fenceCount, pFences,
> +                                 waitAll, anv_get_absolute_timeout(
> timeout));
>     }
>  }
>
> --
> 2.17.1
>
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel
>
Keith Packard June 20, 2018, 12:36 a.m. UTC | #2
Jason Ekstrand <jason@jlekstrand.net> writes:

> I still don't really like this but I agree that this code really should not
> be getting hit very often so it's probably not too bad.  Maybe one day
> we'll be able to drop the non-syncobj paths entirely.  Wouldn't that be
> nice.

I agree. What I want to have is kernel-side syncobj support for all of
the WSI fences that we need here.

I thought about using syncobjs and signaling them from user-space, but
realized that we couldn't eliminate the non-syncobj path quite yet as
that requires a new enough kernel. And, if we want to get to
kernel-native WSI syncobjs, that would mean we'd eventually have three
code paths. I think it's better to have one 'legacy' path, which works
on all kernels, and then one 'modern' path which does what we want.

> In the mean time, this is probably fine.  I did see one issue below
> with time conversions that should be fixed though.

Always a hard thing to get right.

>> +static uint64_t anv_get_absolute_timeout(uint64_t timeout)
>> +{
>> +   if (timeout == 0)
>> +      return 0;
>> +   uint64_t current_time = gettime_ns();
>> +
>> +   timeout = MIN2(INT64_MAX - current_time, timeout);
>> +
>> +   return (current_time + timeout);
>> +}
>>
>
> This does not have the same behavior as the code it replaces.  In
> particular, the old code saturates at INT64_MAX whereas this code does not
> do so properly anymore.  If UINT64_MAX gets passed into timeout, I believe
> that will be implicitly casted to signed and the MIN will yield -1 which
> gets casted back to unsigned and you get UINT64_MAX again.

It won't not get implicitly casted to signed; the implicit cast works
the other way where <signed> OP <unsigned> implicitly casts the <signed>
operand to unsigned for types of the same 'rank' (where 'rank' is not
quite equivalent to size). Here's a fine article on this:

https://wiki.sei.cmu.edu/confluence/display/c/INT02-C.+Understand+integer+conversion+rules

However, this is a rather obscure part of the ISO standard, and I don't
think we should expect that people reading the code know it well. Adding
a cast to make it clear what we want is a good idea. How about this?

        static uint64_t anv_get_absolute_timeout(uint64_t timeout)
        {
           if (timeout == 0)
              return 0;
           uint64_t current_time = gettime_ns();
           uint64_t max_timeout = (uint64_t) INT64_MAX - current_time;

           timeout = MIN2(max_timeout, timeout);

           return (current_time + timeout);
        }

> This is a problem because the value we pass into the kernel ioctls is
> signed.  The behavior of the kernel *should* be infinite when timeout
> < 0 but, on some older kernels, -1 is treated as 0 and you get no
> timeout.

Yeah, we definitely want to cap the values to INT64_MAX.

>> -      else if (current_ns + _timeout > INT64_MAX)
>>
>
> I suspect we need to cast INT64_MAX to a uint64_t here to ensure we don't
> accidentally get an implicit conversion to signed.

Again, it's not necessary given the C conversion rules, but it is a good
way to clarify the intent.
Jason Ekstrand June 20, 2018, 12:42 a.m. UTC | #3
On Tue, Jun 19, 2018 at 5:36 PM, Keith Packard <keithp@keithp.com> wrote:

> Jason Ekstrand <jason@jlekstrand.net> writes:
>
> > I still don't really like this but I agree that this code really should
> not
> > be getting hit very often so it's probably not too bad.  Maybe one day
> > we'll be able to drop the non-syncobj paths entirely.  Wouldn't that be
> > nice.
>
> I agree. What I want to have is kernel-side syncobj support for all of
> the WSI fences that we need here.
>
> I thought about using syncobjs and signaling them from user-space, but
> realized that we couldn't eliminate the non-syncobj path quite yet as
> that requires a new enough kernel. And, if we want to get to
> kernel-native WSI syncobjs, that would mean we'd eventually have three
> code paths. I think it's better to have one 'legacy' path, which works
> on all kernels, and then one 'modern' path which does what we want.
>
> > In the mean time, this is probably fine.  I did see one issue below
> > with time conversions that should be fixed though.
>
> Always a hard thing to get right.
>
> >> +static uint64_t anv_get_absolute_timeout(uint64_t timeout)
> >> +{
> >> +   if (timeout == 0)
> >> +      return 0;
> >> +   uint64_t current_time = gettime_ns();
> >> +
> >> +   timeout = MIN2(INT64_MAX - current_time, timeout);
> >> +
> >> +   return (current_time + timeout);
> >> +}
> >>
> >
> > This does not have the same behavior as the code it replaces.  In
> > particular, the old code saturates at INT64_MAX whereas this code does
> not
> > do so properly anymore.  If UINT64_MAX gets passed into timeout, I
> believe
> > that will be implicitly casted to signed and the MIN will yield -1 which
> > gets casted back to unsigned and you get UINT64_MAX again.
>
> It won't not get implicitly casted to signed; the implicit cast works
> the other way where <signed> OP <unsigned> implicitly casts the <signed>
> operand to unsigned for types of the same 'rank' (where 'rank' is not
> quite equivalent to size). Here's a fine article on this:
>
> https://wiki.sei.cmu.edu/confluence/display/c/INT02-C.+
> Understand+integer+conversion+rules
>
> However, this is a rather obscure part of the ISO standard, and I don't
> think we should expect that people reading the code know it well.


This is why I insert casts like mad in these scenarios. :-)


> Adding
> a cast to make it clear what we want is a good idea. How about this?
>
>         static uint64_t anv_get_absolute_timeout(uint64_t timeout)
>         {
>            if (timeout == 0)
>               return 0;
>            uint64_t current_time = gettime_ns();
>            uint64_t max_timeout = (uint64_t) INT64_MAX - current_time;
>

I suppose we probably shouldn't worry about current_time being greater than
INT64_MAX?  I guess if that happens we have pretty big problems...


>            timeout = MIN2(max_timeout, timeout);
>
>            return (current_time + timeout);
>         }
>

Yeah, I think that's better.

--Jason
Keith Packard June 20, 2018, 1:22 a.m. UTC | #4
Jason Ekstrand <jason@jlekstrand.net> writes:

> I suppose we probably shouldn't worry about current_time being greater than
> INT64_MAX?  I guess if that happens we have pretty big problems...

Nope, we've already given up on that working -- it's a couple hundred
years of system uptime. Neither of us have any concerns in this area.

>>            timeout = MIN2(max_timeout, timeout);
>>
>>            return (current_time + timeout);
>>         }
>>
>
> Yeah, I think that's better.

Cool. I'll wait for further review :-)
Jason Ekstrand June 20, 2018, 2:22 a.m. UTC | #5
On Tue, Jun 19, 2018 at 6:22 PM, Keith Packard <keithp@keithp.com> wrote:

> Jason Ekstrand <jason@jlekstrand.net> writes:
>
> > I suppose we probably shouldn't worry about current_time being greater
> than
> > INT64_MAX?  I guess if that happens we have pretty big problems...
>
> Nope, we've already given up on that working -- it's a couple hundred
> years of system uptime. Neither of us have any concerns in this area.
>
> >>            timeout = MIN2(max_timeout, timeout);
> >>
> >>            return (current_time + timeout);
> >>         }
> >>
> >
> > Yeah, I think that's better.
>
> Cool. I'll wait for further review :-)
>

I don't think I have any more comments on this patch.  It's gross but I
think it should work.
Keith Packard June 20, 2018, 5:16 a.m. UTC | #6
Jason Ekstrand <jason@jlekstrand.net> writes:

> I don't think I have any more comments on this patch.  It's gross but I
> think it should work.

I'll mark you down as 'Acked-by' then. Neither of us loves the
implementation; I'll see about creating the kernel infrastructure
necessary to supplant it.
Jason Ekstrand June 20, 2018, 5:22 a.m. UTC | #7
On June 19, 2018 22:16:37 "Keith Packard" <keithp@keithp.com> wrote:

> Jason Ekstrand <jason@jlekstrand.net> writes:
>
>> I don't think I have any more comments on this patch.  It's gross but I
>> think it should work.
>
> I'll mark you down as 'Acked-by' then. Neither of us loves the
> implementation; I'll see about creating the kernel infrastructure
> necessary to supplant it.

You can have a full reviewed-by
Keith Packard June 20, 2018, 5:36 a.m. UTC | #8
Jason Ekstrand <jason@jlekstrand.net> writes:

> You can have a full reviewed-by

You're too kind :-)

Patch
diff mbox

diff --git a/src/intel/vulkan/anv_queue.c b/src/intel/vulkan/anv_queue.c
index 6fe04a0a19d..8df99c84549 100644
--- a/src/intel/vulkan/anv_queue.c
+++ b/src/intel/vulkan/anv_queue.c
@@ -459,12 +459,32 @@  gettime_ns(void)
    return (uint64_t)current.tv_sec * NSEC_PER_SEC + current.tv_nsec;
 }
 
+static uint64_t anv_get_absolute_timeout(uint64_t timeout)
+{
+   if (timeout == 0)
+      return 0;
+   uint64_t current_time = gettime_ns();
+
+   timeout = MIN2(INT64_MAX - current_time, timeout);
+
+   return (current_time + timeout);
+}
+
+static int64_t anv_get_relative_timeout(uint64_t abs_timeout)
+{
+   uint64_t now = gettime_ns();
+
+   if (abs_timeout < now)
+      return 0;
+   return abs_timeout - now;
+}
+
 static VkResult
 anv_wait_for_syncobj_fences(struct anv_device *device,
                             uint32_t fenceCount,
                             const VkFence *pFences,
                             bool waitAll,
-                            uint64_t _timeout)
+                            uint64_t abs_timeout_ns)
 {
    uint32_t *syncobjs = vk_zalloc(&device->alloc,
                                   sizeof(*syncobjs) * fenceCount, 8,
@@ -484,19 +504,6 @@  anv_wait_for_syncobj_fences(struct anv_device *device,
       syncobjs[i] = impl->syncobj;
    }
 
-   int64_t abs_timeout_ns = 0;
-   if (_timeout > 0) {
-      uint64_t current_ns = gettime_ns();
-
-      /* Add but saturate to INT32_MAX */
-      if (current_ns + _timeout < current_ns)
-         abs_timeout_ns = INT64_MAX;
-      else if (current_ns + _timeout > INT64_MAX)
-         abs_timeout_ns = INT64_MAX;
-      else
-         abs_timeout_ns = current_ns + _timeout;
-   }
-
    /* The gem_syncobj_wait ioctl may return early due to an inherent
     * limitation in the way it computes timeouts.  Loop until we've actually
     * passed the timeout.
@@ -665,6 +672,67 @@  done:
    return result;
 }
 
+static VkResult
+anv_wait_for_fences(struct anv_device *device,
+                    uint32_t fenceCount,
+                    const VkFence *pFences,
+                    bool waitAll,
+                    uint64_t abs_timeout)
+{
+   VkResult result = VK_SUCCESS;
+
+   if (fenceCount <= 1 || waitAll) {
+      for (uint32_t i = 0; i < fenceCount; i++) {
+         ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
+         switch (fence->permanent.type) {
+         case ANV_FENCE_TYPE_BO:
+            result = anv_wait_for_bo_fences(
+               device, 1, &pFences[i], true,
+               anv_get_relative_timeout(abs_timeout));
+            break;
+         case ANV_FENCE_TYPE_SYNCOBJ:
+            result = anv_wait_for_syncobj_fences(device, 1, &pFences[i],
+                                                 true, abs_timeout);
+            break;
+         case ANV_FENCE_TYPE_NONE:
+            result = VK_SUCCESS;
+            break;
+         }
+         if (result != VK_SUCCESS)
+            return result;
+      }
+   } else {
+      while (gettime_ns() < abs_timeout) {
+         for (uint32_t i = 0; i < fenceCount; i++) {
+            if (anv_wait_for_fences(device, 1, &pFences[i], true, 0) == VK_SUCCESS)
+               return VK_SUCCESS;
+         }
+      }
+      result = VK_TIMEOUT;
+   }
+   return result;
+}
+
+static bool anv_all_fences_syncobj(uint32_t fenceCount, const VkFence *pFences)
+{
+   for (uint32_t i = 0; i < fenceCount; ++i) {
+      ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
+      if (fence->permanent.type != ANV_FENCE_TYPE_SYNCOBJ)
+         return false;
+   }
+   return true;
+}
+
+static bool anv_all_fences_bo(uint32_t fenceCount, const VkFence *pFences)
+{
+   for (uint32_t i = 0; i < fenceCount; ++i) {
+      ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
+      if (fence->permanent.type != ANV_FENCE_TYPE_BO)
+         return false;
+   }
+   return true;
+}
+
 VkResult anv_WaitForFences(
     VkDevice                                    _device,
     uint32_t                                    fenceCount,
@@ -677,12 +745,15 @@  VkResult anv_WaitForFences(
    if (unlikely(device->lost))
       return VK_ERROR_DEVICE_LOST;
 
-   if (device->instance->physicalDevice.has_syncobj_wait) {
+   if (anv_all_fences_syncobj(fenceCount, pFences)) {
       return anv_wait_for_syncobj_fences(device, fenceCount, pFences,
-                                         waitAll, timeout);
-   } else {
+                                         waitAll, anv_get_absolute_timeout(timeout));
+   } else if (anv_all_fences_bo(fenceCount, pFences)) {
       return anv_wait_for_bo_fences(device, fenceCount, pFences,
                                     waitAll, timeout);
+   } else {
+      return anv_wait_for_fences(device, fenceCount, pFences,
+                                 waitAll, anv_get_absolute_timeout(timeout));
    }
 }