diff mbox series

vulkan: Add VK_EXT_calibrated_timestamps extension (radv and anv) [v5]

Message ID 20181017164912.1704-1-keithp@keithp.com
State New, archived
Headers show
Series vulkan: Add VK_EXT_calibrated_timestamps extension (radv and anv) [v5] | expand

Commit Message

Keith Packard Oct. 17, 2018, 4:49 p.m. UTC
Offers three clocks: device, clock monotonic, and clock monotonic
raw. Could use some kernel support to reduce the deviation between
clock values.

v2:
	Ensure deviation is at least as big as the GPU time interval.

v3:
	Set device->lost when returning DEVICE_LOST.
	Use MAX2 and DIV_ROUND_UP instead of open coding these.
	Delete spurious TIMESTAMP in radv version.

	Suggested-by: Jason Ekstrand <jason@jlekstrand.net>
	Suggested-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>

v4:
	Add anv_gem_reg_read to anv_gem_stubs.c

	Suggested-by: Jason Ekstrand <jason@jlekstrand.net>

v5:
	Adjust maxDeviation computation to max(sampled_clock_period) +
	sample_interval.

	Suggested-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
	Suggested-by: Jason Ekstrand <jason@jlekstrand.net>

Signed-off-by: Keith Packard <keithp@keithp.com>
---
 src/amd/vulkan/radv_device.c       | 119 +++++++++++++++++++++++++++
 src/amd/vulkan/radv_extensions.py  |   1 +
 src/intel/vulkan/anv_device.c      | 127 +++++++++++++++++++++++++++++
 src/intel/vulkan/anv_extensions.py |   1 +
 src/intel/vulkan/anv_gem.c         |  13 +++
 src/intel/vulkan/anv_gem_stubs.c   |   7 ++
 src/intel/vulkan/anv_private.h     |   2 +
 7 files changed, 270 insertions(+)

Comments

Jason Ekstrand Oct. 17, 2018, 5:06 p.m. UTC | #1
I like it

Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>

On Wed, Oct 17, 2018 at 11:49 AM Keith Packard <keithp@keithp.com> wrote:

> Offers three clocks, device, clock monotonic and clock monotonic
> raw. Could use some kernel support to reduce the deviation between
> clock values.
>
> v2:
>         Ensure deviation is at least as big as the GPU time interval.
>
> v3:
>         Set device->lost when returning DEVICE_LOST.
>         Use MAX2 and DIV_ROUND_UP instead of open coding these.
>         Delete spurious TIMESTAMP in radv version.
>
>         Suggested-by: Jason Ekstrand <jason@jlekstrand.net>
>         Suggested-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>
> v4:
>         Add anv_gem_reg_read to anv_gem_stubs.c
>
>         Suggested-by: Jason Ekstrand <jason@jlekstrand.net>
>
> v5:
>         Adjust maxDeviation computation to max(sampled_clock_period) +
>         sample_interval.
>
>         Suggested-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
>         Suggested-by: Jason Ekstrand <jason@jlekstrand.net>
>
> Signed-off-by: Keith Packard <keithp@keithp.com>
> ---
>  src/amd/vulkan/radv_device.c       | 119 +++++++++++++++++++++++++++
>  src/amd/vulkan/radv_extensions.py  |   1 +
>  src/intel/vulkan/anv_device.c      | 127 +++++++++++++++++++++++++++++
>  src/intel/vulkan/anv_extensions.py |   1 +
>  src/intel/vulkan/anv_gem.c         |  13 +++
>  src/intel/vulkan/anv_gem_stubs.c   |   7 ++
>  src/intel/vulkan/anv_private.h     |   2 +
>  7 files changed, 270 insertions(+)
>
> diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
> index 174922780fc..4a705a724ef 100644
> --- a/src/amd/vulkan/radv_device.c
> +++ b/src/amd/vulkan/radv_device.c
> @@ -4955,3 +4955,122 @@ radv_GetDeviceGroupPeerMemoryFeatures(
>                                VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT |
>                                VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT;
>  }
> +
> +static const VkTimeDomainEXT radv_time_domains[] = {
> +       VK_TIME_DOMAIN_DEVICE_EXT,
> +       VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT,
> +       VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT,
> +};
> +
> +VkResult radv_GetPhysicalDeviceCalibrateableTimeDomainsEXT(
> +       VkPhysicalDevice                             physicalDevice,
> +       uint32_t                                     *pTimeDomainCount,
> +       VkTimeDomainEXT                              *pTimeDomains)
> +{
> +       int d;
> +       VK_OUTARRAY_MAKE(out, pTimeDomains, pTimeDomainCount);
> +
> +       for (d = 0; d < ARRAY_SIZE(radv_time_domains); d++) {
> +               vk_outarray_append(&out, i) {
> +                       *i = radv_time_domains[d];
> +               }
> +       }
> +
> +       return vk_outarray_status(&out);
> +}
> +
> +static uint64_t
> +radv_clock_gettime(clockid_t clock_id)
> +{
> +       struct timespec current;
> +       int ret;
> +
> +       ret = clock_gettime(clock_id, &current);
> +       if (ret < 0 && clock_id == CLOCK_MONOTONIC_RAW)
> +               ret = clock_gettime(CLOCK_MONOTONIC, &current);
> +       if (ret < 0)
> +               return 0;
> +
> +       return (uint64_t) current.tv_sec * 1000000000ULL + current.tv_nsec;
> +}
> +
> +VkResult radv_GetCalibratedTimestampsEXT(
> +       VkDevice                                     _device,
> +       uint32_t                                     timestampCount,
> +       const VkCalibratedTimestampInfoEXT           *pTimestampInfos,
> +       uint64_t                                     *pTimestamps,
> +       uint64_t                                     *pMaxDeviation)
> +{
> +       RADV_FROM_HANDLE(radv_device, device, _device);
> +       uint32_t clock_crystal_freq =
> device->physical_device->rad_info.clock_crystal_freq;
> +       int d;
> +       uint64_t begin, end;
> +        uint64_t max_clock_period = 0;
> +
> +       begin = radv_clock_gettime(CLOCK_MONOTONIC_RAW);
> +
> +       for (d = 0; d < timestampCount; d++) {
> +               switch (pTimestampInfos[d].timeDomain) {
> +               case VK_TIME_DOMAIN_DEVICE_EXT:
> +                       pTimestamps[d] =
> device->ws->query_value(device->ws,
> +
> RADEON_TIMESTAMP);
> +                        uint64_t device_period = DIV_ROUND_UP(1000000,
> clock_crystal_freq);
> +                        max_clock_period = MAX2(max_clock_period,
> device_period);
> +                       break;
> +               case VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT:
> +                       pTimestamps[d] =
> radv_clock_gettime(CLOCK_MONOTONIC);
> +                        max_clock_period = MAX2(max_clock_period, 1);
> +                       break;
> +
> +               case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT:
> +                       pTimestamps[d] = begin;
> +                       break;
> +               default:
> +                       pTimestamps[d] = 0;
> +                       break;
> +               }
> +       }
> +
> +       end = radv_clock_gettime(CLOCK_MONOTONIC_RAW);
> +
> +        /*
> +         * The maximum deviation is the sum of the interval over which we
> +         * perform the sampling and the maximum period of any sampled
> +         * clock. That's because the maximum skew between any two sampled
> +         * clock edges is when the sampled clock with the largest period is
> +         * sampled at the end of that period but right at the beginning of the
> +         * sampling interval and some other clock is sampled right at the
> +         * beginning of its sampling period and right at the end of the
> +         * sampling interval. Let's assume the GPU has the longest clock
> +         * period and that the application is sampling GPU and monotonic:
> +         *
> +         *                               s                 e
> +         *                      w x y z 0 1 2 3 4 5 6 7 8 9 a b c d e f
> +         *     Raw              -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
> +         *
> +         *                               g
> +         *               0         1         2         3
> +         *     GPU       -----_____-----_____-----_____-----_____
> +         *
> +         *                                                m
> +         *                                         x y z 0 1 2 3 4 5 6 7 8 9 a b c
> +         *     Monotonic                           -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
> +         *
> +         *     Interval                     <----------------->
> +         *     Deviation           <-------------------------->
> +         *
> +         *             s  = read(raw)       2
> +         *             g  = read(GPU)       1
> +         *             m  = read(monotonic) 2
> +         *             e  = read(raw)       b
> +         *
> +         * We round the sample interval up by one tick to cover sampling
> error
> +         * in the interval clock
> +         */
> +
> +        uint64_t sample_interval = end - begin + 1;
> +
> +        *pMaxDeviation = sample_interval + max_clock_period;
> +
> +       return VK_SUCCESS;
> +}
> diff --git a/src/amd/vulkan/radv_extensions.py
> b/src/amd/vulkan/radv_extensions.py
> index 5dcedae1c63..4c81d3f0068 100644
> --- a/src/amd/vulkan/radv_extensions.py
> +++ b/src/amd/vulkan/radv_extensions.py
> @@ -92,6 +92,7 @@ EXTENSIONS = [
>      Extension('VK_KHR_display',                          23,
> 'VK_USE_PLATFORM_DISPLAY_KHR'),
>      Extension('VK_EXT_direct_mode_display',               1,
> 'VK_USE_PLATFORM_DISPLAY_KHR'),
>      Extension('VK_EXT_acquire_xlib_display',              1,
> 'VK_USE_PLATFORM_XLIB_XRANDR_EXT'),
> +    Extension('VK_EXT_calibrated_timestamps',             1, True),
>      Extension('VK_EXT_conditional_rendering',             1, True),
>      Extension('VK_EXT_conservative_rasterization',        1,
> 'device->rad_info.chip_class >= GFX9'),
>      Extension('VK_EXT_display_surface_counter',           1,
> 'VK_USE_PLATFORM_DISPLAY_KHR'),
> diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
> index a2551452eb1..076ff3a57f6 100644
> --- a/src/intel/vulkan/anv_device.c
> +++ b/src/intel/vulkan/anv_device.c
> @@ -3021,6 +3021,133 @@ void anv_DestroyFramebuffer(
>     vk_free2(&device->alloc, pAllocator, fb);
>  }
>
> +static const VkTimeDomainEXT anv_time_domains[] = {
> +   VK_TIME_DOMAIN_DEVICE_EXT,
> +   VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT,
> +   VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT,
> +};
> +
> +VkResult anv_GetPhysicalDeviceCalibrateableTimeDomainsEXT(
> +   VkPhysicalDevice                             physicalDevice,
> +   uint32_t                                     *pTimeDomainCount,
> +   VkTimeDomainEXT                              *pTimeDomains)
> +{
> +   int d;
> +   VK_OUTARRAY_MAKE(out, pTimeDomains, pTimeDomainCount);
> +
> +   for (d = 0; d < ARRAY_SIZE(anv_time_domains); d++) {
> +      vk_outarray_append(&out, i) {
> +         *i = anv_time_domains[d];
> +      }
> +   }
> +
> +   return vk_outarray_status(&out);
> +}
> +
> +static uint64_t
> +anv_clock_gettime(clockid_t clock_id)
> +{
> +   struct timespec current;
> +   int ret;
> +
> +   ret = clock_gettime(clock_id, &current);
> +   if (ret < 0 && clock_id == CLOCK_MONOTONIC_RAW)
> +      ret = clock_gettime(CLOCK_MONOTONIC, &current);
> +   if (ret < 0)
> +      return 0;
> +
> +   return (uint64_t) current.tv_sec * 1000000000ULL + current.tv_nsec;
> +}
> +
> +#define TIMESTAMP 0x2358
> +
> +VkResult anv_GetCalibratedTimestampsEXT(
> +   VkDevice                                     _device,
> +   uint32_t                                     timestampCount,
> +   const VkCalibratedTimestampInfoEXT           *pTimestampInfos,
> +   uint64_t                                     *pTimestamps,
> +   uint64_t                                     *pMaxDeviation)
> +{
> +   ANV_FROM_HANDLE(anv_device, device, _device);
> +   uint64_t timestamp_frequency = device->info.timestamp_frequency;
> +   int  ret;
> +   int d;
> +   uint64_t begin, end;
> +   uint64_t max_clock_period = 0;
> +
> +   begin = anv_clock_gettime(CLOCK_MONOTONIC_RAW);
> +
> +   for (d = 0; d < timestampCount; d++) {
> +      switch (pTimestampInfos[d].timeDomain) {
> +      case VK_TIME_DOMAIN_DEVICE_EXT:
> +         ret = anv_gem_reg_read(device, TIMESTAMP | 1,
> +                                &pTimestamps[d]);
> +
> +         if (ret != 0) {
> +            device->lost = TRUE;
> +            return VK_ERROR_DEVICE_LOST;
> +         }
> +         uint64_t device_period = DIV_ROUND_UP(1000000000,
> timestamp_frequency);
> +         max_clock_period = MAX2(max_clock_period, device_period);
> +         break;
> +      case VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT:
> +         pTimestamps[d] = anv_clock_gettime(CLOCK_MONOTONIC);
> +         max_clock_period = MAX2(max_clock_period, 1);
> +         break;
> +
> +      case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT:
> +         pTimestamps[d] = begin;
> +         break;
> +      default:
> +         pTimestamps[d] = 0;
> +         break;
> +      }
> +   }
> +
> +   end = anv_clock_gettime(CLOCK_MONOTONIC_RAW);
> +
> +    /*
> +     * The maximum deviation is the sum of the interval over which we
> +     * perform the sampling and the maximum period of any sampled
> +     * clock. That's because the maximum skew between any two sampled
> +     * clock edges is when the sampled clock with the largest period is
> +     * sampled at the end of that period but right at the beginning of the
> +     * sampling interval and some other clock is sampled right at the
> +     * beginning of its sampling period and right at the end of the
> +     * sampling interval. Let's assume the GPU has the longest clock
> +     * period and that the application is sampling GPU and monotonic:
> +     *
> +     *                               s                 e
> +     *                  w x y z 0 1 2 3 4 5 6 7 8 9 a b c d e f
> +     * Raw              -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
> +     *
> +     *                               g
> +     *           0         1         2         3
> +     * GPU       -----_____-----_____-----_____-----_____
> +     *
> +     *                                                m
> +     *                                     x y z 0 1 2 3 4 5 6 7 8 9 a b c
> +     * Monotonic                           -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
> +     *
> +     * Interval                     <----------------->
> +     * Deviation           <-------------------------->
> +     *
> +     *         s  = read(raw)       2
> +     *         g  = read(GPU)       1
> +     *         m  = read(monotonic) 2
> +     *         e  = read(raw)       b
> +     *
> +     * We round the sample interval up by one tick to cover sampling error
> +     * in the interval clock
> +     */
> +
> +   uint64_t sample_interval = end - begin + 1;
> +
> +   *pMaxDeviation = sample_interval + max_clock_period;
> +
> +   return VK_SUCCESS;
> +}
> +
>  /* vk_icd.h does not declare this function, so we declare it here to
>   * suppress Wmissing-prototypes.
>   */
> diff --git a/src/intel/vulkan/anv_extensions.py
> b/src/intel/vulkan/anv_extensions.py
> index d4915c95013..a8535964da7 100644
> --- a/src/intel/vulkan/anv_extensions.py
> +++ b/src/intel/vulkan/anv_extensions.py
> @@ -126,6 +126,7 @@ EXTENSIONS = [
>      Extension('VK_EXT_vertex_attribute_divisor',          3, True),
>      Extension('VK_EXT_post_depth_coverage',               1,
> 'device->info.gen >= 9'),
>      Extension('VK_EXT_sampler_filter_minmax',             1,
> 'device->info.gen >= 9'),
> +    Extension('VK_EXT_calibrated_timestamps',             1, True),
>  ]
>
>  class VkVersion:
> diff --git a/src/intel/vulkan/anv_gem.c b/src/intel/vulkan/anv_gem.c
> index c43b5ef9e06..1bdf040c1a3 100644
> --- a/src/intel/vulkan/anv_gem.c
> +++ b/src/intel/vulkan/anv_gem.c
> @@ -423,6 +423,19 @@ anv_gem_fd_to_handle(struct anv_device *device, int
> fd)
>     return args.handle;
>  }
>
> +int
> +anv_gem_reg_read(struct anv_device *device, uint32_t offset, uint64_t
> *result)
> +{
> +   struct drm_i915_reg_read args = {
> +      .offset = offset
> +   };
> +
> +   int ret = anv_ioctl(device->fd, DRM_IOCTL_I915_REG_READ, &args);
> +
> +   *result = args.val;
> +   return ret;
> +}
> +
>  #ifndef SYNC_IOC_MAGIC
>  /* duplicated from linux/sync_file.h to avoid build-time dependency
>   * on new (v4.7) kernel headers.  Once distro's are mostly using
> diff --git a/src/intel/vulkan/anv_gem_stubs.c
> b/src/intel/vulkan/anv_gem_stubs.c
> index 5093bd5db1a..8cc3ad1f22e 100644
> --- a/src/intel/vulkan/anv_gem_stubs.c
> +++ b/src/intel/vulkan/anv_gem_stubs.c
> @@ -251,3 +251,10 @@ anv_gem_syncobj_wait(struct anv_device *device,
>  {
>     unreachable("Unused");
>  }
> +
> +int
> +anv_gem_reg_read(struct anv_device *device,
> +                 uint32_t offset, uint64_t *result)
> +{
> +   unreachable("Unused");
> +}
> diff --git a/src/intel/vulkan/anv_private.h
> b/src/intel/vulkan/anv_private.h
> index 599b903f25c..08376b00c8e 100644
> --- a/src/intel/vulkan/anv_private.h
> +++ b/src/intel/vulkan/anv_private.h
> @@ -1103,6 +1103,8 @@ int anv_gem_get_aperture(int fd, uint64_t *size);
>  int anv_gem_gpu_get_reset_stats(struct anv_device *device,
>                                  uint32_t *active, uint32_t *pending);
>  int anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle);
> +int anv_gem_reg_read(struct anv_device *device,
> +                     uint32_t offset, uint64_t *result);
>  uint32_t anv_gem_fd_to_handle(struct anv_device *device, int fd);
>  int anv_gem_set_caching(struct anv_device *device, uint32_t gem_handle,
> uint32_t caching);
>  int anv_gem_set_domain(struct anv_device *device, uint32_t gem_handle,
> --
> 2.19.1
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
<div dir="ltr"><div>I like it</div><div><br></div><div>Reviewed-by: Jason Ekstrand &lt;<a href="mailto:jason@jlekstrand.net">jason@jlekstrand.net</a>&gt;<br></div></div><br><div class="gmail_quote"><div dir="ltr">On Wed, Oct 17, 2018 at 11:49 AM Keith Packard &lt;<a href="mailto:keithp@keithp.com">keithp@keithp.com</a>&gt; wrote:<br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Offers three clocks, device, clock monotonic and clock monotonic<br>
raw. Could use some kernel support to reduce the deviation between<br>
clock values.<br>
<br>
v2:<br>
        Ensure deviation is at least as big as the GPU time interval.<br>
<br>
v3:<br>
        Set device-&gt;lost when returning DEVICE_LOST.<br>
        Use MAX2 and DIV_ROUND_UP instead of open coding these.<br>
        Delete spurious TIMESTAMP in radv version.<br>
<br>
        Suggested-by: Jason Ekstrand &lt;<a href="mailto:jason@jlekstrand.net" target="_blank">jason@jlekstrand.net</a>&gt;<br>
        Suggested-by: Lionel Landwerlin &lt;<a href="mailto:lionel.g.landwerlin@intel.com" target="_blank">lionel.g.landwerlin@intel.com</a>&gt;<br>
<br>
v4:<br>
        Add anv_gem_reg_read to anv_gem_stubs.c<br>
<br>
        Suggested-by: Jason Ekstrand &lt;<a href="mailto:jason@jlekstrand.net" target="_blank">jason@jlekstrand.net</a>&gt;<br>
<br>
v5:<br>
        Adjust maxDeviation computation to max(sampled_clock_period) +<br>
        sample_interval.<br>
<br>
        Suggested-by: Bas Nieuwenhuizen &lt;<a href="mailto:bas@basnieuwenhuizen.nl" target="_blank">bas@basnieuwenhuizen.nl</a>&gt;<br>
        Suggested-by: Jason Ekstrand &lt;<a href="mailto:jason@jlekstrand.net" target="_blank">jason@jlekstrand.net</a>&gt;<br>
<br>
Signed-off-by: Keith Packard &lt;<a href="mailto:keithp@keithp.com" target="_blank">keithp@keithp.com</a>&gt;<br>
---<br>
 src/amd/vulkan/radv_device.c       | 119 +++++++++++++++++++++++++++<br>
 src/amd/vulkan/radv_extensions.py  |   1 +<br>
 src/intel/vulkan/anv_device.c      | 127 +++++++++++++++++++++++++++++<br>
 src/intel/vulkan/anv_extensions.py |   1 +<br>
 src/intel/vulkan/anv_gem.c         |  13 +++<br>
 src/intel/vulkan/anv_gem_stubs.c   |   7 ++<br>
 src/intel/vulkan/anv_private.h     |   2 +<br>
 7 files changed, 270 insertions(+)<br>
<br>
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c<br>
index 174922780fc..4a705a724ef 100644<br>
--- a/src/amd/vulkan/radv_device.c<br>
+++ b/src/amd/vulkan/radv_device.c<br>
@@ -4955,3 +4955,122 @@ radv_GetDeviceGroupPeerMemoryFeatures(<br>
                               VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT |<br>
                               VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT;<br>
 }<br>
+<br>
+static const VkTimeDomainEXT radv_time_domains[] = {<br>
+       VK_TIME_DOMAIN_DEVICE_EXT,<br>
+       VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT,<br>
+       VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT,<br>
+};<br>
+<br>
+VkResult radv_GetPhysicalDeviceCalibrateableTimeDomainsEXT(<br>
+       VkPhysicalDevice                             physicalDevice,<br>
+       uint32_t                                     *pTimeDomainCount,<br>
+       VkTimeDomainEXT                              *pTimeDomains)<br>
+{<br>
+       int d;<br>
+       VK_OUTARRAY_MAKE(out, pTimeDomains, pTimeDomainCount);<br>
+<br>
+       for (d = 0; d &lt; ARRAY_SIZE(radv_time_domains); d++) {<br>
+               vk_outarray_append(&amp;out, i) {<br>
+                       *i = radv_time_domains[d];<br>
+               }<br>
+       }<br>
+<br>
+       return vk_outarray_status(&amp;out);<br>
+}<br>
+<br>
+static uint64_t<br>
+radv_clock_gettime(clockid_t clock_id)<br>
+{<br>
+       struct timespec current;<br>
+       int ret;<br>
+<br>
+       ret = clock_gettime(clock_id, &amp;current);<br>
+       if (ret &lt; 0 &amp;&amp; clock_id == CLOCK_MONOTONIC_RAW)<br>
+               ret = clock_gettime(CLOCK_MONOTONIC, &amp;current);<br>
+       if (ret &lt; 0)<br>
+               return 0;<br>
+<br>
+       return (uint64_t) current.tv_sec * 1000000000ULL + current.tv_nsec;<br>
+}<br>
+<br>
+VkResult radv_GetCalibratedTimestampsEXT(<br>
+       VkDevice                                     _device,<br>
+       uint32_t                                     timestampCount,<br>
+       const VkCalibratedTimestampInfoEXT           *pTimestampInfos,<br>
+       uint64_t                                     *pTimestamps,<br>
+       uint64_t                                     *pMaxDeviation)<br>
+{<br>
+       RADV_FROM_HANDLE(radv_device, device, _device);<br>
+       uint32_t clock_crystal_freq = device-&gt;physical_device-&gt;rad_info.clock_crystal_freq;<br>
+       int d;<br>
+       uint64_t begin, end;<br>
+        uint64_t max_clock_period = 0;<br>
+<br>
+       begin = radv_clock_gettime(CLOCK_MONOTONIC_RAW);<br>
+<br>
+       for (d = 0; d &lt; timestampCount; d++) {<br>
+               switch (pTimestampInfos[d].timeDomain) {<br>
+               case VK_TIME_DOMAIN_DEVICE_EXT:<br>
+                       pTimestamps[d] = device-&gt;ws-&gt;query_value(device-&gt;ws,<br>
+                                                                RADEON_TIMESTAMP);<br>
+                        uint64_t device_period = DIV_ROUND_UP(1000000, clock_crystal_freq);<br>
+                        max_clock_period = MAX2(max_clock_period, device_period);<br>
+                       break;<br>
+               case VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT:<br>
+                       pTimestamps[d] = radv_clock_gettime(CLOCK_MONOTONIC);<br>
+                        max_clock_period = MAX2(max_clock_period, 1);<br>
+                       break;<br>
+<br>
+               case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT:<br>
+                       pTimestamps[d] = begin;<br>
+                       break;<br>
+               default:<br>
+                       pTimestamps[d] = 0;<br>
+                       break;<br>
+               }<br>
+       }<br>
+<br>
+       end = radv_clock_gettime(CLOCK_MONOTONIC_RAW);<br>
+<br>
+        /*<br>
+         * The maximum deviation is the sum of the interval over which we<br>
+         * perform the sampling and the maximum period of any sampled<br>
+         * clock. That&#39;s because the maximum skew between any two sampled<br>
+         * clock edges is when the sampled clock with the largest period is<br>
+         * sampled at the end of that period but right at the beginning of the<br>
+         * sampling interval and some other clock is sampled right at the<br>
+         * beginning of its sampling period and right at the end of the<br>
+         * sampling interval. Let&#39;s assume the GPU has the longest clock<br>
+         * period and that the application is sampling GPU and monotonic:<br>
+         *<br>
+         *                               s                 e<br>
+         *                      w x y z 0 1 2 3 4 5 6 7 8 9 a b c d e f<br>
+         *     Raw              -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-<br>
+         *<br>
+         *                               g<br>
+         *               0         1         2         3<br>
+         *     GPU       -----_____-----_____-----_____-----_____<br>
+         *<br>
+         *                                                m<br>
+         *                                         x y z 0 1 2 3 4 5 6 7 8 9 a b c<br>
+         *     Monotonic                           -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-<br>
+         *<br>
+         *     Interval                     &lt;-----------------&gt;<br>
+         *     Deviation           &lt;--------------------------&gt;<br>
+         *<br>
+         *             s  = read(raw)       2<br>
+         *             g  = read(GPU)       1<br>
+         *             m  = read(monotonic) 2<br>
+         *             e  = read(raw)       b<br>
+         *<br>
+         * We round the sample interval up by one tick to cover sampling error<br>
+         * in the interval clock<br>
+         */<br>
+<br>
+        uint64_t sample_interval = end - begin + 1;<br>
+<br>
+        *pMaxDeviation = sample_interval + max_clock_period;<br>
+<br>
+       return VK_SUCCESS;<br>
+}<br>
diff --git a/src/amd/vulkan/radv_extensions.py b/src/amd/vulkan/radv_extensions.py<br>
index 5dcedae1c63..4c81d3f0068 100644<br>
--- a/src/amd/vulkan/radv_extensions.py<br>
+++ b/src/amd/vulkan/radv_extensions.py<br>
@@ -92,6 +92,7 @@ EXTENSIONS = [<br>
     Extension(&#39;VK_KHR_display&#39;,                          23, &#39;VK_USE_PLATFORM_DISPLAY_KHR&#39;),<br>
     Extension(&#39;VK_EXT_direct_mode_display&#39;,               1, &#39;VK_USE_PLATFORM_DISPLAY_KHR&#39;),<br>
     Extension(&#39;VK_EXT_acquire_xlib_display&#39;,              1, &#39;VK_USE_PLATFORM_XLIB_XRANDR_EXT&#39;),<br>
+    Extension(&#39;VK_EXT_calibrated_timestamps&#39;,             1, True),<br>
     Extension(&#39;VK_EXT_conditional_rendering&#39;,             1, True),<br>
     Extension(&#39;VK_EXT_conservative_rasterization&#39;,        1, &#39;device-&gt;rad_info.chip_class &gt;= GFX9&#39;),<br>
     Extension(&#39;VK_EXT_display_surface_counter&#39;,           1, &#39;VK_USE_PLATFORM_DISPLAY_KHR&#39;),<br>
diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c<br>
index a2551452eb1..076ff3a57f6 100644<br>
--- a/src/intel/vulkan/anv_device.c<br>
+++ b/src/intel/vulkan/anv_device.c<br>
@@ -3021,6 +3021,133 @@ void anv_DestroyFramebuffer(<br>
    vk_free2(&amp;device-&gt;alloc, pAllocator, fb);<br>
 }<br>
<br>
+static const VkTimeDomainEXT anv_time_domains[] = {<br>
+   VK_TIME_DOMAIN_DEVICE_EXT,<br>
+   VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT,<br>
+   VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT,<br>
+};<br>
+<br>
+VkResult anv_GetPhysicalDeviceCalibrateableTimeDomainsEXT(<br>
+   VkPhysicalDevice                             physicalDevice,<br>
+   uint32_t                                     *pTimeDomainCount,<br>
+   VkTimeDomainEXT                              *pTimeDomains)<br>
+{<br>
+   int d;<br>
+   VK_OUTARRAY_MAKE(out, pTimeDomains, pTimeDomainCount);<br>
+<br>
+   for (d = 0; d &lt; ARRAY_SIZE(anv_time_domains); d++) {<br>
+      vk_outarray_append(&amp;out, i) {<br>
+         *i = anv_time_domains[d];<br>
+      }<br>
+   }<br>
+<br>
+   return vk_outarray_status(&amp;out);<br>
+}<br>
+<br>
+static uint64_t<br>
+anv_clock_gettime(clockid_t clock_id)<br>
+{<br>
+   struct timespec current;<br>
+   int ret;<br>
+<br>
+   ret = clock_gettime(clock_id, &amp;current);<br>
+   if (ret &lt; 0 &amp;&amp; clock_id == CLOCK_MONOTONIC_RAW)<br>
+      ret = clock_gettime(CLOCK_MONOTONIC, &amp;current);<br>
+   if (ret &lt; 0)<br>
+      return 0;<br>
+<br>
+   return (uint64_t) current.tv_sec * 1000000000ULL + current.tv_nsec;<br>
+}<br>
+<br>
+#define TIMESTAMP 0x2358<br>
+<br>
+VkResult anv_GetCalibratedTimestampsEXT(<br>
+   VkDevice                                     _device,<br>
+   uint32_t                                     timestampCount,<br>
+   const VkCalibratedTimestampInfoEXT           *pTimestampInfos,<br>
+   uint64_t                                     *pTimestamps,<br>
+   uint64_t                                     *pMaxDeviation)<br>
+{<br>
+   ANV_FROM_HANDLE(anv_device, device, _device);<br>
+   uint64_t timestamp_frequency = device-&gt;info.timestamp_frequency;<br>
+   int  ret;<br>
+   int d;<br>
+   uint64_t begin, end;<br>
+   uint64_t max_clock_period = 0;<br>
+<br>
+   begin = anv_clock_gettime(CLOCK_MONOTONIC_RAW);<br>
+<br>
+   for (d = 0; d &lt; timestampCount; d++) {<br>
+      switch (pTimestampInfos[d].timeDomain) {<br>
+      case VK_TIME_DOMAIN_DEVICE_EXT:<br>
+         ret = anv_gem_reg_read(device, TIMESTAMP | 1,<br>
+                                &amp;pTimestamps[d]);<br>
+<br>
+         if (ret != 0) {<br>
+            device-&gt;lost = TRUE;<br>
+            return VK_ERROR_DEVICE_LOST;<br>
+         }<br>
+         uint64_t device_period = DIV_ROUND_UP(1000000000, timestamp_frequency);<br>
+         max_clock_period = MAX2(max_clock_period, device_period);<br>
+         break;<br>
+      case VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT:<br>
+         pTimestamps[d] = anv_clock_gettime(CLOCK_MONOTONIC);<br>
+         max_clock_period = MAX2(max_clock_period, 1);<br>
+         break;<br>
+<br>
+      case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT:<br>
+         pTimestamps[d] = begin;<br>
+         break;<br>
+      default:<br>
+         pTimestamps[d] = 0;<br>
+         break;<br>
+      }<br>
+   }<br>
+<br>
+   end = anv_clock_gettime(CLOCK_MONOTONIC_RAW);<br>
+<br>
+    /*<br>
+     * The maximum deviation is the sum of the interval over which we<br>
+     * perform the sampling and the maximum period of any sampled<br>
+     * clock. That&#39;s because the maximum skew between any two sampled<br>
+     * clock edges is when the sampled clock with the largest period is<br>
+     * sampled at the end of that period but right at the beginning of the<br>
+     * sampling interval and some other clock is sampled right at the<br>
+     * beginning of its sampling period and right at the end of the<br>
+     * sampling interval. Let&#39;s assume the GPU has the longest clock<br>
+     * period and that the application is sampling GPU and monotonic:<br>
+     *<br>
+     *                               s                 e<br>
+     *                  w x y z 0 1 2 3 4 5 6 7 8 9 a b c d e f<br>
+     * Raw              -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-<br>
+     *<br>
+     *                               g<br>
+     *           0         1         2         3<br>
+     * GPU       -----_____-----_____-----_____-----_____<br>
+     *<br>
+     *                                                m<br>
+     *                                     x y z 0 1 2 3 4 5 6 7 8 9 a b c<br>
+     * Monotonic                           -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-<br>
+     *<br>
+     * Interval                     &lt;-----------------&gt;<br>
+     * Deviation           &lt;--------------------------&gt;<br>
+     *<br>
+     *         s  = read(raw)       2<br>
+     *         g  = read(GPU)       1<br>
+     *         m  = read(monotonic) 2<br>
+     *         e  = read(raw)       b<br>
+     *<br>
+     * We round the sample interval up by one tick to cover sampling error<br>
+     * in the interval clock<br>
+     */<br>
+<br>
+   uint64_t sample_interval = end - begin + 1;<br>
+<br>
+   *pMaxDeviation = sample_interval + max_clock_period;<br>
+<br>
+   return VK_SUCCESS;<br>
+}<br>
+<br>
 /* vk_icd.h does not declare this function, so we declare it here to<br>
  * suppress Wmissing-prototypes.<br>
  */<br>
diff --git a/src/intel/vulkan/anv_extensions.py b/src/intel/vulkan/anv_extensions.py<br>
index d4915c95013..a8535964da7 100644<br>
--- a/src/intel/vulkan/anv_extensions.py<br>
+++ b/src/intel/vulkan/anv_extensions.py<br>
@@ -126,6 +126,7 @@ EXTENSIONS = [<br>
     Extension(&#39;VK_EXT_vertex_attribute_divisor&#39;,          3, True),<br>
     Extension(&#39;VK_EXT_post_depth_coverage&#39;,               1, &#39;device-&gt;info.gen &gt;= 9&#39;),<br>
     Extension(&#39;VK_EXT_sampler_filter_minmax&#39;,             1, &#39;device-&gt;info.gen &gt;= 9&#39;),<br>
+    Extension(&#39;VK_EXT_calibrated_timestamps&#39;,             1, True),<br>
 ]<br>
<br>
 class VkVersion:<br>
diff --git a/src/intel/vulkan/anv_gem.c b/src/intel/vulkan/anv_gem.c<br>
index c43b5ef9e06..1bdf040c1a3 100644<br>
--- a/src/intel/vulkan/anv_gem.c<br>
+++ b/src/intel/vulkan/anv_gem.c<br>
@@ -423,6 +423,19 @@ anv_gem_fd_to_handle(struct anv_device *device, int fd)<br>
    return args.handle;<br>
 }<br>
<br>
+int<br>
+anv_gem_reg_read(struct anv_device *device, uint32_t offset, uint64_t *result)<br>
+{<br>
+   struct drm_i915_reg_read args = {<br>
+      .offset = offset<br>
+   };<br>
+<br>
+   int ret = anv_ioctl(device-&gt;fd, DRM_IOCTL_I915_REG_READ, &amp;args);<br>
+<br>
+   *result = args.val;<br>
+   return ret;<br>
+}<br>
+<br>
 #ifndef SYNC_IOC_MAGIC<br>
 /* duplicated from linux/sync_file.h to avoid build-time dependency<br>
  * on new (v4.7) kernel headers.  Once distro&#39;s are mostly using<br>
diff --git a/src/intel/vulkan/anv_gem_stubs.c b/src/intel/vulkan/anv_gem_stubs.c<br>
index 5093bd5db1a..8cc3ad1f22e 100644<br>
--- a/src/intel/vulkan/anv_gem_stubs.c<br>
+++ b/src/intel/vulkan/anv_gem_stubs.c<br>
@@ -251,3 +251,10 @@ anv_gem_syncobj_wait(struct anv_device *device,<br>
 {<br>
    unreachable(&quot;Unused&quot;);<br>
 }<br>
+<br>
+int<br>
+anv_gem_reg_read(struct anv_device *device,<br>
+                 uint32_t offset, uint64_t *result)<br>
+{<br>
+   unreachable(&quot;Unused&quot;);<br>
+}<br>
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h<br>
index 599b903f25c..08376b00c8e 100644<br>
--- a/src/intel/vulkan/anv_private.h<br>
+++ b/src/intel/vulkan/anv_private.h<br>
@@ -1103,6 +1103,8 @@ int anv_gem_get_aperture(int fd, uint64_t *size);<br>
 int anv_gem_gpu_get_reset_stats(struct anv_device *device,<br>
                                 uint32_t *active, uint32_t *pending);<br>
 int anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle);<br>
+int anv_gem_reg_read(struct anv_device *device,<br>
+                     uint32_t offset, uint64_t *result);<br>
 uint32_t anv_gem_fd_to_handle(struct anv_device *device, int fd);<br>
 int anv_gem_set_caching(struct anv_device *device, uint32_t gem_handle, uint32_t caching);<br>
 int anv_gem_set_domain(struct anv_device *device, uint32_t gem_handle,<br>
-- <br>
2.19.1<br>
<br>
_______________________________________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org" target="_blank">mesa-dev@lists.freedesktop.org</a><br>
<a href="https://lists.freedesktop.org/mailman/listinfo/mesa-dev" rel="noreferrer" target="_blank">https://lists.freedesktop.org/mailman/listinfo/mesa-dev</a><br>
</blockquote></div>
Keith Packard Oct. 17, 2018, 5:24 p.m. UTC | #2
Jason Ekstrand <jason@jlekstrand.net> writes:

> I like it

When the comments are longer than the code, you know you're done?
Bas Nieuwenhuizen Oct. 17, 2018, 10:49 p.m. UTC | #3
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
On Wed, Oct 17, 2018 at 6:49 PM Keith Packard <keithp@keithp.com> wrote:
>
> Offers three clocks, device, clock monotonic and clock monotonic
> raw. Could use some kernel support to reduce the deviation between
> clock values.
>
> v2:
>         Ensure deviation is at least as big as the GPU time interval.
>
> v3:
>         Set device->lost when returning DEVICE_LOST.
>         Use MAX2 and DIV_ROUND_UP instead of open coding these.
>         Delete spurious TIMESTAMP in radv version.
>
>         Suggested-by: Jason Ekstrand <jason@jlekstrand.net>
>         Suggested-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>
> v4:
>         Add anv_gem_reg_read to anv_gem_stubs.c
>
>         Suggested-by: Jason Ekstrand <jason@jlekstrand.net>
>
> v5:
>         Adjust maxDeviation computation to max(sampled_clock_period) +
>         sample_interval.
>
>         Suggested-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
>         Suggested-by: Jason Ekstrand <jason@jlekstrand.net>
>
> Signed-off-by: Keith Packard <keithp@keithp.com>
> ---
>  src/amd/vulkan/radv_device.c       | 119 +++++++++++++++++++++++++++
>  src/amd/vulkan/radv_extensions.py  |   1 +
>  src/intel/vulkan/anv_device.c      | 127 +++++++++++++++++++++++++++++
>  src/intel/vulkan/anv_extensions.py |   1 +
>  src/intel/vulkan/anv_gem.c         |  13 +++
>  src/intel/vulkan/anv_gem_stubs.c   |   7 ++
>  src/intel/vulkan/anv_private.h     |   2 +
>  7 files changed, 270 insertions(+)
>
> diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
> index 174922780fc..4a705a724ef 100644
> --- a/src/amd/vulkan/radv_device.c
> +++ b/src/amd/vulkan/radv_device.c
> @@ -4955,3 +4955,122 @@ radv_GetDeviceGroupPeerMemoryFeatures(
>                                VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT |
>                                VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT;
>  }
> +
> +static const VkTimeDomainEXT radv_time_domains[] = {
> +       VK_TIME_DOMAIN_DEVICE_EXT,
> +       VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT,
> +       VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT,
> +};
> +
> +VkResult radv_GetPhysicalDeviceCalibrateableTimeDomainsEXT(
> +       VkPhysicalDevice                             physicalDevice,
> +       uint32_t                                     *pTimeDomainCount,
> +       VkTimeDomainEXT                              *pTimeDomains)
> +{
> +       int d;
> +       VK_OUTARRAY_MAKE(out, pTimeDomains, pTimeDomainCount);
> +
> +       for (d = 0; d < ARRAY_SIZE(radv_time_domains); d++) {
> +               vk_outarray_append(&out, i) {
> +                       *i = radv_time_domains[d];
> +               }
> +       }
> +
> +       return vk_outarray_status(&out);
> +}
> +
> +static uint64_t
> +radv_clock_gettime(clockid_t clock_id)
> +{
> +       struct timespec current;
> +       int ret;
> +
> +       ret = clock_gettime(clock_id, &current);
> +       if (ret < 0 && clock_id == CLOCK_MONOTONIC_RAW)
> +               ret = clock_gettime(CLOCK_MONOTONIC, &current);
> +       if (ret < 0)
> +               return 0;
> +
> +       return (uint64_t) current.tv_sec * 1000000000ULL + current.tv_nsec;
> +}
> +
> +VkResult radv_GetCalibratedTimestampsEXT(
> +       VkDevice                                     _device,
> +       uint32_t                                     timestampCount,
> +       const VkCalibratedTimestampInfoEXT           *pTimestampInfos,
> +       uint64_t                                     *pTimestamps,
> +       uint64_t                                     *pMaxDeviation)
> +{
> +       RADV_FROM_HANDLE(radv_device, device, _device);
> +       uint32_t clock_crystal_freq = device->physical_device->rad_info.clock_crystal_freq;
> +       int d;
> +       uint64_t begin, end;
> +        uint64_t max_clock_period = 0;
> +
> +       begin = radv_clock_gettime(CLOCK_MONOTONIC_RAW);
> +
> +       for (d = 0; d < timestampCount; d++) {
> +               switch (pTimestampInfos[d].timeDomain) {
> +               case VK_TIME_DOMAIN_DEVICE_EXT:
> +                       pTimestamps[d] = device->ws->query_value(device->ws,
> +                                                                RADEON_TIMESTAMP);
> +                        uint64_t device_period = DIV_ROUND_UP(1000000, clock_crystal_freq);
> +                        max_clock_period = MAX2(max_clock_period, device_period);
> +                       break;
> +               case VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT:
> +                       pTimestamps[d] = radv_clock_gettime(CLOCK_MONOTONIC);
> +                        max_clock_period = MAX2(max_clock_period, 1);
> +                       break;
> +
> +               case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT:
> +                       pTimestamps[d] = begin;
> +                       break;
> +               default:
> +                       pTimestamps[d] = 0;
> +                       break;
> +               }
> +       }
> +
> +       end = radv_clock_gettime(CLOCK_MONOTONIC_RAW);
> +
> +        /*
> +         * The maximum deviation is the sum of the interval over which we
> +         * perform the sampling and the maximum period of any sampled
> +         * clock. That's because the maximum skew between any two sampled
> +         * clock edges is when the sampled clock with the largest period is
> +         * sampled at the end of that period but right at the beginning of the
> +         * sampling interval and some other clock is sampled right at the
> +         * beginning of its sampling period and right at the end of the
> +         * sampling interval. Let's assume the GPU has the longest clock
> +         * period and that the application is sampling GPU and monotonic:
> +         *
> +         *                               s                 e
> +         *                      w x y z 0 1 2 3 4 5 6 7 8 9 a b c d e f
> +         *     Raw              -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
> +         *
> +         *                               g
> +         *               0         1         2         3
> +         *     GPU       -----_____-----_____-----_____-----_____
> +         *
> +         *                                                m
> +         *                                         x y z 0 1 2 3 4 5 6 7 8 9 a b c
> +         *     Monotonic                           -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
> +         *
> +         *     Interval                     <----------------->
> +         *     Deviation           <-------------------------->
> +         *
> +         *             s  = read(raw)       2
> +         *             g  = read(GPU)       1
> +         *             m  = read(monotonic) 2
> +         *             e  = read(raw)       b
> +         *
> +         * We round the sample interval up by one tick to cover sampling error
> +         * in the interval clock
> +         */
> +
> +        uint64_t sample_interval = end - begin + 1;
> +
> +        *pMaxDeviation = sample_interval + max_clock_period;
> +
> +       return VK_SUCCESS;
> +}
> diff --git a/src/amd/vulkan/radv_extensions.py b/src/amd/vulkan/radv_extensions.py
> index 5dcedae1c63..4c81d3f0068 100644
> --- a/src/amd/vulkan/radv_extensions.py
> +++ b/src/amd/vulkan/radv_extensions.py
> @@ -92,6 +92,7 @@ EXTENSIONS = [
>      Extension('VK_KHR_display',                          23, 'VK_USE_PLATFORM_DISPLAY_KHR'),
>      Extension('VK_EXT_direct_mode_display',               1, 'VK_USE_PLATFORM_DISPLAY_KHR'),
>      Extension('VK_EXT_acquire_xlib_display',              1, 'VK_USE_PLATFORM_XLIB_XRANDR_EXT'),
> +    Extension('VK_EXT_calibrated_timestamps',             1, True),
>      Extension('VK_EXT_conditional_rendering',             1, True),
>      Extension('VK_EXT_conservative_rasterization',        1, 'device->rad_info.chip_class >= GFX9'),
>      Extension('VK_EXT_display_surface_counter',           1, 'VK_USE_PLATFORM_DISPLAY_KHR'),
> diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
> index a2551452eb1..076ff3a57f6 100644
> --- a/src/intel/vulkan/anv_device.c
> +++ b/src/intel/vulkan/anv_device.c
> @@ -3021,6 +3021,133 @@ void anv_DestroyFramebuffer(
>     vk_free2(&device->alloc, pAllocator, fb);
>  }
>
> +static const VkTimeDomainEXT anv_time_domains[] = {
> +   VK_TIME_DOMAIN_DEVICE_EXT,
> +   VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT,
> +   VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT,
> +};
> +
> +VkResult anv_GetPhysicalDeviceCalibrateableTimeDomainsEXT(
> +   VkPhysicalDevice                             physicalDevice,
> +   uint32_t                                     *pTimeDomainCount,
> +   VkTimeDomainEXT                              *pTimeDomains)
> +{
> +   int d;
> +   VK_OUTARRAY_MAKE(out, pTimeDomains, pTimeDomainCount);
> +
> +   for (d = 0; d < ARRAY_SIZE(anv_time_domains); d++) {
> +      vk_outarray_append(&out, i) {
> +         *i = anv_time_domains[d];
> +      }
> +   }
> +
> +   return vk_outarray_status(&out);
> +}
> +
> +static uint64_t
> +anv_clock_gettime(clockid_t clock_id)
> +{
> +   struct timespec current;
> +   int ret;
> +
> +   ret = clock_gettime(clock_id, &current);
> +   if (ret < 0 && clock_id == CLOCK_MONOTONIC_RAW)
> +      ret = clock_gettime(CLOCK_MONOTONIC, &current);
> +   if (ret < 0)
> +      return 0;
> +
> +   return (uint64_t) current.tv_sec * 1000000000ULL + current.tv_nsec;
> +}
> +
> +#define TIMESTAMP 0x2358
> +
> +VkResult anv_GetCalibratedTimestampsEXT(
> +   VkDevice                                     _device,
> +   uint32_t                                     timestampCount,
> +   const VkCalibratedTimestampInfoEXT           *pTimestampInfos,
> +   uint64_t                                     *pTimestamps,
> +   uint64_t                                     *pMaxDeviation)
> +{
> +   ANV_FROM_HANDLE(anv_device, device, _device);
> +   uint64_t timestamp_frequency = device->info.timestamp_frequency;
> +   int  ret;
> +   int d;
> +   uint64_t begin, end;
> +   uint64_t max_clock_period = 0;
> +
> +   begin = anv_clock_gettime(CLOCK_MONOTONIC_RAW);
> +
> +   for (d = 0; d < timestampCount; d++) {
> +      switch (pTimestampInfos[d].timeDomain) {
> +      case VK_TIME_DOMAIN_DEVICE_EXT:
> +         ret = anv_gem_reg_read(device, TIMESTAMP | 1,
> +                                &pTimestamps[d]);
> +
> +         if (ret != 0) {
> +            device->lost = TRUE;
> +            return VK_ERROR_DEVICE_LOST;
> +         }
> +         uint64_t device_period = DIV_ROUND_UP(1000000000, timestamp_frequency);
> +         max_clock_period = MAX2(max_clock_period, device_period);
> +         break;
> +      case VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT:
> +         pTimestamps[d] = anv_clock_gettime(CLOCK_MONOTONIC);
> +         max_clock_period = MAX2(max_clock_period, 1);
> +         break;
> +
> +      case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT:
> +         pTimestamps[d] = begin;
> +         break;
> +      default:
> +         pTimestamps[d] = 0;
> +         break;
> +      }
> +   }
> +
> +   end = anv_clock_gettime(CLOCK_MONOTONIC_RAW);
> +
> +    /*
> +     * The maximum deviation is the sum of the interval over which we
> +     * perform the sampling and the maximum period of any sampled
> +     * clock. That's because the maximum skew between any two sampled
> +     * clock edges is when the sampled clock with the largest period is
> +     * sampled at the end of that period but right at the beginning of the
> +     * sampling interval and some other clock is sampled right at the
> +     * beginning of its sampling period and right at the end of the
> +     * sampling interval. Let's assume the GPU has the longest clock
> +     * period and that the application is sampling GPU and monotonic:
> +     *
> +     *                               s                 e
> +     *                  w x y z 0 1 2 3 4 5 6 7 8 9 a b c d e f
> +     * Raw              -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
> +     *
> +     *                               g
> +     *           0         1         2         3
> +     * GPU       -----_____-----_____-----_____-----_____
> +     *
> +     *                                                m
> +     *                                     x y z 0 1 2 3 4 5 6 7 8 9 a b c
> +     * Monotonic                           -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
> +     *
> +     * Interval                     <----------------->
> +     * Deviation           <-------------------------->
> +     *
> +     *         s  = read(raw)       2
> +     *         g  = read(GPU)       1
> +     *         m  = read(monotonic) 2
> +     *         e  = read(raw)       b
> +     *
> +     * We round the sample interval up by one tick to cover sampling error
> +     * in the interval clock
> +     */
> +
> +   uint64_t sample_interval = end - begin + 1;
> +
> +   *pMaxDeviation = sample_interval + max_clock_period;
> +
> +   return VK_SUCCESS;
> +}
> +
>  /* vk_icd.h does not declare this function, so we declare it here to
>   * suppress Wmissing-prototypes.
>   */
> diff --git a/src/intel/vulkan/anv_extensions.py b/src/intel/vulkan/anv_extensions.py
> index d4915c95013..a8535964da7 100644
> --- a/src/intel/vulkan/anv_extensions.py
> +++ b/src/intel/vulkan/anv_extensions.py
> @@ -126,6 +126,7 @@ EXTENSIONS = [
>      Extension('VK_EXT_vertex_attribute_divisor',          3, True),
>      Extension('VK_EXT_post_depth_coverage',               1, 'device->info.gen >= 9'),
>      Extension('VK_EXT_sampler_filter_minmax',             1, 'device->info.gen >= 9'),
> +    Extension('VK_EXT_calibrated_timestamps',             1, True),
>  ]
>
>  class VkVersion:
> diff --git a/src/intel/vulkan/anv_gem.c b/src/intel/vulkan/anv_gem.c
> index c43b5ef9e06..1bdf040c1a3 100644
> --- a/src/intel/vulkan/anv_gem.c
> +++ b/src/intel/vulkan/anv_gem.c
> @@ -423,6 +423,19 @@ anv_gem_fd_to_handle(struct anv_device *device, int fd)
>     return args.handle;
>  }
>
> +int
> +anv_gem_reg_read(struct anv_device *device, uint32_t offset, uint64_t *result)
> +{
> +   struct drm_i915_reg_read args = {
> +      .offset = offset
> +   };
> +
> +   int ret = anv_ioctl(device->fd, DRM_IOCTL_I915_REG_READ, &args);
> +
> +   *result = args.val;
> +   return ret;
> +}
> +
>  #ifndef SYNC_IOC_MAGIC
>  /* duplicated from linux/sync_file.h to avoid build-time dependency
>   * on new (v4.7) kernel headers.  Once distro's are mostly using
> diff --git a/src/intel/vulkan/anv_gem_stubs.c b/src/intel/vulkan/anv_gem_stubs.c
> index 5093bd5db1a..8cc3ad1f22e 100644
> --- a/src/intel/vulkan/anv_gem_stubs.c
> +++ b/src/intel/vulkan/anv_gem_stubs.c
> @@ -251,3 +251,10 @@ anv_gem_syncobj_wait(struct anv_device *device,
>  {
>     unreachable("Unused");
>  }
> +
> +int
> +anv_gem_reg_read(struct anv_device *device,
> +                 uint32_t offset, uint64_t *result)
> +{
> +   unreachable("Unused");
> +}
> diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
> index 599b903f25c..08376b00c8e 100644
> --- a/src/intel/vulkan/anv_private.h
> +++ b/src/intel/vulkan/anv_private.h
> @@ -1103,6 +1103,8 @@ int anv_gem_get_aperture(int fd, uint64_t *size);
>  int anv_gem_gpu_get_reset_stats(struct anv_device *device,
>                                  uint32_t *active, uint32_t *pending);
>  int anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle);
> +int anv_gem_reg_read(struct anv_device *device,
> +                     uint32_t offset, uint64_t *result);
>  uint32_t anv_gem_fd_to_handle(struct anv_device *device, int fd);
>  int anv_gem_set_caching(struct anv_device *device, uint32_t gem_handle, uint32_t caching);
>  int anv_gem_set_domain(struct anv_device *device, uint32_t gem_handle,
> --
> 2.19.1
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Keith Packard Oct. 18, 2018, 3:14 a.m. UTC | #4
Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl> writes:

> Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>

Thanks to you, Jason and Lionel for reviewing the code and helping
improve it.
diff mbox series

Patch

diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 174922780fc..4a705a724ef 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -4955,3 +4955,122 @@  radv_GetDeviceGroupPeerMemoryFeatures(
 	                       VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT |
 	                       VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT;
 }
+
+static const VkTimeDomainEXT radv_time_domains[] = { /* advertised time domains */
+	VK_TIME_DOMAIN_DEVICE_EXT,              /* sampled via ws->query_value(RADEON_TIMESTAMP) */
+	VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT,     /* sampled via clock_gettime(CLOCK_MONOTONIC) */
+	VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT, /* sampled via clock_gettime(CLOCK_MONOTONIC_RAW) */
+};
+
+VkResult radv_GetPhysicalDeviceCalibrateableTimeDomainsEXT( /* reports radv_time_domains[] to the caller */
+	VkPhysicalDevice                             physicalDevice,    /* not referenced in this body */
+	uint32_t                                     *pTimeDomainCount, /* count pointer handed to VK_OUTARRAY_MAKE */
+	VkTimeDomainEXT                              *pTimeDomains)     /* output array handed to VK_OUTARRAY_MAKE */
+{
+	int d;
+	VK_OUTARRAY_MAKE(out, pTimeDomains, pTimeDomainCount);
+
+	for (d = 0; d < ARRAY_SIZE(radv_time_domains); d++) { /* offer every table entry, in table order */
+		vk_outarray_append(&out, i) {
+			*i = radv_time_domains[d];
+		}
+	}
+
+	return vk_outarray_status(&out); /* presumably VK_INCOMPLETE if the caller's array was too small -- confirm in vk_util.h */
+}
+
+static uint64_t
+radv_clock_gettime(clockid_t clock_id) /* returns the reading of clock_id in nanoseconds, or 0 on failure */
+{
+	struct timespec current;
+	int ret;
+
+	ret = clock_gettime(clock_id, &current);
+	if (ret < 0 && clock_id == CLOCK_MONOTONIC_RAW) /* RAW read failed: retry with plain CLOCK_MONOTONIC */
+		ret = clock_gettime(CLOCK_MONOTONIC, &current);
+	if (ret < 0)
+		return 0; /* failure is reported in-band as timestamp 0 */
+
+	return (uint64_t) current.tv_sec * 1000000000ULL + current.tv_nsec; /* seconds scaled to ns, plus the ns part */
+}
+
+/* Implements vkGetCalibratedTimestampsEXT: fills pTimestamps[i] with a sample
+ * of pTimestampInfos[i].timeDomain for i < timestampCount (0 for a domain not
+ * handled below) and stores the worst-case skew between those samples in
+ * *pMaxDeviation.  Every path returns VK_SUCCESS. */
+VkResult radv_GetCalibratedTimestampsEXT(
+	VkDevice                                     _device,
+	uint32_t                                     timestampCount,
+	const VkCalibratedTimestampInfoEXT           *pTimestampInfos,
+	uint64_t                                     *pTimestamps,
+	uint64_t                                     *pMaxDeviation)
+{
+	RADV_FROM_HANDLE(radv_device, device, _device);
+	uint32_t clock_crystal_freq = device->physical_device->rad_info.clock_crystal_freq;
+	uint32_t d; /* unsigned like timestampCount, so the loop compare is not mixed-sign */
+	uint64_t begin, end;
+	uint64_t max_clock_period = 0;
+
+	begin = radv_clock_gettime(CLOCK_MONOTONIC_RAW); /* start of the sampling interval */
+
+	for (d = 0; d < timestampCount; d++) {
+		switch (pTimestampInfos[d].timeDomain) {
+		case VK_TIME_DOMAIN_DEVICE_EXT:
+			pTimestamps[d] = device->ws->query_value(device->ws, RADEON_TIMESTAMP);
+			uint64_t device_period = DIV_ROUND_UP(1000000, clock_crystal_freq); /* ns, assuming clock_crystal_freq is in kHz -- TODO confirm */
+			max_clock_period = MAX2(max_clock_period, device_period);
+			break;
+		case VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT:
+			pTimestamps[d] = radv_clock_gettime(CLOCK_MONOTONIC);
+			max_clock_period = MAX2(max_clock_period, 1); /* counted as a 1 ns tick */
+			break;
+		case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT:
+			pTimestamps[d] = begin; /* reuse the interval-start read */
+			break;
+		default:
+			pTimestamps[d] = 0; /* unhandled domain: report 0 */
+			break;
+		}
+	}
+
+	end = radv_clock_gettime(CLOCK_MONOTONIC_RAW); /* end of the sampling interval */
+
+	/*
+	 * The maximum deviation is the sum of the interval over which we
+	 * perform the sampling and the maximum period of any sampled
+	 * clock. That's because the maximum skew between any two sampled
+	 * clock edges is when the sampled clock with the largest period is
+	 * sampled at the end of that period but right at the beginning of the
+	 * sampling interval and some other clock is sampled right at the
+	 * beginning of its sampling period and right at the end of the
+	 * sampling interval. Let's assume the GPU has the longest clock
+	 * period and that the application is sampling GPU and monotonic:
+	 *
+	 *                               s                 e
+	 *                   w x y z 0 1 2 3 4 5 6 7 8 9 a b c d e f
+	 *  Raw              -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
+	 *
+	 *                               g
+	 *            0         1         2         3
+	 *  GPU       -----_____-----_____-----_____-----_____
+	 *
+	 *                                                m
+	 *                                      x y z 0 1 2 3 4 5 6 7 8 9 a b c
+	 *  Monotonic                           -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
+	 *
+	 *  Interval                     <----------------->
+	 *  Deviation           <-------------------------->
+	 *
+	 *          s  = read(raw)       2
+	 *          g  = read(GPU)       1
+	 *          m  = read(monotonic) 2
+	 *          e  = read(raw)       b
+	 *
+	 * We round the sample interval up by one tick to cover sampling error
+	 * in the interval clock.
+	 */
+	uint64_t sample_interval = end - begin + 1;
+	*pMaxDeviation = sample_interval + max_clock_period;
+
+	return VK_SUCCESS;
+}
diff --git a/src/amd/vulkan/radv_extensions.py b/src/amd/vulkan/radv_extensions.py
index 5dcedae1c63..4c81d3f0068 100644
--- a/src/amd/vulkan/radv_extensions.py
+++ b/src/amd/vulkan/radv_extensions.py
@@ -92,6 +92,7 @@  EXTENSIONS = [
     Extension('VK_KHR_display',                          23, 'VK_USE_PLATFORM_DISPLAY_KHR'),
     Extension('VK_EXT_direct_mode_display',               1, 'VK_USE_PLATFORM_DISPLAY_KHR'),
     Extension('VK_EXT_acquire_xlib_display',              1, 'VK_USE_PLATFORM_XLIB_XRANDR_EXT'),
+    Extension('VK_EXT_calibrated_timestamps',             1, True),
     Extension('VK_EXT_conditional_rendering',             1, True),
     Extension('VK_EXT_conservative_rasterization',        1, 'device->rad_info.chip_class >= GFX9'),
     Extension('VK_EXT_display_surface_counter',           1, 'VK_USE_PLATFORM_DISPLAY_KHR'),
diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index a2551452eb1..076ff3a57f6 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -3021,6 +3021,133 @@  void anv_DestroyFramebuffer(
    vk_free2(&device->alloc, pAllocator, fb);
 }
 
+static const VkTimeDomainEXT anv_time_domains[] = { /* advertised time domains */
+   VK_TIME_DOMAIN_DEVICE_EXT,              /* sampled via anv_gem_reg_read(TIMESTAMP | 1) */
+   VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT,     /* sampled via clock_gettime(CLOCK_MONOTONIC) */
+   VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT, /* sampled via clock_gettime(CLOCK_MONOTONIC_RAW) */
+};
+
+VkResult anv_GetPhysicalDeviceCalibrateableTimeDomainsEXT( /* reports anv_time_domains[] to the caller */
+   VkPhysicalDevice                             physicalDevice,    /* not referenced in this body */
+   uint32_t                                     *pTimeDomainCount, /* count pointer handed to VK_OUTARRAY_MAKE */
+   VkTimeDomainEXT                              *pTimeDomains)     /* output array handed to VK_OUTARRAY_MAKE */
+{
+   int d;
+   VK_OUTARRAY_MAKE(out, pTimeDomains, pTimeDomainCount);
+
+   for (d = 0; d < ARRAY_SIZE(anv_time_domains); d++) { /* offer every table entry, in table order */
+      vk_outarray_append(&out, i) {
+         *i = anv_time_domains[d];
+      }
+   }
+
+   return vk_outarray_status(&out); /* presumably VK_INCOMPLETE if the caller's array was too small -- confirm in vk_util.h */
+}
+
+static uint64_t
+anv_clock_gettime(clockid_t clock_id) /* returns the reading of clock_id in nanoseconds, or 0 on failure */
+{
+   struct timespec current;
+   int ret;
+
+   ret = clock_gettime(clock_id, &current);
+   if (ret < 0 && clock_id == CLOCK_MONOTONIC_RAW) /* RAW read failed: retry with plain CLOCK_MONOTONIC */
+      ret = clock_gettime(CLOCK_MONOTONIC, &current);
+   if (ret < 0)
+      return 0; /* failure is reported in-band as timestamp 0 */
+
+   return (uint64_t) current.tv_sec * 1000000000ULL + current.tv_nsec; /* seconds scaled to ns, plus the ns part */
+}
+
+#define TIMESTAMP 0x2358 /* register offset passed to anv_gem_reg_read() for the device time domain */
+
+/* Implements vkGetCalibratedTimestampsEXT: pTimestamps[i] receives a sample of
+ * pTimestampInfos[i].timeDomain (0 for a domain not handled below) and
+ * *pMaxDeviation the worst-case skew between the samples.  A failed timestamp
+ * register read marks the device lost and returns VK_ERROR_DEVICE_LOST. */
+VkResult anv_GetCalibratedTimestampsEXT(
+   VkDevice                                     _device,
+   uint32_t                                     timestampCount,
+   const VkCalibratedTimestampInfoEXT           *pTimestampInfos,
+   uint64_t                                     *pTimestamps,
+   uint64_t                                     *pMaxDeviation)
+{
+   ANV_FROM_HANDLE(anv_device, device, _device);
+   uint64_t timestamp_frequency = device->info.timestamp_frequency;
+   int ret;
+   uint32_t d; /* unsigned like timestampCount, so the loop compare is not mixed-sign */
+   uint64_t begin, end;
+   uint64_t max_clock_period = 0;
+
+   begin = anv_clock_gettime(CLOCK_MONOTONIC_RAW); /* start of the sampling interval */
+
+   for (d = 0; d < timestampCount; d++) {
+      switch (pTimestampInfos[d].timeDomain) {
+      case VK_TIME_DOMAIN_DEVICE_EXT:
+         /* NOTE(review): "| 1" is presumably an i915 reg-read flag -- confirm */
+         ret = anv_gem_reg_read(device, TIMESTAMP | 1, &pTimestamps[d]);
+         if (ret != 0) {
+            device->lost = TRUE;
+            return VK_ERROR_DEVICE_LOST;
+         }
+         uint64_t device_period = DIV_ROUND_UP(1000000000, timestamp_frequency); /* ns, assuming timestamp_frequency is in Hz -- TODO confirm */
+         max_clock_period = MAX2(max_clock_period, device_period);
+         break;
+      case VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT:
+         pTimestamps[d] = anv_clock_gettime(CLOCK_MONOTONIC);
+         max_clock_period = MAX2(max_clock_period, 1); /* counted as a 1 ns tick */
+         break;
+      case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT:
+         pTimestamps[d] = begin; /* reuse the interval-start read */
+         break;
+      default:
+         pTimestamps[d] = 0; /* unhandled domain: report 0 */
+         break;
+      }
+   }
+
+   end = anv_clock_gettime(CLOCK_MONOTONIC_RAW); /* end of the sampling interval */
+
+   /*
+    * The maximum deviation is the sum of the interval over which we
+    * perform the sampling and the maximum period of any sampled
+    * clock. That's because the maximum skew between any two sampled
+    * clock edges is when the sampled clock with the largest period is
+    * sampled at the end of that period but right at the beginning of the
+    * sampling interval and some other clock is sampled right at the
+    * beginning of its sampling period and right at the end of the
+    * sampling interval. Let's assume the GPU has the longest clock
+    * period and that the application is sampling GPU and monotonic:
+    *
+    *                               s                 e
+    *                   w x y z 0 1 2 3 4 5 6 7 8 9 a b c d e f
+    *  Raw              -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
+    *
+    *                               g
+    *            0         1         2         3
+    *  GPU       -----_____-----_____-----_____-----_____
+    *
+    *                                                m
+    *                                      x y z 0 1 2 3 4 5 6 7 8 9 a b c
+    *  Monotonic                           -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
+    *
+    *  Interval                     <----------------->
+    *  Deviation           <-------------------------->
+    *
+    *          s  = read(raw)       2
+    *          g  = read(GPU)       1
+    *          m  = read(monotonic) 2
+    *          e  = read(raw)       b
+    *
+    * We round the sample interval up by one tick to cover sampling error
+    * in the interval clock.
+    */
+   uint64_t sample_interval = end - begin + 1;
+   *pMaxDeviation = sample_interval + max_clock_period;
+
+   return VK_SUCCESS;
+}
+
 /* vk_icd.h does not declare this function, so we declare it here to
  * suppress Wmissing-prototypes.
  */
diff --git a/src/intel/vulkan/anv_extensions.py b/src/intel/vulkan/anv_extensions.py
index d4915c95013..a8535964da7 100644
--- a/src/intel/vulkan/anv_extensions.py
+++ b/src/intel/vulkan/anv_extensions.py
@@ -126,6 +126,7 @@  EXTENSIONS = [
     Extension('VK_EXT_vertex_attribute_divisor',          3, True),
     Extension('VK_EXT_post_depth_coverage',               1, 'device->info.gen >= 9'),
     Extension('VK_EXT_sampler_filter_minmax',             1, 'device->info.gen >= 9'),
+    Extension('VK_EXT_calibrated_timestamps',             1, True),
 ]
 
 class VkVersion:
diff --git a/src/intel/vulkan/anv_gem.c b/src/intel/vulkan/anv_gem.c
index c43b5ef9e06..1bdf040c1a3 100644
--- a/src/intel/vulkan/anv_gem.c
+++ b/src/intel/vulkan/anv_gem.c
@@ -423,6 +423,19 @@  anv_gem_fd_to_handle(struct anv_device *device, int fd)
    return args.handle;
 }
 
+int
+anv_gem_reg_read(struct anv_device *device, uint32_t offset, uint64_t *result) /* returns anv_ioctl()'s result */
+{
+   struct drm_i915_reg_read args = {
+      .offset = offset /* remaining fields, including .val, are zero-initialized */
+   };
+
+   int ret = anv_ioctl(device->fd, DRM_IOCTL_I915_REG_READ, &args);
+
+   *result = args.val; /* stored unconditionally; callers must check ret before trusting it */
+   return ret;
+}
+
 #ifndef SYNC_IOC_MAGIC
 /* duplicated from linux/sync_file.h to avoid build-time dependency
  * on new (v4.7) kernel headers.  Once distro's are mostly using
diff --git a/src/intel/vulkan/anv_gem_stubs.c b/src/intel/vulkan/anv_gem_stubs.c
index 5093bd5db1a..8cc3ad1f22e 100644
--- a/src/intel/vulkan/anv_gem_stubs.c
+++ b/src/intel/vulkan/anv_gem_stubs.c
@@ -251,3 +251,10 @@  anv_gem_syncobj_wait(struct anv_device *device,
 {
    unreachable("Unused");
 }
+
+int
+anv_gem_reg_read(struct anv_device *device,
+                 uint32_t offset, uint64_t *result)
+{
+   unreachable("Unused"); /* stub variant: this entry point is not expected to be called */
+}
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 599b903f25c..08376b00c8e 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -1103,6 +1103,8 @@  int anv_gem_get_aperture(int fd, uint64_t *size);
 int anv_gem_gpu_get_reset_stats(struct anv_device *device,
                                 uint32_t *active, uint32_t *pending);
 int anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle);
+int anv_gem_reg_read(struct anv_device *device,
+                     uint32_t offset, uint64_t *result);
 uint32_t anv_gem_fd_to_handle(struct anv_device *device, int fd);
 int anv_gem_set_caching(struct anv_device *device, uint32_t gem_handle, uint32_t caching);
 int anv_gem_set_domain(struct anv_device *device, uint32_t gem_handle,