vulkan: Add VK_GOOGLE_display_timing extension (x11+display, anv+radv) [v6]
diff mbox series

Message ID 20181115180623.32390-1-keithp@keithp.com
State New
Headers show
Series
  • vulkan: Add VK_GOOGLE_display_timing extension (x11+display, anv+radv) [v6]
Related show

Commit Message

Keith Packard Nov. 15, 2018, 6:06 p.m. UTC
This adds support for the VK_GOOGLE_display_timing extension, which
provides two things:

 1) Detailed information about when frames are displayed, including
    slack time between GPU execution and display frame.

 2) Absolute time control over swapchain queue processing. This allows
    the application to request frames be displayed at specific
    absolute times, using the same timebase as that provided in vblank
    events.

Support for this extension has been implemented for the x11 and
display backends; adding support to other backends should be
reasonably straightforward for one familiar with those systems and
should not require any additional device-specific code.

v2:
	Adjust GOOGLE_display_timing earliest value.  The
	earliestPresentTime for an image cannot be before the previous
	image was displayed, or even a frame later (in FIFO mode).

	Make GOOGLE_display_timing use render completed time.  Switch
	from VK_PIPELINE_TOP_OF_PIPE_BIT to
	VK_PIPELINE_STAGE_ALL_COMMANDS_BIT so that the time reported
	to applications as the end of rendering reflects the latest
	possible value to ensure that applications don't underestimate
	the amount of work done in the frame.

v3:
	Adopt Jason Ekstrand's coding conventions.  Declare variables
	at first use, eliminate extra whitespace between types and
	names. Wrap lines to 80 columns.

        Suggested-by: Jason Ekstrand <jason.ekstrand@intel.com>

v4:
	Adapt to changes in MESA_query_timestamp extension

v5:
	Squash core bits and anv/radv wrappers into a single patch

        Suggested-by: Jason Ekstrand <jason.ekstrand@intel.com>

v6:
	Switch from MESA_query_timestamp to EXT_calibrated_timestamps

Signed-off-by: Keith Packard <keithp@keithp.com>
---
 src/amd/vulkan/radv_extensions.py   |   1 +
 src/amd/vulkan/radv_wsi.c           |  33 +++
 src/intel/vulkan/anv_extensions.py  |   1 +
 src/intel/vulkan/anv_wsi.c          |  33 +++
 src/vulkan/wsi/wsi_common.c         | 303 +++++++++++++++++++++++++++-
 src/vulkan/wsi/wsi_common.h         |  32 +++
 src/vulkan/wsi/wsi_common_display.c | 165 ++++++++++++++-
 src/vulkan/wsi/wsi_common_private.h |  35 ++++
 src/vulkan/wsi/wsi_common_x11.c     |  71 ++++++-
 9 files changed, 662 insertions(+), 12 deletions(-)

Comments

Michel Dänzer Nov. 16, 2018, 9:39 a.m. UTC | #1
On 2018-11-15 7:06 p.m., Keith Packard wrote:
> This adds support for the VK_GOOGLE_display timing extension, which
> provides two things:
> 
>  1) Detailed information about when frames are displayed, including
>     slack time between GPU execution and display frame.
> 
>  2) Absolute time control over swapchain queue processing. This allows
>     the application to request frames be displayed at specific
>     absolute times, using the same timebase as that provided in vblank
>     events.
> 
> Support for this extension has been implemented for the x11 and
> display backends; adding support to other backends should be
> reasonable straightforward for one familiar with those systems and
> should not require any additional device-specific code.
> 
> v2:
> 	Adjust GOOGLE_display_timing earliest value.  The
> 	earliestPresentTime for an image cannot be before the previous
> 	image was displayed, or even a frame later (in FIFO mode).
> 
> 	Make GOOGLE_display_timing use render completed time.  Switch
> 	from VK_PIPELINE_TOP_OF_PIPE_BIT to
> 	VK_PIPELINE_STAGE_ALL_COMMANDS_BIT so that the time reported
> 	to applications as the end of rendering reflects the latest
> 	possible value to ensure that applications don't underestimate
> 	the amount of work done in the frame.
> 
> v3:
> 	Adopt Jason Ekstrand's coding conventions.  Declare variables
> 	at first use, eliminate extra whitespace between types and
> 	names. Wrap lines to 80 columns.
> 
>         Suggested-by: Jason Ekstrand <jason.ekstrand@intel.com>
> 
> v4:
> 	Adapt to changes in MESA_query_timestamp extension
> 
> v5:
> 	Squash core bits and anv/radv wrappers into a single patch
> 
>         Suggested-by: Jason Ekstrand <jason.ekstrand@intel.com>
> 
> v6:
> 	Switch from MESA_query_timestamp to EXT_calibrated_timestamps
> 
> Signed-off-by: Keith Packard <keithp@keithp.com>
> 
> [...]
> 
> @@ -979,9 +1187,49 @@ wsi_common_queue_present(const struct wsi_device *wsi,
>            */
>           struct wsi_image *image =
>              swapchain->get_wsi_image(swapchain, pPresentInfo->pImageIndices[i]);
> -         submit_info.commandBufferCount = 1;
> -         submit_info.pCommandBuffers =
> -            &image->prime.blit_cmd_buffers[queue_family_index];
> +         submit_buffers[submit_info.commandBufferCount++] = 
> +            image->prime.blit_cmd_buffers[queue_family_index];
> +      }
> +
> +      /* Set up GOOGLE_display_timing bits */
> +      if (present_times_info &&
> +          present_times_info->pTimes != NULL &&
> +          i < present_times_info->swapchainCount)
> +      {
> +         const VkPresentTimeGOOGLE *present_time =
> +            &present_times_info->pTimes[i];
> +
> +         struct wsi_image *image =
> +            swapchain->get_wsi_image(swapchain, pPresentInfo->pImageIndices[i]);
> +
> +         timing = wsi_next_timing(swapchain, pPresentInfo->pImageIndices[i]);
> +         timing->timing.presentID = present_time->presentID;
> +         timing->timing.desiredPresentTime = present_time->desiredPresentTime;
> +         timing->target_msc = 0;
> +         image->timing = timing;
> +
> +         if (present_time->desiredPresentTime != 0)
> +         {
> +            int64_t delta_nsec = (int64_t) (present_time->desiredPresentTime -
> +                                            swapchain->frame_ust);
> +
> +            /* Set the target msc only if it's no more than two seconds from
> +             * now, and not stale
> +             */
> +            if (0 <= delta_nsec && delta_nsec <= 2000000000ul) {
> +               VkRefreshCycleDurationGOOGLE refresh_timing;
> +
> +               swapchain->get_refresh_cycle_duration(swapchain,
> +                                                     &refresh_timing);
> +
> +               int64_t refresh = (int64_t) refresh_timing.refreshDuration;
> +               int64_t frames = (delta_nsec + refresh/2) / refresh;

desiredPresentTime has "no sooner than" semantics, so I think this should be

               int64_t frames = (delta_nsec + refresh-1) / refresh;


> +               timing->target_msc = swapchain->frame_msc + frames;
> +            }
> +         }

Note that MSC based timing won't work well with variable refresh rate.
In the long term, support for PresentOptionUST should be implemented and
used.


> @@ -1691,6 +1760,66 @@ wsi_display_queue_present(struct wsi_swapchain *drv_chain,
>  
>     pthread_mutex_lock(&wsi->wait_mutex);
>  
> +   if (image->base.timing && image->base.timing->target_msc != 0) {
> +      VkIcdSurfaceDisplay *surface = chain->surface;
> +      wsi_display_mode *display_mode =
> +         wsi_display_mode_from_handle(surface->displayMode);
> +      wsi_display_connector *connector = display_mode->connector;
> +
> +      wsi_display_debug("delta frame %ld\n",
> +                        image->base.timing->target_msc - connector->last_frame);
> +      if (image->base.timing->target_msc > connector->last_frame) {
> +         uint64_t frame_queued;
> +         VkDisplayKHR display = wsi_display_connector_to_handle(connector);
> +
> +         wsi_display_debug_code(uint64_t current_frame, current_nsec;
> +                                drmCrtcGetSequence(wsi->fd, connector->crtc_id,
> +                                                   &current_frame,
> +                                                   &current_nsec);
> +                                wsi_display_debug("from current: %ld\n",
> +                                                  image->base.timing->target_msc
> +                                                  - current_frame));
> +
> +         image->fence = wsi_display_fence_alloc(chain->base.device,
> +                                                chain->base.wsi,
> +                                                display, &chain->base.alloc);
> +
> +         if (!image->fence) {
> +            result = VK_ERROR_OUT_OF_HOST_MEMORY;
> +            goto bail_unlock;
> +         }
> +
> +         result = wsi_register_vblank_event(image->fence,
> +                                            chain->base.wsi,
> +                                            display,
> +                                            0,
> +                                            image->base.timing->target_msc - 1,
> +                                            &frame_queued);
> +
> +         if (result != VK_SUCCESS)
> +            goto bail_unlock;
> +
> +         /* Check and make sure we are queued for the right frame, otherwise
> +          * just go queue an image
> +          */
> +         if (frame_queued <= image->base.timing->target_msc - 1) {
> +            image->state = WSI_IMAGE_WAITING;
> +
> +            /*
> +             * Don't set the image member until we're going to wait for the
> +             * event to arrive before flipping to the image. That way, if the
> +             * register_vblank_event call happens to process the event, it
> +             * won't actually do anything
> +             */
> +            image->fence->image = image;
> +            wsi_display_start_wait_thread(wsi);
> +            result = VK_SUCCESS;
> +            goto bail_unlock;
> +         }
> +
> +      }
> +   }

What is this code for? At least with X11 Present, shouldn't it be
sufficient to simply pass the target MSC value to x11_present_to_x11?
Keith Packard Nov. 17, 2018, 9:34 p.m. UTC | #2
Michel Dänzer <michel@daenzer.net> writes:

Thanks for taking time to review this patch!

>> +               int64_t refresh = (int64_t) refresh_timing.refreshDuration;
>> +               int64_t frames = (delta_nsec + refresh/2) / refresh;
>
> desiredPresentTime has "no sooner than" semantics, so I think this should be
>
>                int64_t frames = (delta_nsec + refresh-1) / refresh;

Hrm. You're certainly right that we want to make sure to not hit the
wrong frame, and we need to be very careful with this computation. And
that turns out to be 'hard'.

With a naïve computation of frame times:

        future_frame_time = past_frame_time + n * refresh

If the reported refresh is longer than the actual interval, due
to rounding of that value or clock skew, this computation might select
frame n+1 if the driver uses a later frame for its basis than the
application:

        desiredPresentTime = application_past_frame_time + n * refresh

        delta_nsec = (desiredPresentTime - driver_past_frame_time);
        frames = (delta_nsec + refresh-1) / refresh;

If 'driver_past_frame_time' was sampled 'm' frames after
'application_past_frame_time', and 'refresh' is longer than the
actual frame time:

        desiredPresentTime > driver_past_frame_time + m * refresh

Because desiredPresentTime is *past* our estimate of the beginning of
the frame the application wants, and because we're rounding the selected
frame up, we end up targeting one frame too late.

Now, if we use my value for 'frames', then we hit the right frame using
this value, as long as the error is less than 1/2 frame time:

        desiredPresentTime > driver_past_frame_time + m * refresh
        desiredPresentTime < driver_past_frame_time + m * refresh + refresh/2

        delta_nsec > m * refresh
        delta_nsec < m * refresh + refresh / 2

        frames > (m * refresh + refresh/2) / refresh
        frames < (m * refresh + refresh) / refresh

        With this computation, frames = m, which is the desired result.

So at least you can see where my code came from. But, it's clearly wrong
according to the spec, as you'll see in the next section.

An application can attempt to compensate for this by using an earlier
time; a slightly less naïve computation might look like:

        future_frame_time = past_frame_time + 1 + (n-1) * refresh

This makes 'future_frame_time' be the earliest possible time that should
target the desired frame, given the 'no sooner than' semantics in the
spec.

If the reported refresh is shorter than the actual interval, this
computation might hit frame (n-1).

Ok, so now we make the application even 'smarter' by having it compute a
time in the center of the target frame:

        future_frame_time = past_frame_time + (refresh/2) + (n-1) * refresh

With your suggested code, this will hit the desired frame unless the
error in the frame time is more than 1/2 of the refresh interval, which
seems pretty good.

Ok, so what can we do? I think we start with what we know:

 * driver_past_frame_time >= application_past_frame_time

   Because all application frame time information comes from the driver,
   we just need to use the latest possible frame time in the driver to
   keep this true.

Now, what will cause errors in the 'refresh' value? 'refresh' error is a
combination of rounding error and CPU vs GPU clock skew.

 * Rounding error. This is always less than 1ns.

 * Clock skew is related to the performance of a couple of crystals in
   the system.

   Even cheap crystals provide significantly better than 100ppm (parts
   per million) performance. At 30Hz, refresh_interval is 33.3ms, or
   33,333,333ns, so each crystal will have a maximum error of 3300ns;
   combine two and we've a maximum error of 6600ns.

As you can see, the rounding error is lost in the noise here, unless we
find a system that uses CLOCK_MONOTONIC for display timing. It'll take
5000 frames before that error reaches a frame time.

As long as the application_past_frame_time is within 2500 frames of the
driver_past_frame_time, the error in the future_frame_time estimate will
be within one-half frame, and our application will work reliably using
the 'smarter' computation of future frame time.

I would prefer to let applications use the initial naïve
future_frame_time estimate, as I think that could also work with
variable refresh timing, but that would require a fairly complicated
change in the specification.

>> +               timing->target_msc = swapchain->frame_msc + frames;
>> +            }
>> +         }
>
> Note that MSC based timing won't work well with variable refresh rate.
> In the long term, support for PresentOptionUST should be implemented and
> used.

Agreed. Given the above discussion, I think that will have to wait for a
more sophisticated specification for what 'desiredPresentTime' means, as
I think the current specification makes it "impossible" to provide an
actual desiredPresentTime to the interface without that causing
occasional incorrect frame selection.

> What is this code for? At least with X11 Present, shouldn't it be
> sufficient to simply pass the target MSC value to x11_present_to_x11?

This is the direct display back end which uses DRM interfaces in the
kernel. Those interfaces do not support queuing a flip for anything
other than the next frame.

I'll update this patch to correct the computation of the next frame from
'nearest' to 'no sooner than'. Following the spec is certainly better
than making applications simpler as any application which does follow
the spec is going to get the wrong answer...

Patch
diff mbox series

diff --git a/src/amd/vulkan/radv_extensions.py b/src/amd/vulkan/radv_extensions.py
index 6bdf988d117..76c3ade06f0 100644
--- a/src/amd/vulkan/radv_extensions.py
+++ b/src/amd/vulkan/radv_extensions.py
@@ -119,6 +119,7 @@  EXTENSIONS = [
     Extension('VK_AMD_shader_trinary_minmax',             1, True),
     Extension('VK_GOOGLE_decorate_string',                1, True),
     Extension('VK_GOOGLE_hlsl_functionality1',            1, True),
+    Extension('VK_GOOGLE_display_timing',                 1, True),
 ]
 
 class VkVersion:
diff --git a/src/amd/vulkan/radv_wsi.c b/src/amd/vulkan/radv_wsi.c
index 346fb43d675..ba24d07edfc 100644
--- a/src/amd/vulkan/radv_wsi.c
+++ b/src/amd/vulkan/radv_wsi.c
@@ -295,3 +295,36 @@  VkResult radv_GetPhysicalDevicePresentRectanglesKHR(
 						 surface,
 						 pRectCount, pRects);
 }
+
+/* VK_GOOGLE_display_timing */
+VkResult
+radv_GetRefreshCycleDurationGOOGLE(
+	VkDevice _device,
+	VkSwapchainKHR swapchain,
+	VkRefreshCycleDurationGOOGLE *pDisplayTimingProperties)
+{
+	RADV_FROM_HANDLE(radv_device, device, _device);
+	struct radv_physical_device *pdevice = device->physical_device;
+
+	return wsi_common_get_refresh_cycle_duration(&pdevice->wsi_device,
+						     _device,
+						     swapchain,
+						     pDisplayTimingProperties);
+}
+
+VkResult
+radv_GetPastPresentationTimingGOOGLE(VkDevice _device,
+				     VkSwapchainKHR swapchain,
+				     uint32_t *pPresentationTimingCount,
+				     VkPastPresentationTimingGOOGLE
+				     *pPresentationTimings)
+{
+	RADV_FROM_HANDLE(radv_device, device, _device);
+	struct radv_physical_device *pdevice = device->physical_device;
+
+	return wsi_common_get_past_presentation_timing(&pdevice->wsi_device,
+						       _device,
+						       swapchain,
+						       pPresentationTimingCount,
+						       pPresentationTimings);
+}
diff --git a/src/intel/vulkan/anv_extensions.py b/src/intel/vulkan/anv_extensions.py
index e9afe06bb13..8fcc4d1376e 100644
--- a/src/intel/vulkan/anv_extensions.py
+++ b/src/intel/vulkan/anv_extensions.py
@@ -130,6 +130,7 @@  EXTENSIONS = [
     Extension('VK_EXT_calibrated_timestamps',             1, True),
     Extension('VK_GOOGLE_decorate_string',                1, True),
     Extension('VK_GOOGLE_hlsl_functionality1',            1, True),
+    Extension('VK_GOOGLE_display_timing',                 1, True),
 ]
 
 class VkVersion:
diff --git a/src/intel/vulkan/anv_wsi.c b/src/intel/vulkan/anv_wsi.c
index 024bc1c245d..bfdbcb56c56 100644
--- a/src/intel/vulkan/anv_wsi.c
+++ b/src/intel/vulkan/anv_wsi.c
@@ -305,3 +305,36 @@  VkResult anv_GetPhysicalDevicePresentRectanglesKHR(
                                             surface,
                                             pRectCount, pRects);
 }
+
+/* VK_GOOGLE_display_timing */
+VkResult
+anv_GetRefreshCycleDurationGOOGLE(VkDevice _device,
+                                  VkSwapchainKHR swapchain,
+                                  VkRefreshCycleDurationGOOGLE
+                                  *pDisplayTimingProperties)
+{
+   ANV_FROM_HANDLE(anv_device, device, _device);
+
+   return wsi_common_get_refresh_cycle_duration(&device->instance->
+                                                physicalDevice.wsi_device,
+                                                _device,
+                                                swapchain,
+                                                pDisplayTimingProperties);
+}
+
+VkResult
+anv_GetPastPresentationTimingGOOGLE(VkDevice _device,
+                                    VkSwapchainKHR swapchain,
+                                    uint32_t *pPresentationTimingCount,
+                                    VkPastPresentationTimingGOOGLE
+                                    *pPresentationTimings)
+{
+   ANV_FROM_HANDLE(anv_device, device, _device);
+
+   return wsi_common_get_past_presentation_timing(&device->instance->
+                                                  physicalDevice.wsi_device,
+                                                  _device,
+                                                  swapchain,
+                                                  pPresentationTimingCount,
+                                                  pPresentationTimings);
+}
diff --git a/src/vulkan/wsi/wsi_common.c b/src/vulkan/wsi/wsi_common.c
index 1cd5f8d62c5..562d2b8017e 100644
--- a/src/vulkan/wsi/wsi_common.c
+++ b/src/vulkan/wsi/wsi_common.c
@@ -48,6 +48,7 @@  wsi_device_init(struct wsi_device *wsi,
    WSI_GET_CB(GetPhysicalDeviceProperties2);
    WSI_GET_CB(GetPhysicalDeviceMemoryProperties);
    WSI_GET_CB(GetPhysicalDeviceQueueFamilyProperties);
+   WSI_GET_CB(GetPhysicalDeviceProperties);
 #undef WSI_GET_CB
 
    wsi->pci_bus_info.sType =
@@ -61,6 +62,10 @@  wsi_device_init(struct wsi_device *wsi,
    GetPhysicalDeviceMemoryProperties(pdevice, &wsi->memory_props);
    GetPhysicalDeviceQueueFamilyProperties(pdevice, &wsi->queue_family_count, NULL);
 
+   VkPhysicalDeviceProperties properties;
+   GetPhysicalDeviceProperties(pdevice, &properties);
+   wsi->timestamp_period = properties.limits.timestampPeriod;
+
 #define WSI_GET_CB(func) \
    wsi->func = (PFN_vk##func)proc_addr(pdevice, "vk" #func)
    WSI_GET_CB(AllocateMemory);
@@ -69,14 +74,18 @@  wsi_device_init(struct wsi_device *wsi,
    WSI_GET_CB(BindImageMemory);
    WSI_GET_CB(BeginCommandBuffer);
    WSI_GET_CB(CmdCopyImageToBuffer);
+   WSI_GET_CB(CmdResetQueryPool);
+   WSI_GET_CB(CmdWriteTimestamp);
    WSI_GET_CB(CreateBuffer);
    WSI_GET_CB(CreateCommandPool);
    WSI_GET_CB(CreateFence);
    WSI_GET_CB(CreateImage);
+   WSI_GET_CB(CreateQueryPool);
    WSI_GET_CB(DestroyBuffer);
    WSI_GET_CB(DestroyCommandPool);
    WSI_GET_CB(DestroyFence);
    WSI_GET_CB(DestroyImage);
+   WSI_GET_CB(DestroyQueryPool);
    WSI_GET_CB(EndCommandBuffer);
    WSI_GET_CB(FreeMemory);
    WSI_GET_CB(FreeCommandBuffers);
@@ -84,10 +93,14 @@  wsi_device_init(struct wsi_device *wsi,
    WSI_GET_CB(GetImageMemoryRequirements);
    WSI_GET_CB(GetImageSubresourceLayout);
    WSI_GET_CB(GetMemoryFdKHR);
+   WSI_GET_CB(GetPhysicalDeviceProperties);
    WSI_GET_CB(GetPhysicalDeviceFormatProperties);
    WSI_GET_CB(GetPhysicalDeviceFormatProperties2KHR);
+   WSI_GET_CB(GetPhysicalDeviceQueueFamilyProperties);
+   WSI_GET_CB(GetQueryPoolResults);
    WSI_GET_CB(ResetFences);
    WSI_GET_CB(QueueSubmit);
+   WSI_GET_CB(GetCalibratedTimestampsEXT);
    WSI_GET_CB(WaitForFences);
 #undef WSI_GET_CB
 
@@ -172,6 +185,8 @@  wsi_swapchain_init(const struct wsi_device *wsi,
    chain->device = device;
    chain->alloc = *pAllocator;
    chain->use_prime_blit = false;
+   chain->timing_insert = 0;
+   chain->timing_count = 0;
 
    chain->cmd_pools =
       vk_zalloc(pAllocator, sizeof(VkCommandPool) * wsi->queue_family_count, 8,
@@ -245,6 +260,63 @@  align_u32(uint32_t v, uint32_t a)
    return (v + a - 1) & ~(a - 1);
 }
 
+static VkResult
+wsi_image_init_timestamp(const struct wsi_swapchain *chain,
+                         struct wsi_image *image)
+{
+   const struct wsi_device *wsi = chain->wsi;
+   VkResult result;
+   /* Set up command buffer to get timestamp info */
+
+   result = wsi->CreateQueryPool(
+      chain->device,
+      &(const VkQueryPoolCreateInfo) {
+         .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
+            .queryType = VK_QUERY_TYPE_TIMESTAMP,
+            .queryCount = 1,
+            },
+      NULL,
+      &image->query_pool);
+
+   if (result != VK_SUCCESS)
+      goto fail;
+
+   result = wsi->AllocateCommandBuffers(
+      chain->device,
+      &(const VkCommandBufferAllocateInfo) {
+         .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
+            .pNext = NULL,
+            .commandPool = chain->cmd_pools[0],
+            .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
+            .commandBufferCount = 1,
+            },
+      &image->timestamp_buffer);
+   if (result != VK_SUCCESS)
+      goto fail;
+
+   wsi->BeginCommandBuffer(
+      image->timestamp_buffer,
+      &(VkCommandBufferBeginInfo) {
+         .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
+            .flags = 0
+            });
+
+   wsi->CmdResetQueryPool(image->timestamp_buffer,
+                          image->query_pool,
+                          0, 1);
+
+   wsi->CmdWriteTimestamp(image->timestamp_buffer,
+                          VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+                          image->query_pool,
+                          0);
+
+   wsi->EndCommandBuffer(image->timestamp_buffer);
+
+   return VK_SUCCESS;
+fail:
+   return result;
+}
+
 VkResult
 wsi_create_native_image(const struct wsi_swapchain *chain,
                         const VkSwapchainCreateInfoKHR *pCreateInfo,
@@ -418,6 +490,10 @@  wsi_create_native_image(const struct wsi_swapchain *chain,
    if (result != VK_SUCCESS)
       goto fail;
 
+   result = wsi_image_init_timestamp(chain, image);
+   if (result != VK_SUCCESS)
+      goto fail;
+
    if (num_modifier_lists > 0) {
       image->drm_modifier = wsi->image_get_modifier(image->image);
       assert(image->drm_modifier != DRM_FORMAT_MOD_INVALID);
@@ -663,6 +739,10 @@  wsi_create_prime_image(const struct wsi_swapchain *chain,
          goto fail;
    }
 
+   result = wsi_image_init_timestamp(chain, image);
+   if (result != VK_SUCCESS)
+      goto fail;
+
    const VkMemoryGetFdInfoKHR linear_memory_get_fd_info = {
       .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
       .pNext = NULL,
@@ -913,6 +993,128 @@  wsi_common_acquire_next_image2(const struct wsi_device *wsi,
    return swapchain->acquire_next_image(swapchain, pAcquireInfo, pImageIndex);
 }
 
+static struct wsi_timing *
+wsi_get_timing(struct wsi_swapchain *chain, uint32_t i)
+{
+   uint32_t j = WSI_TIMING_HISTORY + chain->timing_insert -
+      chain->timing_count + i;
+
+   if (j >= WSI_TIMING_HISTORY)
+      j -= WSI_TIMING_HISTORY;
+   return &chain->timing[j];
+}
+
+static struct wsi_timing *
+wsi_next_timing(struct wsi_swapchain *chain, int image_index)
+{
+   uint32_t j = chain->timing_insert;
+   ++chain->timing_insert;
+   if (chain->timing_insert >= WSI_TIMING_HISTORY)
+      chain->timing_insert = 0;
+   if (chain->timing_count < WSI_TIMING_HISTORY)
+      ++chain->timing_count;
+   struct wsi_timing *timing = &chain->timing[j];
+   memset(timing, '\0', sizeof (*timing));
+   return timing;
+}
+
+void
+wsi_mark_timing(struct wsi_swapchain *swapchain,
+                struct wsi_image *image,
+                uint64_t ust,
+                uint64_t msc)
+{
+   const struct wsi_device *wsi = swapchain->wsi;
+   struct wsi_timing *timing = image->timing;
+
+   if (!timing)
+      return;
+
+   uint64_t render_timestamp;
+
+   VkResult result = wsi->GetQueryPoolResults(
+      swapchain->device, image->query_pool,
+      0, 1, sizeof(render_timestamp), &render_timestamp,
+      sizeof (uint64_t),
+      VK_QUERY_RESULT_64_BIT|VK_QUERY_RESULT_WAIT_BIT);
+   if (result != VK_SUCCESS)
+      return;
+
+   static const VkCalibratedTimestampInfoEXT    timestampInfo[2] = {
+      {
+         .sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT,
+         .pNext = NULL,
+         .timeDomain = VK_TIME_DOMAIN_DEVICE_EXT,
+      },
+      {
+         .sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT,
+         .pNext = NULL,
+         .timeDomain = VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT,
+      },
+   };
+
+   uint64_t     timestamps[2];
+   uint64_t     maxDeviation;
+
+   result = wsi->GetCalibratedTimestampsEXT(swapchain->device,
+                                            2,
+                                            timestampInfo,
+                                            timestamps,
+                                            &maxDeviation);
+   if (result != VK_SUCCESS)
+      return;
+
+   uint64_t current_gpu_timestamp = timestamps[0];
+   uint64_t current_time = timestamps[1];
+
+   VkRefreshCycleDurationGOOGLE display_timings;
+   swapchain->get_refresh_cycle_duration(swapchain, &display_timings);
+
+   uint64_t refresh_duration = display_timings.refreshDuration;
+
+   /* When did drawing complete (in nsec) */
+
+   int64_t since_render = (int64_t) floor ((double) (current_gpu_timestamp - render_timestamp) *
+                                           (double) wsi->timestamp_period + 0.5);
+   uint64_t render_time = current_time - since_render;
+
+   if (render_time > ust)
+      render_time = ust;
+
+   uint64_t render_frames = (ust - render_time) / refresh_duration;
+
+   uint64_t earliest_time = ust - render_frames * refresh_duration;
+
+   /* Use the presentation mode to figure out when the image could have been
+    * displayed. It couldn't have been displayed before the previous image, so
+    * use that as a lower bound. If we're in FIFO mode, then it couldn't have
+    * been displayed before one frame *after* the previous image
+    */
+   uint64_t possible_frame = swapchain->frame_ust;
+
+   switch (swapchain->present_mode) {
+   case VK_PRESENT_MODE_FIFO_KHR:
+   case VK_PRESENT_MODE_FIFO_RELAXED_KHR:
+      possible_frame += refresh_duration;
+      break;
+   default:
+      break;
+   }
+   if (earliest_time < possible_frame)
+      earliest_time = possible_frame;
+
+   if (earliest_time > ust)
+      earliest_time = ust;
+
+   timing->timing.actualPresentTime = ust;
+   timing->timing.earliestPresentTime = earliest_time;
+   timing->timing.presentMargin = earliest_time - render_time;
+   timing->complete = true;
+
+   swapchain->frame_msc = msc;
+   swapchain->frame_ust = ust;
+}
+
 VkResult
 wsi_common_queue_present(const struct wsi_device *wsi,
                          VkDevice device,
@@ -924,10 +1126,13 @@  wsi_common_queue_present(const struct wsi_device *wsi,
 
    const VkPresentRegionsKHR *regions =
       vk_find_struct_const(pPresentInfo->pNext, PRESENT_REGIONS_KHR);
+   const VkPresentTimesInfoGOOGLE *present_times_info =
+      vk_find_struct_const(pPresentInfo->pNext, PRESENT_TIMES_INFO_GOOGLE);
 
    for (uint32_t i = 0; i < pPresentInfo->swapchainCount; i++) {
       WSI_FROM_HANDLE(wsi_swapchain, swapchain, pPresentInfo->pSwapchains[i]);
       VkResult result;
+      struct wsi_timing *timing = NULL;
 
       if (swapchain->fences[0] == VK_NULL_HANDLE) {
          const VkFenceCreateInfo fence_info = {
@@ -944,9 +1149,12 @@  wsi_common_queue_present(const struct wsi_device *wsi,
          wsi->ResetFences(device, 1, &swapchain->fences[0]);
       }
 
+      VkCommandBuffer submit_buffers[2];
       VkSubmitInfo submit_info = {
          .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
          .pNext = NULL,
+         .pCommandBuffers = submit_buffers,
+         .commandBufferCount = 0
       };
 
       VkPipelineStageFlags *stage_flags = NULL;
@@ -979,9 +1187,49 @@  wsi_common_queue_present(const struct wsi_device *wsi,
           */
          struct wsi_image *image =
             swapchain->get_wsi_image(swapchain, pPresentInfo->pImageIndices[i]);
-         submit_info.commandBufferCount = 1;
-         submit_info.pCommandBuffers =
-            &image->prime.blit_cmd_buffers[queue_family_index];
+         submit_buffers[submit_info.commandBufferCount++] = 
+            image->prime.blit_cmd_buffers[queue_family_index];
+      }
+
+      /* Set up GOOGLE_display_timing bits */
+      if (present_times_info &&
+          present_times_info->pTimes != NULL &&
+          i < present_times_info->swapchainCount)
+      {
+         const VkPresentTimeGOOGLE *present_time =
+            &present_times_info->pTimes[i];
+
+         struct wsi_image *image =
+            swapchain->get_wsi_image(swapchain, pPresentInfo->pImageIndices[i]);
+
+         timing = wsi_next_timing(swapchain, pPresentInfo->pImageIndices[i]);
+         timing->timing.presentID = present_time->presentID;
+         timing->timing.desiredPresentTime = present_time->desiredPresentTime;
+         timing->target_msc = 0;
+         image->timing = timing;
+
+         if (present_time->desiredPresentTime != 0)
+         {
+            int64_t delta_nsec = (int64_t) (present_time->desiredPresentTime -
+                                            swapchain->frame_ust);
+
+            /* Set the target msc only if it's no more than two seconds from
+             * now, and not stale
+             */
+            if (0 <= delta_nsec && delta_nsec <= 2000000000ul) {
+               VkRefreshCycleDurationGOOGLE refresh_timing;
+
+               swapchain->get_refresh_cycle_duration(swapchain,
+                                                     &refresh_timing);
+
+               int64_t refresh = (int64_t) refresh_timing.refreshDuration;
+               int64_t frames = (delta_nsec + refresh/2) / refresh;
+               timing->target_msc = swapchain->frame_msc + frames;
+            }
+         }
+
+         submit_buffers[submit_info.commandBufferCount++] =
+            image->timestamp_buffer;
       }
 
       result = wsi->QueueSubmit(queue, 1, &submit_info, swapchain->fences[0]);
@@ -1019,3 +1267,52 @@  wsi_common_queue_present(const struct wsi_device *wsi,
 
    return final_result;
 }
+
+/*
+ * VK_GOOGLE_display_timing: report the refresh cycle duration of a
+ * swapchain by forwarding to the get_refresh_cycle_duration hook installed
+ * by the swapchain's backend.
+ *
+ * wsi and device_h are accepted for interface symmetry and are not used.
+ *
+ * Returns the hook's result, or VK_ERROR_EXTENSION_NOT_PRESENT when the
+ * swapchain has no such hook.
+ */
+VkResult
+wsi_common_get_refresh_cycle_duration(
+   const struct wsi_device *wsi,
+   VkDevice device_h,
+   VkSwapchainKHR _swapchain,
+   VkRefreshCycleDurationGOOGLE *pDisplayTimingProperties)
+{
+   WSI_FROM_HANDLE(wsi_swapchain, swapchain, _swapchain);
+
+   /* The hook is optional; only call through it when it was installed */
+   if (swapchain->get_refresh_cycle_duration != NULL) {
+      return swapchain->get_refresh_cycle_duration(swapchain,
+                                                   pDisplayTimingProperties);
+   }
+
+   return VK_ERROR_EXTENSION_NOT_PRESENT;
+}
+
+
+/*
+ * VK_GOOGLE_display_timing: hand completed presentation timing records to
+ * the application.
+ *
+ * When timings is NULL, *count is output-only and receives the number of
+ * completed records that have not been consumed yet.  Otherwise *count is
+ * read as the capacity of the timings array; up to that many records are
+ * copied out and marked consumed, and *count is overwritten with the number
+ * of records actually written so the caller never reads past valid entries.
+ *
+ * wsi and device_h are not used.
+ *
+ * Returns VK_INCOMPLETE when timings is non-NULL and more records were
+ * available than fit in the array, VK_SUCCESS otherwise.
+ */
+VkResult
+wsi_common_get_past_presentation_timing(
+   const struct wsi_device *wsi,
+   VkDevice device_h,
+   VkSwapchainKHR _swapchain,
+   uint32_t *count,
+   VkPastPresentationTimingGOOGLE *timings)
+{
+   WSI_FROM_HANDLE(wsi_swapchain, swapchain, _swapchain);
+   /* *count is only an input when the caller supplied an output array */
+   const uint32_t capacity = timings != NULL ? *count : 0;
+   uint32_t available = 0;
+   uint32_t written = 0;
+
+   for (uint32_t t = 0; t < swapchain->timing_count; t++) {
+      struct wsi_timing *timing = wsi_get_timing(swapchain, t);
+
+      if (!timing->complete || timing->consumed)
+         continue;
+
+      /* Records that do not fit stay unconsumed for a later call */
+      if (written < capacity) {
+         timings[written++] = timing->timing;
+         timing->consumed = true;
+      }
+      available++;
+   }
+
+   /* Query-only call: just report how many records could be fetched */
+   if (timings == NULL) {
+      *count = available;
+      return VK_SUCCESS;
+   }
+
+   *count = written;
+   return available > written ? VK_INCOMPLETE : VK_SUCCESS;
+}
diff --git a/src/vulkan/wsi/wsi_common.h b/src/vulkan/wsi/wsi_common.h
index f6ca013c2a8..f07cf3a5880 100644
--- a/src/vulkan/wsi/wsi_common.h
+++ b/src/vulkan/wsi/wsi_common.h
@@ -96,6 +96,7 @@  struct wsi_device {
    VkPhysicalDevice pdevice;
    VkPhysicalDeviceMemoryProperties memory_props;
    uint32_t queue_family_count;
+   float timestamp_period;
 
    VkPhysicalDevicePCIBusInfoPropertiesEXT pci_bus_info;
 
@@ -109,14 +110,18 @@  struct wsi_device {
    WSI_CB(BindImageMemory);
    WSI_CB(BeginCommandBuffer);
    WSI_CB(CmdCopyImageToBuffer);
+   WSI_CB(CmdResetQueryPool);
+   WSI_CB(CmdWriteTimestamp);
    WSI_CB(CreateBuffer);
    WSI_CB(CreateCommandPool);
    WSI_CB(CreateFence);
    WSI_CB(CreateImage);
+   WSI_CB(CreateQueryPool);
    WSI_CB(DestroyBuffer);
    WSI_CB(DestroyCommandPool);
    WSI_CB(DestroyFence);
    WSI_CB(DestroyImage);
+   WSI_CB(DestroyQueryPool);
    WSI_CB(EndCommandBuffer);
    WSI_CB(FreeMemory);
    WSI_CB(FreeCommandBuffers);
@@ -124,10 +129,14 @@  struct wsi_device {
    WSI_CB(GetImageMemoryRequirements);
    WSI_CB(GetImageSubresourceLayout);
    WSI_CB(GetMemoryFdKHR);
+   WSI_CB(GetPhysicalDeviceProperties);
    WSI_CB(GetPhysicalDeviceFormatProperties);
    WSI_CB(GetPhysicalDeviceFormatProperties2KHR);
+   WSI_CB(GetPhysicalDeviceQueueFamilyProperties);
+   WSI_CB(GetQueryPoolResults);
    WSI_CB(ResetFences);
    WSI_CB(QueueSubmit);
+   WSI_CB(GetCalibratedTimestampsEXT);
    WSI_CB(WaitForFences);
 #undef WSI_CB
 
@@ -241,4 +250,27 @@  wsi_common_queue_present(const struct wsi_device *wsi,
                          int queue_family_index,
                          const VkPresentInfoKHR *pPresentInfo);
 
+VkResult
+wsi_common_convert_timestamp(const struct wsi_device *wsi,
+                             VkDevice device_h,
+                             VkSurfaceKHR surface_h,
+                             uint64_t monotonic_timestamp,
+                             uint64_t *surface_timestamp);
+
+/* VK_GOOGLE_display_timing */
+VkResult
+wsi_common_get_refresh_cycle_duration(const struct wsi_device *wsi,
+                                      VkDevice device_h,
+                                      VkSwapchainKHR swapchain,
+                                      VkRefreshCycleDurationGOOGLE
+                                      *pDisplayTimingProperties);
+
+VkResult
+wsi_common_get_past_presentation_timing(const struct wsi_device *wsi,
+                                        VkDevice device_h,
+                                        VkSwapchainKHR swapchain,
+                                        uint32_t *pPresentationTimingCount,
+                                        VkPastPresentationTimingGOOGLE
+                                        *pPresentationTimings);
+
 #endif
diff --git a/src/vulkan/wsi/wsi_common_display.c b/src/vulkan/wsi/wsi_common_display.c
index fd0d30ad80c..5f4d16bf17b 100644
--- a/src/vulkan/wsi/wsi_common_display.c
+++ b/src/vulkan/wsi/wsi_common_display.c
@@ -76,6 +76,8 @@  typedef struct wsi_display_connector {
    char                         *name;
    bool                         connected;
    bool                         active;
+   uint64_t                     last_frame;
+   uint64_t                     last_nsec;
    struct list_head             display_modes;
    wsi_display_mode             *current_mode;
    drmModeModeInfo              current_drm_mode;
@@ -110,6 +112,7 @@  struct wsi_display {
 enum wsi_image_state {
    WSI_IMAGE_IDLE,
    WSI_IMAGE_DRAWING,
+   WSI_IMAGE_WAITING,
    WSI_IMAGE_QUEUED,
    WSI_IMAGE_FLIPPING,
    WSI_IMAGE_DISPLAYING
@@ -119,6 +122,7 @@  struct wsi_display_image {
    struct wsi_image             base;
    struct wsi_display_swapchain *chain;
    enum wsi_image_state         state;
+   struct wsi_display_fence     *fence;
    uint32_t                     fb_id;
    uint32_t                     buffer[4];
    uint64_t                     flip_sequence;
@@ -129,6 +133,7 @@  struct wsi_display_swapchain {
    struct wsi_display           *wsi;
    VkIcdSurfaceDisplay          *surface;
    uint64_t                     flip_sequence;
+   const VkAllocationCallbacks  *allocator;
    VkResult                     status;
    struct wsi_display_image     images[0];
 };
@@ -138,6 +143,7 @@  struct wsi_display_fence {
    bool                         event_received;
    bool                         destroyed;
    uint64_t                     sequence;
+   struct wsi_display_image     *image;
 };
 
 static uint64_t fence_sequence;
@@ -1006,6 +1012,7 @@  wsi_display_image_init(VkDevice device_h,
 
    image->chain = chain;
    image->state = WSI_IMAGE_IDLE;
+   image->fence = NULL;
    image->fb_id = 0;
 
    int ret = drmModeAddFB2(wsi->fd,
@@ -1095,6 +1102,11 @@  wsi_display_idle_old_displaying(struct wsi_display_image *active_image)
 static VkResult
 _wsi_display_queue_next(struct wsi_swapchain *drv_chain);
 
+/*
+ * Extend a 32-bit counter sample to 64 bits: return the value congruent to
+ * narrow modulo 2^32 that lies within [-2^31, 2^31) of the 64-bit
+ * reference value near.
+ */
+static uint64_t widen_32_to_64(uint32_t narrow, uint64_t near)
+{
+   /* Signed distance from the low 32 bits of near to narrow */
+   int32_t delta = (int32_t) (narrow - (uint32_t) near);
+
+   return near + (uint64_t) (int64_t) delta;
+}
+
 static void
 wsi_display_page_flip_handler2(int fd,
                                unsigned int frame,
@@ -1105,17 +1117,38 @@  wsi_display_page_flip_handler2(int fd,
 {
    struct wsi_display_image *image = data;
    struct wsi_display_swapchain *chain = image->chain;
+   VkIcdSurfaceDisplay *surface = chain->surface;
+   wsi_display_mode *display_mode =
+      wsi_display_mode_from_handle(surface->displayMode);
+   wsi_display_connector *connector = display_mode->connector;
+   uint64_t nsec = (uint64_t) sec * 1000000000ull + (uint64_t) usec * 1000;
 
    wsi_display_debug("image %ld displayed at %d\n",
                      image - &(image->chain->images[0]), frame);
+
+   /* Don't let time go backwards because this function has lower resolution
+    * than ktime */
+
+   if (nsec < connector->last_nsec)
+      nsec = connector->last_nsec;
+
    image->state = WSI_IMAGE_DISPLAYING;
+
+   uint64_t frame64 = widen_32_to_64(frame, connector->last_frame);
+
+   connector->last_frame = frame64;
+   connector->last_nsec = nsec;
+   wsi_mark_timing(&image->chain->base, &image->base,
+                   nsec, frame64);
    wsi_display_idle_old_displaying(image);
    VkResult result = _wsi_display_queue_next(&(chain->base));
    if (result != VK_SUCCESS)
       chain->status = result;
 }
 
-static void wsi_display_fence_event_handler(struct wsi_display_fence *fence);
+static void wsi_display_fence_event_handler(struct wsi_display_fence *fence,
+                                            uint64_t nsec,
+                                            uint64_t frame);
 
 static void wsi_display_page_flip_handler(int fd,
                                           unsigned int frame,
@@ -1131,8 +1164,17 @@  static void wsi_display_vblank_handler(int fd, unsigned int frame,
                                        void *data)
 {
    struct wsi_display_fence *fence = data;
+   struct wsi_display_connector *connector =
+      wsi_display_connector_from_handle(fence->base.display);
+   uint64_t frame64 = widen_32_to_64(frame, connector->last_frame);
+   uint64_t nsec = (uint64_t) sec * 1000000000 + (uint64_t) usec * 1000;
 
-   wsi_display_fence_event_handler(fence);
+   /* Don't let time go backwards because this function has lower resolution
+    * than ktime */
+   if (nsec < connector->last_nsec)
+      nsec = connector->last_nsec;
+
+   wsi_display_fence_event_handler(fence, nsec, frame64);
 }
 
 static void wsi_display_sequence_handler(int fd, uint64_t frame,
@@ -1141,7 +1183,7 @@  static void wsi_display_sequence_handler(int fd, uint64_t frame,
    struct wsi_display_fence *fence =
       (struct wsi_display_fence *) (uintptr_t) user_data;
 
-   wsi_display_fence_event_handler(fence);
+   wsi_display_fence_event_handler(fence, nsec, frame);
 }
 
 static drmEventContext event_context = {
@@ -1473,12 +1515,31 @@  wsi_display_fence_check_free(struct wsi_display_fence *fence)
       vk_free(fence->base.alloc, fence);
 }
 
-static void wsi_display_fence_event_handler(struct wsi_display_fence *fence)
+/*
+ * Handle the display event a fence was waiting for.
+ *
+ * Records the event's timestamp (nsec) and frame number on the fence's
+ * connector and flags the fence as having received its event.  If
+ * wsi_display_queue_present attached an image to this fence, that image is
+ * moved to the QUEUED state and the swapchain's next flip is queued; a
+ * failure there is stored in the chain's status.
+ */
+static void wsi_display_fence_event_handler(struct wsi_display_fence *fence,
+                                            uint64_t nsec,
+                                            uint64_t frame)
 {
+   /* Both values are read up front: wsi_display_fence_check_free() below
+    * may free the fence */
+   struct wsi_display_connector *connector =
+      wsi_display_connector_from_handle(fence->base.display);
+   struct wsi_display_image *image = fence->image;
+
+   wsi_display_debug("%9lu fence %lu received %lu nsec %lu\n",
+                     pthread_self(), fence->sequence, frame, nsec);
+
+   connector->last_nsec = nsec;
+   connector->last_frame = frame;
    fence->event_received = true;
    wsi_display_fence_check_free(fence);
+   /* An attached image was waiting for this event before being flipped */
+   if (image) {
+      image->flip_sequence = ++image->chain->flip_sequence;
+      image->state = WSI_IMAGE_QUEUED;
+      VkResult result = _wsi_display_queue_next(&image->chain->base);
+      if (result != VK_SUCCESS)
+         image->chain->status = result;
+   }
 }
 
+
 static void
 wsi_display_fence_destroy(struct wsi_fence *fence_wsi)
 {
@@ -1513,6 +1574,7 @@  wsi_display_fence_alloc(VkDevice device,
    fence->event_received = false;
    fence->destroyed = false;
    fence->sequence = ++fence_sequence;
+   fence->image = NULL;
    return fence;
 }
 
@@ -1620,7 +1682,14 @@  _wsi_display_queue_next(struct wsi_swapchain *drv_chain)
       if (!image)
          return VK_SUCCESS;
 
+      if (image->fence) {
+         image->fence->image = NULL;
+         wsi_display_fence_destroy(&image->fence->base);
+         image->fence = NULL;
+      }
+
       int ret;
+
       if (connector->active) {
          ret = drmModePageFlip(wsi->fd, connector->crtc_id, image->fb_id,
                                    DRM_MODE_PAGE_FLIP_EVENT, image);
@@ -1691,6 +1760,66 @@  wsi_display_queue_present(struct wsi_swapchain *drv_chain,
 
    pthread_mutex_lock(&wsi->wait_mutex);
 
+   if (image->base.timing && image->base.timing->target_msc != 0) {
+      VkIcdSurfaceDisplay *surface = chain->surface;
+      wsi_display_mode *display_mode =
+         wsi_display_mode_from_handle(surface->displayMode);
+      wsi_display_connector *connector = display_mode->connector;
+
+      wsi_display_debug("delta frame %ld\n",
+                        image->base.timing->target_msc - connector->last_frame);
+      if (image->base.timing->target_msc > connector->last_frame) {
+         uint64_t frame_queued;
+         VkDisplayKHR display = wsi_display_connector_to_handle(connector);
+
+         wsi_display_debug_code(uint64_t current_frame, current_nsec;
+                                drmCrtcGetSequence(wsi->fd, connector->crtc_id,
+                                                   &current_frame,
+                                                   &current_nsec);
+                                wsi_display_debug("from current: %ld\n",
+                                                  image->base.timing->target_msc
+                                                  - current_frame));
+
+         image->fence = wsi_display_fence_alloc(chain->base.device,
+                                                chain->base.wsi,
+                                                display, &chain->base.alloc);
+
+         if (!image->fence) {
+            result = VK_ERROR_OUT_OF_HOST_MEMORY;
+            goto bail_unlock;
+         }
+
+         result = wsi_register_vblank_event(image->fence,
+                                            chain->base.wsi,
+                                            display,
+                                            0,
+                                            image->base.timing->target_msc - 1,
+                                            &frame_queued);
+
+         if (result != VK_SUCCESS)
+            goto bail_unlock;
+
+         /* Check and make sure we are queued for the right frame, otherwise
+          * just go queue an image
+          */
+         if (frame_queued <= image->base.timing->target_msc - 1) {
+            image->state = WSI_IMAGE_WAITING;
+
+            /*
+             * Don't set the image member until we're going to wait for the
+             * event to arrive before flipping to the image. That way, if the
+             * register_vblank_event call happens to process the event, it
+             * won't actually do anything
+             */
+            image->fence->image = image;
+            wsi_display_start_wait_thread(wsi);
+            result = VK_SUCCESS;
+            goto bail_unlock;
+         }
+
+      }
+   }
+
    image->flip_sequence = ++chain->flip_sequence;
    image->state = WSI_IMAGE_QUEUED;
 
@@ -1698,6 +1827,7 @@  wsi_display_queue_present(struct wsi_swapchain *drv_chain,
    if (result != VK_SUCCESS)
       chain->status = result;
 
+bail_unlock:
    pthread_mutex_unlock(&wsi->wait_mutex);
 
    if (result != VK_SUCCESS)
@@ -1706,6 +1836,21 @@  wsi_display_queue_present(struct wsi_swapchain *drv_chain,
    return chain->status;
 }
 
+/*
+ * get_refresh_cycle_duration hook for display swapchains.
+ *
+ * Converts the refresh rate of the surface's display mode into a period,
+ * scaled by 1e9 and rounded to the nearest integer, and stores it in
+ * duration->refreshDuration.  Always returns VK_SUCCESS.
+ */
+static VkResult
+wsi_display_get_refresh_cycle_duration(struct wsi_swapchain *drv_chain,
+                                       VkRefreshCycleDurationGOOGLE *duration)
+{
+   struct wsi_display_swapchain *chain =
+      (struct wsi_display_swapchain *) drv_chain;
+   wsi_display_mode *mode =
+      wsi_display_mode_from_handle(chain->surface->displayMode);
+
+   /* NOTE(review): presumably in Hz, giving a period in nanoseconds --
+    * confirm against wsi_display_mode_refresh */
+   double rate = wsi_display_mode_refresh(mode);
+
+   duration->refreshDuration = (uint64_t) (floor (1.0/rate * 1e9 + 0.5));
+
+   return VK_SUCCESS;
+}
+
 static VkResult
 wsi_display_surface_create_swapchain(
    VkIcdSurfaceBase *icd_surface,
@@ -1740,10 +1885,13 @@  wsi_display_surface_create_swapchain(
    chain->base.get_wsi_image = wsi_display_get_wsi_image;
    chain->base.acquire_next_image = wsi_display_acquire_next_image;
    chain->base.queue_present = wsi_display_queue_present;
+   chain->base.get_refresh_cycle_duration =
+      wsi_display_get_refresh_cycle_duration;
    chain->base.present_mode = create_info->presentMode;
    chain->base.image_count = num_images;
 
    chain->wsi = wsi;
+   chain->allocator = allocator;
    chain->status = VK_SUCCESS;
 
    chain->surface = (VkIcdSurfaceDisplay *) icd_surface;
@@ -2479,9 +2627,14 @@  wsi_get_swapchain_counter(VkDevice device,
       return VK_SUCCESS;
    }
 
-   int ret = drmCrtcGetSequence(wsi->fd, connector->crtc_id, value, NULL);
-   if (ret)
+   uint64_t nsec;
+   int ret = drmCrtcGetSequence(wsi->fd, connector->crtc_id, value, &nsec);
+   if (ret) {
       *value = 0;
+   } else {
+      connector->last_frame = *value;
+      connector->last_nsec = nsec;
+   }
 
    return VK_SUCCESS;
 }
diff --git a/src/vulkan/wsi/wsi_common_private.h b/src/vulkan/wsi/wsi_common_private.h
index 50a78acacb0..a0c97942954 100644
--- a/src/vulkan/wsi/wsi_common_private.h
+++ b/src/vulkan/wsi/wsi_common_private.h
@@ -25,6 +25,13 @@ 
 
 #include "wsi_common.h"
 
+/* One VK_GOOGLE_display_timing record tracked by a swapchain */
+struct wsi_timing {
+   /* Only records with this flag set are reported by
+    * wsi_common_get_past_presentation_timing */
+   bool complete;
+   /* Set once the record has been copied out to the application so that it
+    * is not reported a second time */
+   bool consumed;
+   /* Frame counter (MSC) value targeted for the present, derived from
+    * desiredPresentTime; 0 means no specific target */
+   uint64_t target_msc;
+   /* Timing values handed back to the application */
+   VkPastPresentationTimingGOOGLE timing;
+};
+
 struct wsi_image {
    VkImage image;
    VkDeviceMemory memory;
@@ -41,8 +48,16 @@  struct wsi_image {
    uint32_t offsets[4];
    uint32_t row_pitches[4];
    int fds[4];
+
+   VkQueryPool query_pool;
+
+   VkCommandBuffer timestamp_buffer;
+
+   struct wsi_timing *timing;
 };
 
+#define WSI_TIMING_HISTORY      16
+
 struct wsi_swapchain {
    const struct wsi_device *wsi;
 
@@ -54,6 +69,16 @@  struct wsi_swapchain {
 
    bool use_prime_blit;
 
+   uint32_t timing_insert;
+   uint32_t timing_count;
+
+   struct wsi_timing timing[WSI_TIMING_HISTORY];
+
+   uint64_t frame_msc;
+   uint64_t frame_ust;
+
+   float timestamp_period;
+
    /* Command pools, one per queue family */
    VkCommandPool *cmd_pools;
 
@@ -67,6 +92,10 @@  struct wsi_swapchain {
    VkResult (*queue_present)(struct wsi_swapchain *swap_chain,
                              uint32_t image_index,
                              const VkPresentRegionKHR *damage);
+   VkResult (*get_refresh_cycle_duration)(struct wsi_swapchain *swap_chain,
+                                          VkRefreshCycleDurationGOOGLE
+                                          *pDisplayTimingProperties);
+
 };
 
 bool
@@ -100,6 +129,12 @@  wsi_destroy_image(const struct wsi_swapchain *chain,
                   struct wsi_image *image);
 
 
+void
+wsi_mark_timing(struct wsi_swapchain *swapchain,
+                struct wsi_image *image,
+                uint64_t ust,
+                uint64_t msc);
+
 struct wsi_interface {
    VkResult (*get_support)(VkIcdSurfaceBase *surface,
                            struct wsi_device *wsi_device,
diff --git a/src/vulkan/wsi/wsi_common_x11.c b/src/vulkan/wsi/wsi_common_x11.c
index c740726fe54..4821329e533 100644
--- a/src/vulkan/wsi/wsi_common_x11.c
+++ b/src/vulkan/wsi/wsi_common_x11.c
@@ -687,6 +687,7 @@  struct x11_image {
    bool                                      busy;
    struct xshmfence *                        shm_fence;
    uint32_t                                  sync_fence;
+   uint32_t                                  serial;
 };
 
 struct x11_swapchain {
@@ -705,6 +706,8 @@  struct x11_swapchain {
    uint64_t                                     send_sbc;
    uint64_t                                     last_present_msc;
    uint32_t                                     stamp;
+   uint64_t                                     last_present_nsec;
+   uint64_t                                     refresh_period;
 
    bool                                         threaded;
    VkResult                                     status;
@@ -796,8 +799,39 @@  x11_handle_dri3_present_event(struct x11_swapchain *chain,
 
    case XCB_PRESENT_EVENT_COMPLETE_NOTIFY: {
       xcb_present_complete_notify_event_t *complete = (void *) event;
-      if (complete->kind == XCB_PRESENT_COMPLETE_KIND_PIXMAP)
+      if (complete->kind == XCB_PRESENT_COMPLETE_KIND_PIXMAP) {
+         uint64_t frames = complete->msc - chain->last_present_msc;
+         uint64_t present_nsec = complete->ust * 1000;
+
+         /*
+          * Well, this is about as good as we can do -- measure the refresh
+          * instead of asking for the current mode and using that. Turns out,
+          * for eDP panels, this works better anyways as they use the builtin
+          * fixed mode for everything
+          */
+         if (0 < frames && frames < 10 &&
+             present_nsec > chain->last_present_nsec)
+         {
+            uint64_t refresh_period =
+               (present_nsec - chain->last_present_nsec + frames / 2) / frames;
+
+            if (chain->refresh_period)
+               refresh_period =
+                  (3 * chain->refresh_period + refresh_period) >> 2;
+
+            chain->refresh_period = refresh_period;
+         }
+
          chain->last_present_msc = complete->msc;
+         chain->last_present_nsec = present_nsec;
+         for (unsigned i = 0; i < chain->base.image_count; i++) {
+            if (chain->images[i].serial == complete->serial) {
+               wsi_mark_timing(&chain->base, &chain->images[i].base,
+                               present_nsec, complete->msc);
+               break;
+            }
+         }
+      }
 
       VkResult result = VK_SUCCESS;
 
@@ -941,7 +975,7 @@  x11_acquire_next_image_from_queue(struct x11_swapchain *chain,
 
 static VkResult
 x11_present_to_x11(struct x11_swapchain *chain, uint32_t image_index,
-                   uint32_t target_msc)
+                   uint64_t target_msc)
 {
    struct x11_image *image = &chain->images[image_index];
 
@@ -963,11 +997,12 @@  x11_present_to_x11(struct x11_swapchain *chain, uint32_t image_index,
    xshmfence_reset(image->shm_fence);
 
    ++chain->send_sbc;
+   image->serial = (uint32_t) chain->send_sbc;
    xcb_void_cookie_t cookie =
       xcb_present_pixmap(chain->conn,
                          chain->window,
                          image->pixmap,
-                         (uint32_t) chain->send_sbc,
+                         image->serial,
                          0,                                    /* valid */
                          0,                                    /* update */
                          0,                                    /* x_off */
@@ -1017,6 +1052,26 @@  x11_queue_present(struct wsi_swapchain *anv_chain,
    }
 }
 
+/*
+ * Refresh period of the swapchain's output in nanoseconds.
+ *
+ * Uses the period measured from present-complete events when one is
+ * available; before the first measurement, falls back to the period of a
+ * nominal 59.98Hz display.
+ */
+static uint64_t
+x11_refresh_duration(struct x11_swapchain *chain)
+{
+   if (chain->refresh_period != 0)
+      return chain->refresh_period;
+
+   /* Nothing measured yet: assume a roughly 60Hz display */
+   return (uint64_t) (1e9 / 59.98 + 0.5);
+}
+
+/*
+ * get_refresh_cycle_duration hook for X11 swapchains: stores the value of
+ * x11_refresh_duration() in timings->refreshDuration.  Always returns
+ * VK_SUCCESS.
+ */
+static VkResult
+x11_get_refresh(struct wsi_swapchain *wsi_chain,
+                VkRefreshCycleDurationGOOGLE *timings)
+{
+   struct x11_swapchain *x11_chain = (struct x11_swapchain *) wsi_chain;
+   uint64_t period = x11_refresh_duration(x11_chain);
+
+   timings->refreshDuration = period;
+   return VK_SUCCESS;
+}
+
 static void *
 x11_manage_fifo_queues(void *state)
 {
@@ -1033,6 +1088,7 @@  x11_manage_fifo_queues(void *state)
        * other than the currently presented one.
        */
       uint32_t image_index = 0;
+      struct x11_image *image;
       result = wsi_queue_pull(&chain->present_queue, &image_index, INT64_MAX);
       assert(result != VK_TIMEOUT);
       if (result < 0) {
@@ -1045,6 +1101,13 @@  x11_manage_fifo_queues(void *state)
       }
 
       uint64_t target_msc = chain->last_present_msc + 1;
+
+      image = &chain->images[image_index];
+
+      struct wsi_timing *timing = image->base.timing;
+      if (timing && timing->target_msc != 0 && timing->target_msc > target_msc)
+         target_msc = timing->target_msc;
+
       result = x11_present_to_x11(chain, image_index, target_msc);
       if (result < 0)
          goto fail;
@@ -1348,6 +1411,7 @@  x11_surface_create_swapchain(VkIcdSurfaceBase *icd_surface,
    chain->base.acquire_next_image = x11_acquire_next_image;
    chain->base.queue_present = x11_queue_present;
    chain->base.present_mode = pCreateInfo->presentMode;
+   chain->base.get_refresh_cycle_duration = x11_get_refresh;
    chain->base.image_count = num_images;
    chain->conn = conn;
    chain->window = window;
@@ -1355,6 +1419,7 @@  x11_surface_create_swapchain(VkIcdSurfaceBase *icd_surface,
    chain->extent = pCreateInfo->imageExtent;
    chain->send_sbc = 0;
    chain->last_present_msc = 0;
+   chain->last_present_nsec = 0;
    chain->threaded = false;
    chain->status = VK_SUCCESS;
    chain->has_dri3_modifiers = wsi_conn->has_dri3_modifiers;