diff mbox

[9/9] drm/syncobj: Allow wait for submit and signal behavior (v4)

Message ID 1502491174-10913-10-git-send-email-jason.ekstrand@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Jason Ekstrand Aug. 11, 2017, 10:39 p.m. UTC
Vulkan VkFence semantics require that the application be able to perform
a CPU wait on work which may not yet have been submitted.  This is
perfectly safe because the CPU wait has a timeout which will get
triggered eventually if no work is ever submitted.  This behavior is
advantageous for multi-threaded workloads because, so long as all of the
threads agree on what fences to use up-front, you don't have the extra
cross-thread synchronization cost of thread A telling thread B that it
has submitted its dependent work and thread B is now free to wait.

Within a single process, this can be implemented in the userspace driver
by doing exactly the same kind of tracking the app would have to do
using posix condition variables or similar.  However, in order for this
to work cross-process (as is required by VK_KHR_external_fence), we need
to handle this in the kernel.

This commit adds a WAIT_FOR_SUBMIT flag to DRM_IOCTL_SYNCOBJ_WAIT which
instructs the IOCTL to wait for the syncobj to have a non-null fence and
then wait on the fence.  Combined with DRM_IOCTL_SYNCOBJ_RESET, you can
easily get the Vulkan behavior.

v2:
 - Fix a bug in the invalid syncobj error path
 - Unify the wait-all and wait-any cases
v3:
 - Unify the timeout == 0 case a bit with the timeout > 0 case
 - use wait_event_interruptible_timeout
v4:
 - Use proxy fence

Signed-off-by: Jason Ekstrand <jason@jlekstrand.net>
Cc: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/drm_syncobj.c | 95 +++++++++++++++++++++++++++++++++++++++----
 include/drm/drm_syncobj.h     |  3 +-
 include/uapi/drm/drm.h        |  1 +
 3 files changed, 91 insertions(+), 8 deletions(-)
diff mbox

Patch

diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index 226f1e7..4f7c5e5 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -51,6 +51,7 @@ 
 #include <linux/fs.h>
 #include <linux/anon_inodes.h>
 #include <linux/sync_file.h>
+#include <linux/dma-fence-proxy.h>
 
 #include "drm_internal.h"
 #include <drm/drm_syncobj.h>
@@ -82,10 +83,45 @@  EXPORT_SYMBOL(drm_syncobj_find);
 
 struct dma_fence *drm_syncobj_fence_get(struct drm_syncobj *syncobj)
 {
-	return dma_fence_get_rcu_safe(&syncobj->_fence);
+	struct dma_fence *fence = dma_fence_get_rcu_safe(&syncobj->_fence);
+
+	/* Don't hand out our internal proxy fence.  Proxy fences only exist
+	 * to implement DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT and may never
+	 * get signaled, so a syncobj which holds only a proxy is reported
+	 * to callers of this function as having no fence (NULL).
+	 */
+	if (fence && dma_fence_is_proxy_tagged(fence, drm_syncobj_free)) {
+		dma_fence_put(fence);
+		return NULL;
+	}
+
+	return fence;
 }
 EXPORT_SYMBOL(drm_syncobj_fence_get);
 
+/* Get a reference to the syncobj's fence; if it has none, install a proxy
+ * fence and return that.  Returns NULL only if creating the proxy fails.
+ */
+static struct dma_fence *drm_syncobj_fence_proxy_get(struct drm_syncobj *syncobj)
+{
+	struct dma_fence *fence, *proxy;
+
+	for (;;) {
+		fence = dma_fence_get_rcu_safe(&syncobj->_fence);
+		if (fence)
+			return fence;
+
+		proxy = dma_fence_create_proxy("drm_syncobj", drm_syncobj_free);
+		if (!proxy)
+			return NULL;
+
+		if (!cmpxchg(&syncobj->_fence, NULL, proxy))
+			return dma_fence_get(proxy);
+
+		dma_fence_put(proxy);
+	}
+}
+
 /**
  * drm_syncobj_replace_fence - replace fence in a sync object.
  * @syncobj: Sync object to replace fence in
@@ -97,10 +133,40 @@  void drm_syncobj_replace_fence(struct drm_syncobj *syncobj,
 			       struct dma_fence *fence)
 {
 	struct dma_fence *old_fence;
+	bool old_fence_is_proxy;
 
-	if (fence)
-		dma_fence_get(fence);
-	old_fence = xchg(&syncobj->_fence, fence);
+	dma_fence_get(fence);
+
+	do {
+		old_fence = dma_fence_get_rcu_safe(&syncobj->_fence);
+		old_fence_is_proxy = old_fence &&
+			dma_fence_is_proxy_tagged(old_fence, drm_syncobj_free);
+		dma_fence_put(old_fence);
+
+		if (!fence && old_fence_is_proxy) {
+			/* If we're replacing a proxy with NULL, just leave
+			 * the proxy.
+			 */
+			return;
+		}
+		/* Retry from the top if the fence changed under us. */
+		if (cmpxchg(&syncobj->_fence, old_fence, fence) == old_fence)
+			break;
+	} while (1);
+
+	if (fence && old_fence_is_proxy) {
+		/* We just replaced a proxy fence with a real fence, so
+		 * assign the real fence to the proxy so that it gets
+		 * triggered when the real fence triggers.  A proxy which
+		 * never has a real fence assigned to it (for instance,
+		 * because nothing is ever submitted) is never signaled.
+		 * This is ok because the only code which waits on our
+		 * proxy fences is SYNCOBJ_WAIT with
+		 * DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT set and the wait
+		 * ioctl has a timeout which will eventually trigger.
+		 */
+		dma_fence_proxy_assign(old_fence, fence);
+	}
 
 	dma_fence_put(old_fence);
 }
@@ -544,13 +610,15 @@  drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
 	struct drm_syncobj_wait *args = data;
 	uint32_t *handles;
 	struct dma_fence **fences;
+	struct drm_syncobj *syncobj;
 	int ret = 0;
 	uint32_t i;
 
 	if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
 		return -ENODEV;
 
-	if (args->flags != 0 && args->flags != DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL)
+	if (args->flags & ~(DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL |
+			    DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT))
 		return -EINVAL;
 
 	if (args->count_handles == 0)
@@ -577,8 +645,21 @@  drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
 	}
 
 	for (i = 0; i < args->count_handles; i++) {
-		ret = drm_syncobj_find_fence(file_private, handles[i],
-					     &fences[i]);
+		syncobj = drm_syncobj_find(file_private, handles[i]);
+		if (!syncobj) {
+			ret = -ENOENT;
+			goto err_free_fence_array;
+		}
+
+		if (args->flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) {
+			fences[i] = drm_syncobj_fence_proxy_get(syncobj);
+			if (!fences[i])
+				ret = -ENOMEM;
+		} else {
+			fences[i] = drm_syncobj_fence_get(syncobj);
+			if (!fences[i])
+				ret = -EINVAL;
+		}
 		if (ret)
 			goto err_free_fence_array;
 	}
diff --git a/include/drm/drm_syncobj.h b/include/drm/drm_syncobj.h
index c06a441..29037d5 100644
--- a/include/drm/drm_syncobj.h
+++ b/include/drm/drm_syncobj.h
@@ -42,7 +42,8 @@  struct drm_syncobj {
 	struct kref refcount;
 	/**
 	 * @fence:
-	 * NULL or a pointer to the fence bound to this object.
+	 * NULL or a pointer to the fence bound to this object or a pointer
+	 * to a proxy fence which will be assigned to the next bound fence.
 	 *
 	 * This pointer should not be accessed directly.  Instead, use
 	 * drm_syncobj_fence_get or drm_syncobj_replace_fence.
diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
index 4b301b4..f8ec8fe 100644
--- a/include/uapi/drm/drm.h
+++ b/include/uapi/drm/drm.h
@@ -719,6 +719,7 @@  struct drm_syncobj_handle {
 };
 
 #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0)
+#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1)
 struct drm_syncobj_wait {
 	__u64 handles;
 	/* absolute timeout */