diff mbox

[2/4] drm/vc4: Syncobj import support

Message ID 20180421225022.7592-3-stschake@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Stefan Schake April 21, 2018, 10:50 p.m. UTC
Allow userland to specify a syncobj that is waited on before a render job
starts processing.

Signed-off-by: Stefan Schake <stschake@gmail.com>
---
 drivers/gpu/drm/vc4/vc4_drv.h |  2 ++
 drivers/gpu/drm/vc4/vc4_gem.c | 33 +++++++++++++++++++++++++++------
 include/uapi/drm/vc4_drm.h    |  9 +++++----
 3 files changed, 34 insertions(+), 10 deletions(-)

Comments

Eric Anholt April 23, 2018, 6:59 p.m. UTC | #1
Stefan Schake <stschake@gmail.com> writes:

> Allow userland to specify a syncobj that is waited on before a render job
> starts processing.
>
> Signed-off-by: Stefan Schake <stschake@gmail.com>
> ---
>  drivers/gpu/drm/vc4/vc4_drv.h |  2 ++
>  drivers/gpu/drm/vc4/vc4_gem.c | 33 +++++++++++++++++++++++++++------
>  include/uapi/drm/vc4_drm.h    |  9 +++++----
>  3 files changed, 34 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h
> index 4288615b66a2..3105df99cb12 100644
> --- a/drivers/gpu/drm/vc4/vc4_drv.h
> +++ b/drivers/gpu/drm/vc4/vc4_drv.h
> @@ -10,6 +10,8 @@
>  #include <drm/drmP.h>
>  #include <drm/drm_encoder.h>
>  #include <drm/drm_gem_cma_helper.h>
> +#include <drm/drm_syncobj.h>
> +

Drop the extra blank line added after the new #include?

>  #include "uapi/drm/vc4_drm.h"
>  
> diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c
> index 2107b0daf8ef..232363488125 100644
> --- a/drivers/gpu/drm/vc4/vc4_gem.c
> +++ b/drivers/gpu/drm/vc4/vc4_gem.c
> @@ -27,6 +27,7 @@
>  #include <linux/device.h>
>  #include <linux/io.h>
>  #include <linux/sched/signal.h>
> +#include <linux/dma-fence-array.h>
>  
>  #include "uapi/drm/vc4_drm.h"
>  #include "vc4_drv.h"
> @@ -1115,21 +1116,18 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
>  	struct drm_vc4_submit_cl *args = data;
>  	struct vc4_exec_info *exec;
>  	struct ww_acquire_ctx acquire_ctx;
> +	struct dma_fence *in_fence;
>  	int ret = 0;
>  
>  	if ((args->flags & ~(VC4_SUBMIT_CL_USE_CLEAR_COLOR |
>  			     VC4_SUBMIT_CL_FIXED_RCL_ORDER |
>  			     VC4_SUBMIT_CL_RCL_ORDER_INCREASING_X |
> -			     VC4_SUBMIT_CL_RCL_ORDER_INCREASING_Y)) != 0) {
> +			     VC4_SUBMIT_CL_RCL_ORDER_INCREASING_Y |
> +			     VC4_SUBMIT_CL_IMPORT_SYNCOBJ)) != 0) {
>  		DRM_DEBUG("Unknown flags: 0x%02x\n", args->flags);
>  		return -EINVAL;
>  	}
>  
> -	if (args->pad2 != 0) {
> -		DRM_DEBUG("->pad2 must be set to zero\n");
> -		return -EINVAL;
> -	}
> -
>  	exec = kcalloc(1, sizeof(*exec), GFP_KERNEL);
>  	if (!exec) {
>  		DRM_ERROR("malloc failure on exec struct\n");
> @@ -1164,6 +1162,29 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
>  		}
>  	}
>  
> +	if (args->flags & VC4_SUBMIT_CL_IMPORT_SYNCOBJ) {
> +		ret = drm_syncobj_find_fence(file_priv, args->in_sync,
> +					     &in_fence);
> +		if (ret)
> +			goto fail;
> +
> +		/* When the fence (or fence array) is exclusively from our
> +		 * context we can skip the wait since jobs are executed in
> +		 * order of their submission through this ioctl and this can
> +		 * only have fences from a prior job.
> +		 */
> +		if (!dma_fence_match_context(in_fence,
> +					     vc4->dma_fence_context)) {
> +			ret = dma_fence_wait(in_fence, true);
> +			if (ret) {
> +				dma_fence_put(in_fence);
> +				goto fail;
> +			}
> +		}
> +
> +		dma_fence_put(in_fence);
> +	}

I don't think we need an extra flag here.  0 is an invalid syncobj
handle, so that could be the indication that there's no input syncobj.

Long term, we should probably only block once we're ready to exec the
job, not at submit time.  However, I think we can wait on fixing that
until we start using the GPU scheduler.
diff mbox

Patch

diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h
index 4288615b66a2..3105df99cb12 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.h
+++ b/drivers/gpu/drm/vc4/vc4_drv.h
@@ -10,6 +10,8 @@ 
 #include <drm/drmP.h>
 #include <drm/drm_encoder.h>
 #include <drm/drm_gem_cma_helper.h>
+#include <drm/drm_syncobj.h>
+
 
 #include "uapi/drm/vc4_drm.h"
 
diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c
index 2107b0daf8ef..232363488125 100644
--- a/drivers/gpu/drm/vc4/vc4_gem.c
+++ b/drivers/gpu/drm/vc4/vc4_gem.c
@@ -27,6 +27,7 @@ 
 #include <linux/device.h>
 #include <linux/io.h>
 #include <linux/sched/signal.h>
+#include <linux/dma-fence-array.h>
 
 #include "uapi/drm/vc4_drm.h"
 #include "vc4_drv.h"
@@ -1115,21 +1116,18 @@  vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
 	struct drm_vc4_submit_cl *args = data;
 	struct vc4_exec_info *exec;
 	struct ww_acquire_ctx acquire_ctx;
+	struct dma_fence *in_fence;
 	int ret = 0;
 
 	if ((args->flags & ~(VC4_SUBMIT_CL_USE_CLEAR_COLOR |
 			     VC4_SUBMIT_CL_FIXED_RCL_ORDER |
 			     VC4_SUBMIT_CL_RCL_ORDER_INCREASING_X |
-			     VC4_SUBMIT_CL_RCL_ORDER_INCREASING_Y)) != 0) {
+			     VC4_SUBMIT_CL_RCL_ORDER_INCREASING_Y |
+			     VC4_SUBMIT_CL_IMPORT_SYNCOBJ)) != 0) {
 		DRM_DEBUG("Unknown flags: 0x%02x\n", args->flags);
 		return -EINVAL;
 	}
 
-	if (args->pad2 != 0) {
-		DRM_DEBUG("->pad2 must be set to zero\n");
-		return -EINVAL;
-	}
-
 	exec = kcalloc(1, sizeof(*exec), GFP_KERNEL);
 	if (!exec) {
 		DRM_ERROR("malloc failure on exec struct\n");
@@ -1164,6 +1162,29 @@  vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
 		}
 	}
 
+	if (args->flags & VC4_SUBMIT_CL_IMPORT_SYNCOBJ) {
+		ret = drm_syncobj_find_fence(file_priv, args->in_sync,
+					     &in_fence);
+		if (ret)
+			goto fail;
+
+		/* When the fence (or fence array) is exclusively from our
+		 * context we can skip the wait since jobs are executed in
+		 * order of their submission through this ioctl and this can
+		 * only have fences from a prior job.
+		 */
+		if (!dma_fence_match_context(in_fence,
+					     vc4->dma_fence_context)) {
+			ret = dma_fence_wait(in_fence, true);
+			if (ret) {
+				dma_fence_put(in_fence);
+				goto fail;
+			}
+		}
+
+		dma_fence_put(in_fence);
+	}
+
 	if (exec->args->bin_cl_size != 0) {
 		ret = vc4_get_bcl(dev, exec);
 		if (ret)
diff --git a/include/uapi/drm/vc4_drm.h b/include/uapi/drm/vc4_drm.h
index b95a0e11cb07..389f21931c25 100644
--- a/include/uapi/drm/vc4_drm.h
+++ b/include/uapi/drm/vc4_drm.h
@@ -173,6 +173,7 @@  struct drm_vc4_submit_cl {
 #define VC4_SUBMIT_CL_FIXED_RCL_ORDER			(1 << 1)
 #define VC4_SUBMIT_CL_RCL_ORDER_INCREASING_X		(1 << 2)
 #define VC4_SUBMIT_CL_RCL_ORDER_INCREASING_Y		(1 << 3)
+#define VC4_SUBMIT_CL_IMPORT_SYNCOBJ			(1 << 4)
 	__u32 flags;
 
 	/* Returned value of the seqno of this render job (for the
@@ -183,11 +184,11 @@  struct drm_vc4_submit_cl {
 	/* ID of the perfmon to attach to this job. 0 means no perfmon. */
 	__u32 perfmonid;
 
-	/* Unused field to align this struct on 64 bits. Must be set to 0.
-	 * If one ever needs to add an u32 field to this struct, this field
-	 * can be used.
+	/* Syncobj handle to wait on. Only read when the
+	 * VC4_SUBMIT_CL_IMPORT_SYNCOBJ flag is set; processing of this render
+	 * job will then not start until the syncobj is signalled.
 	 */
-	__u32 pad2;
+	__u32 in_sync;
 };
 
 /**