Patchwork [1/2] drm/vc4: Add a mechanism to easily extend CL submissions

login
register
mail settings
Submitter Boris BREZILLON
Date Dec. 7, 2017, 3:43 p.m.
Message ID <20171207154308.22440-2-boris.brezillon@free-electrons.com>
Download mbox | patch
Permalink /patch/10099495/
State New
Headers show

Comments

Boris BREZILLON - Dec. 7, 2017, 3:43 p.m.
The number of attributes/objects you can pass to the
DRM_IOCTL_VC4_SUBMIT_CL ioctl is limited by the drm_vc4_submit_cl struct
size.

Add a mechanism to easily pass extra attributes when submitting a CL to
the V3D engine.

Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/gpu/drm/vc4/vc4_drv.c |  1 +
 drivers/gpu/drm/vc4/vc4_drv.h |  2 ++
 drivers/gpu/drm/vc4/vc4_gem.c | 60 +++++++++++++++++++++++++++++++-
 include/uapi/drm/vc4_drm.h    | 81 +++++++++++++++++++++++++++++++++++++------
 4 files changed, 133 insertions(+), 11 deletions(-)
Eric Anholt - Dec. 22, 2017, 8:53 p.m.
Boris Brezillon <boris.brezillon@free-electrons.com> writes:

> The number of attributes/objects you can pass to the
> DRM_IOCTL_VC4_SUBMIT_CL ioctl is limited by the drm_vc4_submit_cl struct
> size.
>
> Add a mechanism to easily pass extra attributes when submitting a CL to
> the V3D engine.
>
> Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
> ---

> diff --git a/include/uapi/drm/vc4_drm.h b/include/uapi/drm/vc4_drm.h
> index 52263b575bdc..ddcaa72da82a 100644
> --- a/include/uapi/drm/vc4_drm.h
> +++ b/include/uapi/drm/vc4_drm.h
> @@ -70,6 +70,50 @@ struct drm_vc4_submit_rcl_surface {
>  };
>  
>  /**
> + * @VC4_BIN_CL_CHUNK: binner CL chunk
> + */
> +enum {
> +	VC4_BIN_CL_CHUNK,
> +};
> +
> +/**
> + * struct drm_vc4_submit_cl_chunk - dummy chunk
> + * @type: extension type
> + * @pad: unused, should be set to zero
> + *
> + * Meant to be used for chunks that do not require extra parameters.
> + */
> +struct drm_vc4_submit_cl_dummy_chunk {
> +	__u32 type;
> +	__u32 pad[3];
> +};
> +
> +/**
> + * struct drm_vc4_submit_cl_bin_chunk - binner CL chunk
> + *
> + * @type: extention type, should be set to %VC4_BIN_CL_CHUNK
> + * @size: size in bytes of the binner CL
> + * @ptr: userspace pointer to the binner CL
> + */
> +struct drm_vc4_submit_cl_bin_chunk {
> +	__u32 type;
> +	__u32 size;
> +	__u64 ptr;
> +};
> +
> +/**
> + * union drm_vc4_submit_cl_chunk - CL chunk
> + *
> + * CL chunks allow us to easily extend the set of arguments one can pass
> + * to the submit CL ioctl without having to add new ioctls/struct everytime
> + * we run out of free fields in the drm_vc4_submit_cl struct.
> + */
> +union drm_vc4_submit_cl_chunk {
> +	struct drm_vc4_submit_cl_dummy_chunk dummy;
> +	struct drm_vc4_submit_cl_bin_chunk bin;
> +};
> +
> +/**
>   * struct drm_vc4_submit_cl - ioctl argument for submitting commands to the 3D
>   * engine.
>   *
> @@ -83,14 +127,23 @@ struct drm_vc4_submit_rcl_surface {
>   * BO.
>   */
>  struct drm_vc4_submit_cl {
> -	/* Pointer to the binner command list.
> -	 *
> -	 * This is the first set of commands executed, which runs the
> -	 * coordinate shader to determine where primitives land on the screen,
> -	 * then writes out the state updates and draw calls necessary per tile
> -	 * to the tile allocation BO.
> -	 */
> -	__u64 bin_cl;
> +	union {
> +		/* Pointer to the binner command list.
> +		 *
> +		 * This is the first set of commands executed, which runs the
> +		 * coordinate shader to determine where primitives land on
> +		 * the screen, then writes out the state updates and draw calls
> +		 * necessary per tile to the tile allocation BO.
> +		 */
> +		__u64 bin_cl;
> +
> +		/* Pointer to an array of CL chunks.
> +		 *
> +		 * This is now the preferred way of passing optional attributes
> +		 * when submitting a job.
> +		 */
> +		__u64 cl_chunks;
> +	};
>  
>  	/* Pointer to the shader records.
>  	 *
> @@ -120,8 +173,14 @@ struct drm_vc4_submit_cl {
>  	__u64 uniforms;
>  	__u64 bo_handles;
>  
> -	/* Size in bytes of the binner command list. */
> -	__u32 bin_cl_size;
> +	union {
> +		/* Size in bytes of the binner command list. */
> +		__u32 bin_cl_size;
> +
> +		/* Number of entries in the CL extension array. */
> +		__u32 num_cl_chunks;
> +	};
> +
>  	/* Size in bytes of the set of shader records. */
>  	__u32 shader_rec_size;
>  	/* Number of shader records.
> @@ -167,6 +226,7 @@ struct drm_vc4_submit_cl {
>  #define VC4_SUBMIT_CL_FIXED_RCL_ORDER			(1 << 1)
>  #define VC4_SUBMIT_CL_RCL_ORDER_INCREASING_X		(1 << 2)
>  #define VC4_SUBMIT_CL_RCL_ORDER_INCREASING_Y		(1 << 3)
> +#define VC4_SUBMIT_CL_EXTENDED				(1 << 4)
>  	__u32 flags;
>  
>  	/* Returned value of the seqno of this render job (for the
> @@ -308,6 +368,7 @@ struct drm_vc4_get_hang_state {
>  #define DRM_VC4_PARAM_SUPPORTS_THREADED_FS	5
>  #define DRM_VC4_PARAM_SUPPORTS_FIXED_RCL_ORDER	6
>  #define DRM_VC4_PARAM_SUPPORTS_MADVISE		7
> +#define DRM_VC4_PARAM_SUPPORTS_EXTENDED_CL	8
>  
>  struct drm_vc4_get_param {
>  	__u32 param;
> -- 
> 2.11.0

The vivante folks just extended their batch submit for performance
monitors, and I was surprised to see that they actually extended their
struct (without even a flag indicating that userspace was submitting an
extended struct), which I thought we couldn't do.  Apparently 6 years
ago(!) the DRM core support was changed so that the driver always gets
an ioctl arg of the size it requested, and if userspace submitted
shorter then only the shorter amount is copied in/out and the rest is
zero-padded.

That means we could avoid this union stuff and even the whole idea of
chunks, and just have a single extra id for the perfmon to use in this
exec.  (assuming that 0 isn't a valid perfmon handle).

Does this sound good to you?  It seems like it would be a lot cleaner.
Boris BREZILLON - Dec. 22, 2017, 8:59 p.m.
On Fri, 22 Dec 2017 12:53:36 -0800
Eric Anholt <eric@anholt.net> wrote:

> Boris Brezillon <boris.brezillon@free-electrons.com> writes:
> 
> > The number of attributes/objects you can pass to the
> > DRM_IOCTL_VC4_SUBMIT_CL ioctl is limited by the drm_vc4_submit_cl struct
> > size.
> >
> > Add a mechanism to easily pass extra attributes when submitting a CL to
> > the V3D engine.
> >
> > Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
> > ---  
> 
> > diff --git a/include/uapi/drm/vc4_drm.h b/include/uapi/drm/vc4_drm.h
> > index 52263b575bdc..ddcaa72da82a 100644
> > --- a/include/uapi/drm/vc4_drm.h
> > +++ b/include/uapi/drm/vc4_drm.h
> > @@ -70,6 +70,50 @@ struct drm_vc4_submit_rcl_surface {
> >  };
> >  
> >  /**
> > + * @VC4_BIN_CL_CHUNK: binner CL chunk
> > + */
> > +enum {
> > +	VC4_BIN_CL_CHUNK,
> > +};
> > +
> > +/**
> > + * struct drm_vc4_submit_cl_chunk - dummy chunk
> > + * @type: extension type
> > + * @pad: unused, should be set to zero
> > + *
> > + * Meant to be used for chunks that do not require extra parameters.
> > + */
> > +struct drm_vc4_submit_cl_dummy_chunk {
> > +	__u32 type;
> > +	__u32 pad[3];
> > +};
> > +
> > +/**
> > + * struct drm_vc4_submit_cl_bin_chunk - binner CL chunk
> > + *
> > + * @type: extention type, should be set to %VC4_BIN_CL_CHUNK
> > + * @size: size in bytes of the binner CL
> > + * @ptr: userspace pointer to the binner CL
> > + */
> > +struct drm_vc4_submit_cl_bin_chunk {
> > +	__u32 type;
> > +	__u32 size;
> > +	__u64 ptr;
> > +};
> > +
> > +/**
> > + * union drm_vc4_submit_cl_chunk - CL chunk
> > + *
> > + * CL chunks allow us to easily extend the set of arguments one can pass
> > + * to the submit CL ioctl without having to add new ioctls/struct everytime
> > + * we run out of free fields in the drm_vc4_submit_cl struct.
> > + */
> > +union drm_vc4_submit_cl_chunk {
> > +	struct drm_vc4_submit_cl_dummy_chunk dummy;
> > +	struct drm_vc4_submit_cl_bin_chunk bin;
> > +};
> > +
> > +/**
> >   * struct drm_vc4_submit_cl - ioctl argument for submitting commands to the 3D
> >   * engine.
> >   *
> > @@ -83,14 +127,23 @@ struct drm_vc4_submit_rcl_surface {
> >   * BO.
> >   */
> >  struct drm_vc4_submit_cl {
> > -	/* Pointer to the binner command list.
> > -	 *
> > -	 * This is the first set of commands executed, which runs the
> > -	 * coordinate shader to determine where primitives land on the screen,
> > -	 * then writes out the state updates and draw calls necessary per tile
> > -	 * to the tile allocation BO.
> > -	 */
> > -	__u64 bin_cl;
> > +	union {
> > +		/* Pointer to the binner command list.
> > +		 *
> > +		 * This is the first set of commands executed, which runs the
> > +		 * coordinate shader to determine where primitives land on
> > +		 * the screen, then writes out the state updates and draw calls
> > +		 * necessary per tile to the tile allocation BO.
> > +		 */
> > +		__u64 bin_cl;
> > +
> > +		/* Pointer to an array of CL chunks.
> > +		 *
> > +		 * This is now the preferred way of passing optional attributes
> > +		 * when submitting a job.
> > +		 */
> > +		__u64 cl_chunks;
> > +	};
> >  
> >  	/* Pointer to the shader records.
> >  	 *
> > @@ -120,8 +173,14 @@ struct drm_vc4_submit_cl {
> >  	__u64 uniforms;
> >  	__u64 bo_handles;
> >  
> > -	/* Size in bytes of the binner command list. */
> > -	__u32 bin_cl_size;
> > +	union {
> > +		/* Size in bytes of the binner command list. */
> > +		__u32 bin_cl_size;
> > +
> > +		/* Number of entries in the CL extension array. */
> > +		__u32 num_cl_chunks;
> > +	};
> > +
> >  	/* Size in bytes of the set of shader records. */
> >  	__u32 shader_rec_size;
> >  	/* Number of shader records.
> > @@ -167,6 +226,7 @@ struct drm_vc4_submit_cl {
> >  #define VC4_SUBMIT_CL_FIXED_RCL_ORDER			(1 << 1)
> >  #define VC4_SUBMIT_CL_RCL_ORDER_INCREASING_X		(1 << 2)
> >  #define VC4_SUBMIT_CL_RCL_ORDER_INCREASING_Y		(1 << 3)
> > +#define VC4_SUBMIT_CL_EXTENDED				(1 << 4)
> >  	__u32 flags;
> >  
> >  	/* Returned value of the seqno of this render job (for the
> > @@ -308,6 +368,7 @@ struct drm_vc4_get_hang_state {
> >  #define DRM_VC4_PARAM_SUPPORTS_THREADED_FS	5
> >  #define DRM_VC4_PARAM_SUPPORTS_FIXED_RCL_ORDER	6
> >  #define DRM_VC4_PARAM_SUPPORTS_MADVISE		7
> > +#define DRM_VC4_PARAM_SUPPORTS_EXTENDED_CL	8
> >  
> >  struct drm_vc4_get_param {
> >  	__u32 param;
> > -- 
> > 2.11.0  
> 
> The vivante folks just extended their batch submit for performance
> monitors, and I was surprised to see that they actually extended their
> struct (without even a flag indicating that userspace was submitting an
> extended struct), which I thought we couldn't do.  Apparently 6 years
> ago(!) the DRM core support was changed so that the driver always gets
> an ioctl arg of the size it requested, and if userspace submitted
> shorter then only the shorter amount is copied in/out and the rest is
> zero-padded.
> 
> That means we could avoid this union stuff and even the whole idea of
> chunks, and just have a single extra id for the perfmon to use in this
> exec.  (assuming that 0 isn't a valid perfmon handle).

0 was a valid ID in my implementation, but I can easily exclude it.

> 
> Does this sound good to you?  It seems like it would be a lot cleaner.

Yep, I'll rework the patch to just extend the drm_vc4_submit_cl
struct (add a new u32 perfmonid field).

Patch

diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c
index e3c29729da2e..5c62013f8ca3 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.c
+++ b/drivers/gpu/drm/vc4/vc4_drv.c
@@ -101,6 +101,7 @@  static int vc4_get_param_ioctl(struct drm_device *dev, void *data,
 	case DRM_VC4_PARAM_SUPPORTS_THREADED_FS:
 	case DRM_VC4_PARAM_SUPPORTS_FIXED_RCL_ORDER:
 	case DRM_VC4_PARAM_SUPPORTS_MADVISE:
+	case DRM_VC4_PARAM_SUPPORTS_EXTENDED_CL:
 		args->value = true;
 		break;
 	default:
diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h
index 9c0d380c96f2..3c54cc386443 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.h
+++ b/drivers/gpu/drm/vc4/vc4_drv.h
@@ -318,6 +318,8 @@  struct vc4_exec_info {
 
 	/* Kernel-space copy of the ioctl arguments */
 	struct drm_vc4_submit_cl *args;
+	uint32_t nchunks;
+	union drm_vc4_submit_cl_chunk *chunks;
 
 	/* This is the array of BOs that were looked up at the start of exec.
 	 * Command validation will use indices into this array.
diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c
index 6c32c89a83a9..06976c61422a 100644
--- a/drivers/gpu/drm/vc4/vc4_gem.c
+++ b/drivers/gpu/drm/vc4/vc4_gem.c
@@ -920,6 +920,7 @@  vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec)
 	}
 	mutex_unlock(&vc4->power_lock);
 
+	kfree(exec->chunks);
 	kfree(exec);
 }
 
@@ -1048,6 +1049,27 @@  vc4_wait_bo_ioctl(struct drm_device *dev, void *data,
 	return ret;
 }
 
+static int
+vc4_parse_cl_chunk(struct vc4_dev *vc4, struct vc4_exec_info *exec,
+		   const union drm_vc4_submit_cl_chunk *chunk)
+{
+	switch(chunk->dummy.type) {
+	case VC4_BIN_CL_CHUNK:
+		/* Update bin_cl related fields so that we don't have to patch
+		 * existing vc4_get_bcl() logic to support the BIN_CL
+		 * chunk.
+		 */
+		exec->args->bin_cl = chunk->bin.ptr;
+		exec->args->bin_cl_size = chunk->bin.size;
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 /**
  * vc4_submit_cl_ioctl() - Submits a job (frame) to the VC4.
  * @dev: DRM device
@@ -1073,11 +1095,18 @@  vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
 	if ((args->flags & ~(VC4_SUBMIT_CL_USE_CLEAR_COLOR |
 			     VC4_SUBMIT_CL_FIXED_RCL_ORDER |
 			     VC4_SUBMIT_CL_RCL_ORDER_INCREASING_X |
-			     VC4_SUBMIT_CL_RCL_ORDER_INCREASING_Y)) != 0) {
+			     VC4_SUBMIT_CL_RCL_ORDER_INCREASING_Y |
+			     VC4_SUBMIT_CL_EXTENDED)) != 0) {
 		DRM_DEBUG("Unknown flags: 0x%02x\n", args->flags);
 		return -EINVAL;
 	}
 
+	if ((args->flags & VC4_SUBMIT_CL_EXTENDED) &&
+	    !args->num_cl_chunks) {
+		DRM_DEBUG("CL_EXTENDED flag set but no chunks provided\n");
+		return -EINVAL;
+	}
+
 	exec = kcalloc(1, sizeof(*exec), GFP_KERNEL);
 	if (!exec) {
 		DRM_ERROR("malloc failure on exec struct\n");
@@ -1103,6 +1132,35 @@  vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
 	if (ret)
 		goto fail;
 
+	if (args->flags & VC4_SUBMIT_CL_EXTENDED) {
+		uint32_t i;
+
+		ret = -ENOMEM;
+		exec->nchunks = args->num_cl_chunks;
+		exec->chunks = kcalloc(exec->nchunks, sizeof(*exec->chunks),
+				       GFP_KERNEL);
+		if (!exec->chunks)
+			goto fail;
+
+		if (copy_from_user(exec->chunks,
+				   u64_to_user_ptr(args->cl_chunks),
+				   exec->nchunks *
+				   sizeof(*exec->chunks)))
+			goto fail;
+
+		/* Reset ->bin_cl fields so that if no BIN_CL chunk is
+		 * passed, vc4_get_bcl() is skipped.
+		 */
+		exec->args->bin_cl = 0;
+		exec->args->bin_cl_size = 0;
+		for (i = 0; i < exec->nchunks; i++) {
+			ret = vc4_parse_cl_chunk(vc4, exec,
+						 &exec->chunks[i]);
+			if (ret)
+				goto fail;
+		}
+	}
+
 	if (exec->args->bin_cl_size != 0) {
 		ret = vc4_get_bcl(dev, exec);
 		if (ret)
diff --git a/include/uapi/drm/vc4_drm.h b/include/uapi/drm/vc4_drm.h
index 52263b575bdc..ddcaa72da82a 100644
--- a/include/uapi/drm/vc4_drm.h
+++ b/include/uapi/drm/vc4_drm.h
@@ -70,6 +70,50 @@  struct drm_vc4_submit_rcl_surface {
 };
 
 /**
+ * @VC4_BIN_CL_CHUNK: binner CL chunk
+ */
+enum {
+	VC4_BIN_CL_CHUNK,
+};
+
+/**
+ * struct drm_vc4_submit_cl_chunk - dummy chunk
+ * @type: extension type
+ * @pad: unused, should be set to zero
+ *
+ * Meant to be used for chunks that do not require extra parameters.
+ */
+struct drm_vc4_submit_cl_dummy_chunk {
+	__u32 type;
+	__u32 pad[3];
+};
+
+/**
+ * struct drm_vc4_submit_cl_bin_chunk - binner CL chunk
+ *
+ * @type: extention type, should be set to %VC4_BIN_CL_CHUNK
+ * @size: size in bytes of the binner CL
+ * @ptr: userspace pointer to the binner CL
+ */
+struct drm_vc4_submit_cl_bin_chunk {
+	__u32 type;
+	__u32 size;
+	__u64 ptr;
+};
+
+/**
+ * union drm_vc4_submit_cl_chunk - CL chunk
+ *
+ * CL chunks allow us to easily extend the set of arguments one can pass
+ * to the submit CL ioctl without having to add new ioctls/struct everytime
+ * we run out of free fields in the drm_vc4_submit_cl struct.
+ */
+union drm_vc4_submit_cl_chunk {
+	struct drm_vc4_submit_cl_dummy_chunk dummy;
+	struct drm_vc4_submit_cl_bin_chunk bin;
+};
+
+/**
  * struct drm_vc4_submit_cl - ioctl argument for submitting commands to the 3D
  * engine.
  *
@@ -83,14 +127,23 @@  struct drm_vc4_submit_rcl_surface {
  * BO.
  */
 struct drm_vc4_submit_cl {
-	/* Pointer to the binner command list.
-	 *
-	 * This is the first set of commands executed, which runs the
-	 * coordinate shader to determine where primitives land on the screen,
-	 * then writes out the state updates and draw calls necessary per tile
-	 * to the tile allocation BO.
-	 */
-	__u64 bin_cl;
+	union {
+		/* Pointer to the binner command list.
+		 *
+		 * This is the first set of commands executed, which runs the
+		 * coordinate shader to determine where primitives land on
+		 * the screen, then writes out the state updates and draw calls
+		 * necessary per tile to the tile allocation BO.
+		 */
+		__u64 bin_cl;
+
+		/* Pointer to an array of CL chunks.
+		 *
+		 * This is now the preferred way of passing optional attributes
+		 * when submitting a job.
+		 */
+		__u64 cl_chunks;
+	};
 
 	/* Pointer to the shader records.
 	 *
@@ -120,8 +173,14 @@  struct drm_vc4_submit_cl {
 	__u64 uniforms;
 	__u64 bo_handles;
 
-	/* Size in bytes of the binner command list. */
-	__u32 bin_cl_size;
+	union {
+		/* Size in bytes of the binner command list. */
+		__u32 bin_cl_size;
+
+		/* Number of entries in the CL extension array. */
+		__u32 num_cl_chunks;
+	};
+
 	/* Size in bytes of the set of shader records. */
 	__u32 shader_rec_size;
 	/* Number of shader records.
@@ -167,6 +226,7 @@  struct drm_vc4_submit_cl {
 #define VC4_SUBMIT_CL_FIXED_RCL_ORDER			(1 << 1)
 #define VC4_SUBMIT_CL_RCL_ORDER_INCREASING_X		(1 << 2)
 #define VC4_SUBMIT_CL_RCL_ORDER_INCREASING_Y		(1 << 3)
+#define VC4_SUBMIT_CL_EXTENDED				(1 << 4)
 	__u32 flags;
 
 	/* Returned value of the seqno of this render job (for the
@@ -308,6 +368,7 @@  struct drm_vc4_get_hang_state {
 #define DRM_VC4_PARAM_SUPPORTS_THREADED_FS	5
 #define DRM_VC4_PARAM_SUPPORTS_FIXED_RCL_ORDER	6
 #define DRM_VC4_PARAM_SUPPORTS_MADVISE		7
+#define DRM_VC4_PARAM_SUPPORTS_EXTENDED_CL	8
 
 struct drm_vc4_get_param {
 	__u32 param;