diff mbox series

[v3,15/17] drm/v3d: Create a CPU job extension to copy timestamp query to a buffer

Message ID 20231127185723.10348-17-mcanal@igalia.com (mailing list archive)
State New, archived
Headers show
Series drm/v3d: Introduce CPU jobs | expand

Commit Message

Maíra Canal Nov. 27, 2023, 6:48 p.m. UTC
A CPU job is a type of job that performs operations that requires CPU
intervention. A copy timestamp query job is a job that copy the complete
or partial result of a query to a buffer. As V3D doesn't provide any
mechanism to obtain a timestamp from the GPU, it is a job that needs
CPU intervention.

So, create a user extension for the CPU job that enables the creation
of a copy timestamp query job. This user extension will allow the creation
of a CPU job that copy the results of a timestamp query to a BO with the
possibility to indicate the timestamp availability with a availability bit.

Signed-off-by: Maíra Canal <mcanal@igalia.com>
---
 drivers/gpu/drm/v3d/v3d_drv.h    | 20 +++++++++
 drivers/gpu/drm/v3d/v3d_sched.c  | 53 ++++++++++++++++++++++++
 drivers/gpu/drm/v3d/v3d_submit.c | 69 ++++++++++++++++++++++++++++++++
 include/uapi/drm/v3d_drm.h       | 45 +++++++++++++++++++++
 4 files changed, 187 insertions(+)

Comments

Iago Toral Nov. 28, 2023, 8:54 a.m. UTC | #1
El lun, 27-11-2023 a las 15:48 -0300, Maíra Canal escribió:
(...)
>  /**
> diff --git a/include/uapi/drm/v3d_drm.h b/include/uapi/drm/v3d_drm.h
> index 930f8d07f088..a3ae1f220291 100644
> --- a/include/uapi/drm/v3d_drm.h
> +++ b/include/uapi/drm/v3d_drm.h
> @@ -75,6 +75,7 @@ struct drm_v3d_extension {
>  #define DRM_V3D_EXT_ID_CPU_INDIRECT_CSD                0x02
>  #define DRM_V3D_EXT_ID_CPU_TIMESTAMP_QUERY             0x03
>  #define DRM_V3D_EXT_ID_CPU_RESET_TIMESTAMP_QUERY       0x04
> +#define DRM_V3D_EXT_ID_CPU_COPY_TIMESTAMP_QUERY        0x05
>         __u32 flags; /* mbz */
>  };
>  
> @@ -451,6 +452,46 @@ struct drm_v3d_reset_timestamp_query {
>         __u32 count;
>  };
>  
> +/**
> + * struct drm_v3d_copy_timestamp_query - ioctl extension for the CPU
> job to copy
> + * query results to a buffer
> + *
> + * When an extension DRM_V3D_EXT_ID_CPU_COPY_TIMESTAMP_QUERY is
> defined, it
> + * points to this extension to define a copy timestamp query
> submission. This
> + * CPU job will copy the timestamp queries results to a BO with the
> offset
> + * and stride defined in the extension.
> + */
> +struct drm_v3d_copy_timestamp_query {
> +       struct drm_v3d_extension base;
> +
> +       /* Define if should write to buffer using 64 or 32 bits */
> +       __u8 do_64bit;
> +
> +       /* Define if it can write to buffer even if the query is not
> available */
> +       __u8 do_partial;
> +
> +       /* Define if it should write availability bit to buffer */
> +       __u8 availability_bit;
> +
> +       /* mbz */
> +       __u8 pad;
> +
> +       /* Offset of the buffer in the BO */
> +       __u32 offset;
> +
> +       /* Stride of the buffer in the BO */
> +       __u32 stride;
> +
> +       /* Number of queries */
> +       __u32 count;
> +
> +       /* Array of queries' offsets within the timestamp BO for
> their value */
> +       __u64 offsets;
> +
> +       /* Array of timestamp's syncobjs to indicate its availability
> */
> +       __u64 syncs;
> +};
> +
>  struct drm_v3d_submit_cpu {
>         /* Pointer to a u32 array of the BOs that are referenced by
> the job.
>          *
> @@ -462,6 +503,10 @@ struct drm_v3d_submit_cpu {
>          *
>          * For DRM_V3D_EXT_ID_CPU_RESET_TIMESTAMP_QUERY, it must
> contain only
>          * one BO, that contains the timestamp.
> +        *
> +        * For DRM_V3D_EXT_ID_CPU_COPY_TIMESTAMP_QUERY, it must
> contain two
> +        * BOs. The first is the BO for which the timestamp queries
> results
> +        * will be written to. The second is the BO that contains the
> timestamp.
>          */

(...) The first is the BO where the timestamp queries will be written
to. (...)

Iago

>         __u64 bo_handles;
>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h
index 3988407635ed..5058a354fffd 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.h
+++ b/drivers/gpu/drm/v3d/v3d_drv.h
@@ -320,6 +320,7 @@  enum v3d_cpu_job_type {
 	V3D_CPU_JOB_TYPE_INDIRECT_CSD = 1,
 	V3D_CPU_JOB_TYPE_TIMESTAMP_QUERY,
 	V3D_CPU_JOB_TYPE_RESET_TIMESTAMP_QUERY,
+	V3D_CPU_JOB_TYPE_COPY_TIMESTAMP_QUERY,
 };
 
 struct v3d_timestamp_query {
@@ -361,6 +362,23 @@  struct v3d_timestamp_query_info {
 	u32 count;
 };
 
+struct v3d_copy_query_results_info {
+	/* Define if should write to buffer using 64 or 32 bits */
+	bool do_64bit;
+
+	/* Define if it can write to buffer even if the query is not available */
+	bool do_partial;
+
+	/* Define if it should write availability bit to buffer */
+	bool availability_bit;
+
+	/* Offset of the copy buffer in the BO */
+	u32 offset;
+
+	/* Stride of the copy buffer in the BO */
+	u32 stride;
+};
+
 struct v3d_cpu_job {
 	struct v3d_job base;
 
@@ -369,6 +387,8 @@  struct v3d_cpu_job {
 	struct v3d_indirect_csd_info indirect_csd;
 
 	struct v3d_timestamp_query_info timestamp_query;
+
+	struct v3d_copy_query_results_info copy;
 };
 
 typedef void (*v3d_cpu_job_fn)(struct v3d_cpu_job *);
diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c
index 3a435f621b9b..07c897cd3423 100644
--- a/drivers/gpu/drm/v3d/v3d_sched.c
+++ b/drivers/gpu/drm/v3d/v3d_sched.c
@@ -361,11 +361,64 @@  v3d_reset_timestamp_queries(struct v3d_cpu_job *job)
 
 	v3d_put_bo_vaddr(bo);
 }
+static void
+write_to_buffer(void *dst, u32 idx, bool do_64bit, u64 value)
+{
+	if (do_64bit) {
+		u64 *dst64 = (u64 *) dst;
+		dst64[idx] = value;
+	} else {
+		u32 *dst32 = (u32 *) dst;
+		dst32[idx] = (u32) value;
+	}
+}
+
+static void
+v3d_copy_query_results(struct v3d_cpu_job *job)
+{
+	struct v3d_timestamp_query_info *timestamp_query = &job->timestamp_query;
+	struct v3d_timestamp_query *queries = timestamp_query->queries;
+	struct v3d_bo *bo = to_v3d_bo(job->base.bo[0]);
+	struct v3d_bo *timestamp = to_v3d_bo(job->base.bo[1]);
+	struct v3d_copy_query_results_info *copy = &job->copy;
+	struct dma_fence *fence;
+	u8 *query_addr;
+	bool available, write_result;
+	u8 *data;
+	int i;
+
+	v3d_get_bo_vaddr(bo);
+	v3d_get_bo_vaddr(timestamp);
+
+	data = ((u8 *) bo->vaddr) + copy->offset;
+
+	for (i = 0; i < timestamp_query->count; i++) {
+		fence = drm_syncobj_fence_get(queries[i].syncobj);
+		available = fence ? dma_fence_is_signaled(fence) : false;
+
+		write_result = available || copy->do_partial;
+		if (write_result) {
+			query_addr = ((u8 *) timestamp->vaddr) + queries[i].offset;
+			write_to_buffer(data, 0, copy->do_64bit, *((u64 *) query_addr));
+		}
+
+		if (copy->availability_bit)
+			write_to_buffer(data, 1, copy->do_64bit, available ? 1u : 0u);
+
+		data += copy->stride;
+
+		dma_fence_put(fence);
+	}
+
+	v3d_put_bo_vaddr(timestamp);
+	v3d_put_bo_vaddr(bo);
+}
 
 static const v3d_cpu_job_fn cpu_job_function[] = {
 	[V3D_CPU_JOB_TYPE_INDIRECT_CSD] = v3d_rewrite_csd_job_wg_counts_from_indirect,
 	[V3D_CPU_JOB_TYPE_TIMESTAMP_QUERY] = v3d_timestamp_query,
 	[V3D_CPU_JOB_TYPE_RESET_TIMESTAMP_QUERY] = v3d_reset_timestamp_queries,
+	[V3D_CPU_JOB_TYPE_COPY_TIMESTAMP_QUERY] = v3d_copy_query_results,
 };
 
 static struct dma_fence *
diff --git a/drivers/gpu/drm/v3d/v3d_submit.c b/drivers/gpu/drm/v3d/v3d_submit.c
index 1c719416e26a..bafd49c6440c 100644
--- a/drivers/gpu/drm/v3d/v3d_submit.c
+++ b/drivers/gpu/drm/v3d/v3d_submit.c
@@ -539,6 +539,71 @@  v3d_get_cpu_reset_timestamp_params(struct drm_file *file_priv,
 	return 0;
 }
 
+/* Get data for the copy timestamp query results job submission. */
+static int
+v3d_get_cpu_copy_query_results_params(struct drm_file *file_priv,
+				      struct drm_v3d_extension __user *ext,
+				      struct v3d_cpu_job *job)
+{
+	u32 __user *offsets, *syncs;
+	struct drm_v3d_copy_timestamp_query copy;
+	int i;
+
+	if (!job) {
+		DRM_DEBUG("CPU job extension was attached to a GPU job.\n");
+		return -EINVAL;
+	}
+
+	if (job->job_type) {
+		DRM_DEBUG("Two CPU job extensions were added to the same CPU job.\n");
+		return -EINVAL;
+	}
+
+	if (copy_from_user(&copy, ext, sizeof(copy)))
+		return -EFAULT;
+
+	if (copy.pad)
+		return -EINVAL;
+
+	job->job_type = V3D_CPU_JOB_TYPE_COPY_TIMESTAMP_QUERY;
+
+	job->timestamp_query.queries = kvmalloc_array(copy.count,
+						      sizeof(struct v3d_timestamp_query),
+						      GFP_KERNEL);
+	if (!job->timestamp_query.queries)
+		return -ENOMEM;
+
+	offsets = u64_to_user_ptr(copy.offsets);
+	syncs = u64_to_user_ptr(copy.syncs);
+
+	for (i = 0; i < copy.count; i++) {
+		u32 offset, sync;
+
+		if (copy_from_user(&offset, offsets++, sizeof(offset))) {
+			kvfree(job->timestamp_query.queries);
+			return -EFAULT;
+		}
+
+		job->timestamp_query.queries[i].offset = offset;
+
+		if (copy_from_user(&sync, syncs++, sizeof(sync))) {
+			kvfree(job->timestamp_query.queries);
+			return -EFAULT;
+		}
+
+		job->timestamp_query.queries[i].syncobj = drm_syncobj_find(file_priv, sync);
+	}
+	job->timestamp_query.count = copy.count;
+
+	job->copy.do_64bit = copy.do_64bit;
+	job->copy.do_partial = copy.do_partial;
+	job->copy.availability_bit = copy.availability_bit;
+	job->copy.offset = copy.offset;
+	job->copy.stride = copy.stride;
+
+	return 0;
+}
+
 /* Whenever userspace sets ioctl extensions, v3d_get_extensions parses data
  * according to the extension id (name).
  */
@@ -573,6 +638,9 @@  v3d_get_extensions(struct drm_file *file_priv,
 		case DRM_V3D_EXT_ID_CPU_RESET_TIMESTAMP_QUERY:
 			ret = v3d_get_cpu_reset_timestamp_params(file_priv, user_ext, job);
 			break;
+		case DRM_V3D_EXT_ID_CPU_COPY_TIMESTAMP_QUERY:
+			ret = v3d_get_cpu_copy_query_results_params(file_priv, user_ext, job);
+			break;
 		default:
 			DRM_DEBUG_DRIVER("Unknown extension id: %d\n", ext.id);
 			return -EINVAL;
@@ -951,6 +1019,7 @@  static const unsigned int cpu_job_bo_handle_count[] = {
 	[V3D_CPU_JOB_TYPE_INDIRECT_CSD] = 1,
 	[V3D_CPU_JOB_TYPE_TIMESTAMP_QUERY] = 1,
 	[V3D_CPU_JOB_TYPE_RESET_TIMESTAMP_QUERY] = 1,
+	[V3D_CPU_JOB_TYPE_COPY_TIMESTAMP_QUERY] = 2,
 };
 
 /**
diff --git a/include/uapi/drm/v3d_drm.h b/include/uapi/drm/v3d_drm.h
index 930f8d07f088..a3ae1f220291 100644
--- a/include/uapi/drm/v3d_drm.h
+++ b/include/uapi/drm/v3d_drm.h
@@ -75,6 +75,7 @@  struct drm_v3d_extension {
 #define DRM_V3D_EXT_ID_CPU_INDIRECT_CSD		0x02
 #define DRM_V3D_EXT_ID_CPU_TIMESTAMP_QUERY		0x03
 #define DRM_V3D_EXT_ID_CPU_RESET_TIMESTAMP_QUERY	0x04
+#define DRM_V3D_EXT_ID_CPU_COPY_TIMESTAMP_QUERY	0x05
 	__u32 flags; /* mbz */
 };
 
@@ -451,6 +452,46 @@  struct drm_v3d_reset_timestamp_query {
 	__u32 count;
 };
 
+/**
+ * struct drm_v3d_copy_timestamp_query - ioctl extension for the CPU job to copy
+ * query results to a buffer
+ *
+ * When an extension DRM_V3D_EXT_ID_CPU_COPY_TIMESTAMP_QUERY is defined, it
+ * points to this extension to define a copy timestamp query submission. This
+ * CPU job will copy the timestamp queries results to a BO with the offset
+ * and stride defined in the extension.
+ */
+struct drm_v3d_copy_timestamp_query {
+	struct drm_v3d_extension base;
+
+	/* Define if should write to buffer using 64 or 32 bits */
+	__u8 do_64bit;
+
+	/* Define if it can write to buffer even if the query is not available */
+	__u8 do_partial;
+
+	/* Define if it should write availability bit to buffer */
+	__u8 availability_bit;
+
+	/* mbz */
+	__u8 pad;
+
+	/* Offset of the buffer in the BO */
+	__u32 offset;
+
+	/* Stride of the buffer in the BO */
+	__u32 stride;
+
+	/* Number of queries */
+	__u32 count;
+
+	/* Array of queries' offsets within the timestamp BO for their value */
+	__u64 offsets;
+
+	/* Array of timestamp's syncobjs to indicate its availability */
+	__u64 syncs;
+};
+
 struct drm_v3d_submit_cpu {
 	/* Pointer to a u32 array of the BOs that are referenced by the job.
 	 *
@@ -462,6 +503,10 @@  struct drm_v3d_submit_cpu {
 	 *
 	 * For DRM_V3D_EXT_ID_CPU_RESET_TIMESTAMP_QUERY, it must contain only
 	 * one BO, that contains the timestamp.
+	 *
+	 * For DRM_V3D_EXT_ID_CPU_COPY_TIMESTAMP_QUERY, it must contain two
+	 * BOs. The first is the BO for which the timestamp queries results
+	 * will be written to. The second is the BO that contains the timestamp.
 	 */
 	__u64 bo_handles;