Message ID | 20231127185723.10348-19-mcanal@igalia.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | drm/v3d: Introduce CPU jobs | expand |
El lun, 27-11-2023 a las 15:48 -0300, Maíra Canal escribió: > A CPU job is a type of job that performs operations that requires CPU > intervention. A copy performance query job is a job that copy the > complete > or partial result of a query to a buffer. In order to copy the result > of > a performance query to a buffer, we need to get the values from the > performance monitors. > > So, create a user extension for the CPU job that enables the creation > of a copy performance query job. This user extension will allow the > creation > of a CPU job that copy the results of a performance query to a BO > with the > possibility to indicate the availability with a availability bit. > > Signed-off-by: Maíra Canal <mcanal@igalia.com> > --- > drivers/gpu/drm/v3d/v3d_drv.h | 1 + > drivers/gpu/drm/v3d/v3d_sched.c | 66 +++++++++++++++++++++++++ > drivers/gpu/drm/v3d/v3d_submit.c | 82 > ++++++++++++++++++++++++++++++++ > include/uapi/drm/v3d_drm.h | 50 +++++++++++++++++++ > 4 files changed, 199 insertions(+) > > diff --git a/drivers/gpu/drm/v3d/v3d_drv.h > b/drivers/gpu/drm/v3d/v3d_drv.h > index 0f7f80ad8d88..3c7d58866570 100644 > --- a/drivers/gpu/drm/v3d/v3d_drv.h > +++ b/drivers/gpu/drm/v3d/v3d_drv.h > @@ -322,6 +322,7 @@ enum v3d_cpu_job_type { > V3D_CPU_JOB_TYPE_RESET_TIMESTAMP_QUERY, > V3D_CPU_JOB_TYPE_COPY_TIMESTAMP_QUERY, > V3D_CPU_JOB_TYPE_RESET_PERFORMANCE_QUERY, > + V3D_CPU_JOB_TYPE_COPY_PERFORMANCE_QUERY, > }; > > struct v3d_timestamp_query { > diff --git a/drivers/gpu/drm/v3d/v3d_sched.c > b/drivers/gpu/drm/v3d/v3d_sched.c > index 452c4a1db52e..203c32ed99d4 100644 > --- a/drivers/gpu/drm/v3d/v3d_sched.c > +++ b/drivers/gpu/drm/v3d/v3d_sched.c > @@ -450,12 +450,78 @@ v3d_reset_performance_queries(struct > v3d_cpu_job *job) > } > } > > +static void > +v3d_write_performance_query_result(struct v3d_cpu_job *job, void > *data, u32 query) > +{ > + struct v3d_performance_query_info *performance_query = &job- > >performance_query; > + struct v3d_copy_query_results_info 
*copy = &job->copy; > + struct v3d_file_priv *v3d_priv = job->base.file->driver_priv; > + struct v3d_dev *v3d = job->base.v3d; > + struct v3d_perfmon *perfmon; > + u64 counter_values[V3D_PERFCNT_NUM]; > + > + for (int i = 0; i < performance_query->nperfmons; i++) { > + perfmon = v3d_perfmon_find(v3d_priv, > + performance_query- > >queries[query].kperfmon_ids[i]); > + if (!perfmon) { > + DRM_DEBUG("Failed to find perfmon."); > + continue; > + } > + > + v3d_perfmon_stop(v3d, perfmon, true); > + > + memcpy(&counter_values[i * > DRM_V3D_MAX_PERF_COUNTERS], perfmon->values, > + perfmon->ncounters * sizeof(u64)); > + > + v3d_perfmon_put(perfmon); > + } > + > + for (int i = 0; i < performance_query->ncounters; i++) > + write_to_buffer(data, i, copy->do_64bit, > counter_values[i]); > +} > + > + > +static void > +v3d_copy_performance_query(struct v3d_cpu_job *job) > +{ > + struct v3d_performance_query_info *performance_query = &job- > >performance_query; > + struct v3d_copy_query_results_info *copy = &job->copy; > + struct v3d_bo *bo = to_v3d_bo(job->base.bo[0]); > + struct dma_fence *fence; > + bool available, write_result; > + u8 *data; > + > + v3d_get_bo_vaddr(bo); > + > + data = ((u8 *) bo->vaddr) + copy->offset; > + > + for (int i = 0; i < performance_query->count; i++) { > + fence = drm_syncobj_fence_get(performance_query- > >queries[i].syncobj); > + available = fence ? dma_fence_is_signaled(fence) : > false; > + > + write_result = available || copy->do_partial; > + if (write_result) > + v3d_write_performance_query_result(job, data, > i); > + > + if (copy->availability_bit) > + write_to_buffer(data, performance_query- > >ncounters, > + copy->do_64bit, available ? 
> 1u : 0u); > + > + data += copy->stride; > + > + dma_fence_put(fence); > + } > + > + v3d_put_bo_vaddr(bo); > +} > + > static const v3d_cpu_job_fn cpu_job_function[] = { > [V3D_CPU_JOB_TYPE_INDIRECT_CSD] = > v3d_rewrite_csd_job_wg_counts_from_indirect, > [V3D_CPU_JOB_TYPE_TIMESTAMP_QUERY] = v3d_timestamp_query, > [V3D_CPU_JOB_TYPE_RESET_TIMESTAMP_QUERY] = > v3d_reset_timestamp_queries, > [V3D_CPU_JOB_TYPE_COPY_TIMESTAMP_QUERY] = > v3d_copy_query_results, > [V3D_CPU_JOB_TYPE_RESET_PERFORMANCE_QUERY] = > v3d_reset_performance_queries, > + [V3D_CPU_JOB_TYPE_COPY_PERFORMANCE_QUERY] = > v3d_copy_performance_query, > }; > > static struct dma_fence * > diff --git a/drivers/gpu/drm/v3d/v3d_submit.c > b/drivers/gpu/drm/v3d/v3d_submit.c > index 20af8ae14831..d7a9da2484fd 100644 > --- a/drivers/gpu/drm/v3d/v3d_submit.c > +++ b/drivers/gpu/drm/v3d/v3d_submit.c > @@ -672,6 +672,84 @@ v3d_get_cpu_reset_performance_params(struct > drm_file *file_priv, > return 0; > } > > +static int > +v3d_get_cpu_copy_performance_query_params(struct drm_file > *file_priv, > + struct drm_v3d_extension > __user *ext, > + struct v3d_cpu_job *job) > +{ > + u32 __user *syncs; > + u64 __user *kperfmon_ids; > + struct drm_v3d_copy_performance_query copy; > + > + if (!job) { > + DRM_DEBUG("CPU job extension was attached to a GPU > job.\n"); > + return -EINVAL; > + } > + > + if (job->job_type) { > + DRM_DEBUG("Two CPU job extensions were added to the > same CPU job.\n"); > + return -EINVAL; > + } > + > + if (copy_from_user(&copy, ext, sizeof(copy))) > + return -EFAULT; > + > + if (copy.pad) > + return -EINVAL; > + > + job->job_type = V3D_CPU_JOB_TYPE_COPY_PERFORMANCE_QUERY; > + > + job->performance_query.queries = kvmalloc_array(copy.count, > + sizeof(struct > v3d_performance_query), > + GFP_KERNEL); > + if (!job->performance_query.queries) > + return -ENOMEM; > + > + syncs = u64_to_user_ptr(copy.syncs); > + kperfmon_ids = u64_to_user_ptr(copy.kperfmon_ids); > + > + for (int i = 0; i < copy.count; i++) { > + 
u32 sync; > + u64 ids; > + u32 __user *ids_pointer; > + u32 id; > + > + if (copy_from_user(&sync, syncs++, sizeof(sync))) { > + kvfree(job->performance_query.queries); > + return -EFAULT; > + } > + > + job->performance_query.queries[i].syncobj = > drm_syncobj_find(file_priv, sync); > + > + if (copy_from_user(&ids, kperfmon_ids++, > sizeof(ids))) { > + kvfree(job->performance_query.queries); > + return -EFAULT; > + } > + > + ids_pointer = u64_to_user_ptr(ids); > + > + for (int j = 0; j < copy.nperfmons; j++) { > + if (copy_from_user(&id, ids_pointer++, > sizeof(id))) { > + kvfree(job- > >performance_query.queries); > + return -EFAULT; > + } > + > + job- > >performance_query.queries[i].kperfmon_ids[j] = id; > + } > + } > + job->performance_query.count = copy.count; > + job->performance_query.nperfmons = copy.nperfmons; > + job->performance_query.ncounters = copy.ncounters; > + > + job->copy.do_64bit = copy.do_64bit; > + job->copy.do_partial = copy.do_partial; > + job->copy.availability_bit = copy.availability_bit; > + job->copy.offset = copy.offset; > + job->copy.stride = copy.stride; > + > + return 0; > +} > + > /* Whenever userspace sets ioctl extensions, v3d_get_extensions > parses data > * according to the extension id (name). 
> */ > @@ -712,6 +790,9 @@ v3d_get_extensions(struct drm_file *file_priv, > case DRM_V3D_EXT_ID_CPU_RESET_PERFORMANCE_QUERY: > ret = > v3d_get_cpu_reset_performance_params(file_priv, user_ext, job); > break; > + case DRM_V3D_EXT_ID_CPU_COPY_PERFORMANCE_QUERY: > + ret = > v3d_get_cpu_copy_performance_query_params(file_priv, user_ext, job); > + break; > default: > DRM_DEBUG_DRIVER("Unknown extension id: > %d\n", ext.id); > return -EINVAL; > @@ -1092,6 +1173,7 @@ static const unsigned int > cpu_job_bo_handle_count[] = { > [V3D_CPU_JOB_TYPE_RESET_TIMESTAMP_QUERY] = 1, > [V3D_CPU_JOB_TYPE_COPY_TIMESTAMP_QUERY] = 2, > [V3D_CPU_JOB_TYPE_RESET_PERFORMANCE_QUERY] = 0, > + [V3D_CPU_JOB_TYPE_COPY_PERFORMANCE_QUERY] = 1, > }; > > /** > diff --git a/include/uapi/drm/v3d_drm.h b/include/uapi/drm/v3d_drm.h > index 76a02d2c01e6..9b99d554ef9c 100644 > --- a/include/uapi/drm/v3d_drm.h > +++ b/include/uapi/drm/v3d_drm.h > @@ -77,6 +77,7 @@ struct drm_v3d_extension { > #define DRM_V3D_EXT_ID_CPU_RESET_TIMESTAMP_QUERY 0x04 > #define DRM_V3D_EXT_ID_CPU_COPY_TIMESTAMP_QUERY 0x05 > #define DRM_V3D_EXT_ID_CPU_RESET_PERFORMANCE_QUERY 0x06 > +#define DRM_V3D_EXT_ID_CPU_COPY_PERFORMANCE_QUERY 0x07 > __u32 flags; /* mbz */ > }; > > @@ -519,6 +520,52 @@ struct drm_v3d_reset_performance_query { > __u64 kperfmon_ids; > }; > > +/** > + * struct drm_v3d_copy_performance_query - ioctl extension for the > CPU job to copy > + * performance query results to a buffer > + * > + * When an extension DRM_V3D_EXT_ID_CPU_COPY_PERFORMANCE_QUERY is > defined, it > + * points to this extension to define a copy performance query > submission. This > + * CPU job will copy the performance queries results to a BO with > the offset > + * and stride defined in the extension. 
> + */ > +struct drm_v3d_copy_performance_query { > + struct drm_v3d_extension base; > + > + /* Define if should write to buffer using 64 or 32 bits */ > + __u8 do_64bit; > + > + /* Define if it can write to buffer even if the query is not > available */ > + __u8 do_partial; > + > + /* Define if it should write availability bit to buffer */ > + __u8 availability_bit; > + > + /* mbz */ > + __u8 pad; > + > + /* Offset of the buffer in the BO */ > + __u32 offset; > + > + /* Stride of the buffer in the BO */ > + __u32 stride; > + > + /* Number of performance monitors */ > + __u32 nperfmons; > + > + /* Number of performance counters related to this query pool > */ > + __u32 ncounters; > + > + /* Number of queries */ > + __u32 count; > + > + /* Array of performance queries's syncobjs to indicate its > availability */ > + __u64 syncs; > + > + /* Array of u64 user-pointers that point to an array of > kperfmon_ids */ > + __u64 kperfmon_ids; > +}; > + > struct drm_v3d_submit_cpu { > /* Pointer to a u32 array of the BOs that are referenced by > the job. > * > @@ -537,6 +584,9 @@ struct drm_v3d_submit_cpu { > * > * For DRM_V3D_EXT_ID_CPU_RESET_PERFORMANCE_QUERY, it must > contain no > * BOs. > + * > + * For DRM_V3D_EXT_ID_CPU_COPY_PERFORMANCE_QUERY, it must > contain one > + * BO, for which the performance queries will be written to. (...), where the performance queries will be written.(...) Iago > */ > __u64 bo_handles; >
diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h index 0f7f80ad8d88..3c7d58866570 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.h +++ b/drivers/gpu/drm/v3d/v3d_drv.h @@ -322,6 +322,7 @@ enum v3d_cpu_job_type { V3D_CPU_JOB_TYPE_RESET_TIMESTAMP_QUERY, V3D_CPU_JOB_TYPE_COPY_TIMESTAMP_QUERY, V3D_CPU_JOB_TYPE_RESET_PERFORMANCE_QUERY, + V3D_CPU_JOB_TYPE_COPY_PERFORMANCE_QUERY, }; struct v3d_timestamp_query { diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c index 452c4a1db52e..203c32ed99d4 100644 --- a/drivers/gpu/drm/v3d/v3d_sched.c +++ b/drivers/gpu/drm/v3d/v3d_sched.c @@ -450,12 +450,78 @@ v3d_reset_performance_queries(struct v3d_cpu_job *job) } } +static void +v3d_write_performance_query_result(struct v3d_cpu_job *job, void *data, u32 query) +{ + struct v3d_performance_query_info *performance_query = &job->performance_query; + struct v3d_copy_query_results_info *copy = &job->copy; + struct v3d_file_priv *v3d_priv = job->base.file->driver_priv; + struct v3d_dev *v3d = job->base.v3d; + struct v3d_perfmon *perfmon; + u64 counter_values[V3D_PERFCNT_NUM]; + + for (int i = 0; i < performance_query->nperfmons; i++) { + perfmon = v3d_perfmon_find(v3d_priv, + performance_query->queries[query].kperfmon_ids[i]); + if (!perfmon) { + DRM_DEBUG("Failed to find perfmon."); + continue; + } + + v3d_perfmon_stop(v3d, perfmon, true); + + memcpy(&counter_values[i * DRM_V3D_MAX_PERF_COUNTERS], perfmon->values, + perfmon->ncounters * sizeof(u64)); + + v3d_perfmon_put(perfmon); + } + + for (int i = 0; i < performance_query->ncounters; i++) + write_to_buffer(data, i, copy->do_64bit, counter_values[i]); +} + + +static void +v3d_copy_performance_query(struct v3d_cpu_job *job) +{ + struct v3d_performance_query_info *performance_query = &job->performance_query; + struct v3d_copy_query_results_info *copy = &job->copy; + struct v3d_bo *bo = to_v3d_bo(job->base.bo[0]); + struct dma_fence *fence; + bool available, write_result; + u8 *data; + + 
v3d_get_bo_vaddr(bo); + + data = ((u8 *) bo->vaddr) + copy->offset; + + for (int i = 0; i < performance_query->count; i++) { + fence = drm_syncobj_fence_get(performance_query->queries[i].syncobj); + available = fence ? dma_fence_is_signaled(fence) : false; + + write_result = available || copy->do_partial; + if (write_result) + v3d_write_performance_query_result(job, data, i); + + if (copy->availability_bit) + write_to_buffer(data, performance_query->ncounters, + copy->do_64bit, available ? 1u : 0u); + + data += copy->stride; + + dma_fence_put(fence); + } + + v3d_put_bo_vaddr(bo); +} + static const v3d_cpu_job_fn cpu_job_function[] = { [V3D_CPU_JOB_TYPE_INDIRECT_CSD] = v3d_rewrite_csd_job_wg_counts_from_indirect, [V3D_CPU_JOB_TYPE_TIMESTAMP_QUERY] = v3d_timestamp_query, [V3D_CPU_JOB_TYPE_RESET_TIMESTAMP_QUERY] = v3d_reset_timestamp_queries, [V3D_CPU_JOB_TYPE_COPY_TIMESTAMP_QUERY] = v3d_copy_query_results, [V3D_CPU_JOB_TYPE_RESET_PERFORMANCE_QUERY] = v3d_reset_performance_queries, + [V3D_CPU_JOB_TYPE_COPY_PERFORMANCE_QUERY] = v3d_copy_performance_query, }; static struct dma_fence * diff --git a/drivers/gpu/drm/v3d/v3d_submit.c b/drivers/gpu/drm/v3d/v3d_submit.c index 20af8ae14831..d7a9da2484fd 100644 --- a/drivers/gpu/drm/v3d/v3d_submit.c +++ b/drivers/gpu/drm/v3d/v3d_submit.c @@ -672,6 +672,84 @@ v3d_get_cpu_reset_performance_params(struct drm_file *file_priv, return 0; } +static int +v3d_get_cpu_copy_performance_query_params(struct drm_file *file_priv, + struct drm_v3d_extension __user *ext, + struct v3d_cpu_job *job) +{ + u32 __user *syncs; + u64 __user *kperfmon_ids; + struct drm_v3d_copy_performance_query copy; + + if (!job) { + DRM_DEBUG("CPU job extension was attached to a GPU job.\n"); + return -EINVAL; + } + + if (job->job_type) { + DRM_DEBUG("Two CPU job extensions were added to the same CPU job.\n"); + return -EINVAL; + } + + if (copy_from_user(&copy, ext, sizeof(copy))) + return -EFAULT; + + if (copy.pad) + return -EINVAL; + + job->job_type = 
V3D_CPU_JOB_TYPE_COPY_PERFORMANCE_QUERY; + + job->performance_query.queries = kvmalloc_array(copy.count, + sizeof(struct v3d_performance_query), + GFP_KERNEL); + if (!job->performance_query.queries) + return -ENOMEM; + + syncs = u64_to_user_ptr(copy.syncs); + kperfmon_ids = u64_to_user_ptr(copy.kperfmon_ids); + + for (int i = 0; i < copy.count; i++) { + u32 sync; + u64 ids; + u32 __user *ids_pointer; + u32 id; + + if (copy_from_user(&sync, syncs++, sizeof(sync))) { + kvfree(job->performance_query.queries); + return -EFAULT; + } + + job->performance_query.queries[i].syncobj = drm_syncobj_find(file_priv, sync); + + if (copy_from_user(&ids, kperfmon_ids++, sizeof(ids))) { + kvfree(job->performance_query.queries); + return -EFAULT; + } + + ids_pointer = u64_to_user_ptr(ids); + + for (int j = 0; j < copy.nperfmons; j++) { + if (copy_from_user(&id, ids_pointer++, sizeof(id))) { + kvfree(job->performance_query.queries); + return -EFAULT; + } + + job->performance_query.queries[i].kperfmon_ids[j] = id; + } + } + job->performance_query.count = copy.count; + job->performance_query.nperfmons = copy.nperfmons; + job->performance_query.ncounters = copy.ncounters; + + job->copy.do_64bit = copy.do_64bit; + job->copy.do_partial = copy.do_partial; + job->copy.availability_bit = copy.availability_bit; + job->copy.offset = copy.offset; + job->copy.stride = copy.stride; + + return 0; +} + /* Whenever userspace sets ioctl extensions, v3d_get_extensions parses data * according to the extension id (name). 
*/ @@ -712,6 +790,9 @@ v3d_get_extensions(struct drm_file *file_priv, case DRM_V3D_EXT_ID_CPU_RESET_PERFORMANCE_QUERY: ret = v3d_get_cpu_reset_performance_params(file_priv, user_ext, job); break; + case DRM_V3D_EXT_ID_CPU_COPY_PERFORMANCE_QUERY: + ret = v3d_get_cpu_copy_performance_query_params(file_priv, user_ext, job); + break; default: DRM_DEBUG_DRIVER("Unknown extension id: %d\n", ext.id); return -EINVAL; @@ -1092,6 +1173,7 @@ static const unsigned int cpu_job_bo_handle_count[] = { [V3D_CPU_JOB_TYPE_RESET_TIMESTAMP_QUERY] = 1, [V3D_CPU_JOB_TYPE_COPY_TIMESTAMP_QUERY] = 2, [V3D_CPU_JOB_TYPE_RESET_PERFORMANCE_QUERY] = 0, + [V3D_CPU_JOB_TYPE_COPY_PERFORMANCE_QUERY] = 1, }; /** diff --git a/include/uapi/drm/v3d_drm.h b/include/uapi/drm/v3d_drm.h index 76a02d2c01e6..9b99d554ef9c 100644 --- a/include/uapi/drm/v3d_drm.h +++ b/include/uapi/drm/v3d_drm.h @@ -77,6 +77,7 @@ struct drm_v3d_extension { #define DRM_V3D_EXT_ID_CPU_RESET_TIMESTAMP_QUERY 0x04 #define DRM_V3D_EXT_ID_CPU_COPY_TIMESTAMP_QUERY 0x05 #define DRM_V3D_EXT_ID_CPU_RESET_PERFORMANCE_QUERY 0x06 +#define DRM_V3D_EXT_ID_CPU_COPY_PERFORMANCE_QUERY 0x07 __u32 flags; /* mbz */ }; @@ -519,6 +520,52 @@ struct drm_v3d_reset_performance_query { __u64 kperfmon_ids; }; +/** + * struct drm_v3d_copy_performance_query - ioctl extension for the CPU job to copy + * performance query results to a buffer + * + * When an extension DRM_V3D_EXT_ID_CPU_COPY_PERFORMANCE_QUERY is defined, it + * points to this extension to define a copy performance query submission. This + * CPU job will copy the performance queries results to a BO with the offset + * and stride defined in the extension. 
+ */ +struct drm_v3d_copy_performance_query { + struct drm_v3d_extension base; + + /* Define if should write to buffer using 64 or 32 bits */ + __u8 do_64bit; + + /* Define if it can write to buffer even if the query is not available */ + __u8 do_partial; + + /* Define if it should write availability bit to buffer */ + __u8 availability_bit; + + /* mbz */ + __u8 pad; + + /* Offset of the buffer in the BO */ + __u32 offset; + + /* Stride of the buffer in the BO */ + __u32 stride; + + /* Number of performance monitors */ + __u32 nperfmons; + + /* Number of performance counters related to this query pool */ + __u32 ncounters; + + /* Number of queries */ + __u32 count; + + /* Array of performance queries's syncobjs to indicate its availability */ + __u64 syncs; + + /* Array of u64 user-pointers that point to an array of kperfmon_ids */ + __u64 kperfmon_ids; +}; + struct drm_v3d_submit_cpu { /* Pointer to a u32 array of the BOs that are referenced by the job. * @@ -537,6 +584,9 @@ struct drm_v3d_submit_cpu { * * For DRM_V3D_EXT_ID_CPU_RESET_PERFORMANCE_QUERY, it must contain no * BOs. + * + * For DRM_V3D_EXT_ID_CPU_COPY_PERFORMANCE_QUERY, it must contain one + * BO, for which the performance queries will be written to. */ __u64 bo_handles;
A CPU job is a type of job that performs operations that require CPU intervention. A copy performance query job is a job that copies the complete or partial result of a query to a buffer. In order to copy the result of a performance query to a buffer, we need to get the values from the performance monitors. So, create a user extension for the CPU job that enables the creation of a copy performance query job. This user extension will allow the creation of a CPU job that copies the results of a performance query to a BO with the possibility to indicate the availability with an availability bit. Signed-off-by: Maíra Canal <mcanal@igalia.com> --- drivers/gpu/drm/v3d/v3d_drv.h | 1 + drivers/gpu/drm/v3d/v3d_sched.c | 66 +++++++++++++++++++++++++ drivers/gpu/drm/v3d/v3d_submit.c | 82 ++++++++++++++++++++++++++++++++ include/uapi/drm/v3d_drm.h | 50 +++++++++++++++++++ 4 files changed, 199 insertions(+)