Message ID | 20240709163425.58276-12-tursulin@igalia.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | v3d: Perfmon cleanup | expand |
El mar, 09-07-2024 a las 17:34 +0100, Tvrtko Ursulin escribió: > From: Tvrtko Ursulin <tvrtko.ursulin@igalia.com> > > Removing the intermediate buffer removes the last use of the > V3D_MAX_COUNTERS define, which will enable further driver cleanup. > > While at it pull the 32 vs 64 bit copying decision outside the loop > in > order to reduce the number of conditional instructions. > > Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com> > --- > drivers/gpu/drm/v3d/v3d_sched.c | 60 ++++++++++++++++++++----------- > -- > 1 file changed, 37 insertions(+), 23 deletions(-) > > diff --git a/drivers/gpu/drm/v3d/v3d_sched.c > b/drivers/gpu/drm/v3d/v3d_sched.c > index fc8730264386..77f795e38fad 100644 > --- a/drivers/gpu/drm/v3d/v3d_sched.c > +++ b/drivers/gpu/drm/v3d/v3d_sched.c > @@ -421,18 +421,23 @@ v3d_reset_timestamp_queries(struct v3d_cpu_job > *job) > v3d_put_bo_vaddr(bo); > } > > +static void write_to_buffer_32(u32 *dst, unsigned int idx, u32 > value) > +{ > + dst[idx] = value; > +} > + > +static void write_to_buffer_64(u64 *dst, unsigned int idx, u64 > value) > +{ > + dst[idx] = value; > +} > + > static void > -write_to_buffer(void *dst, u32 idx, bool do_64bit, u64 value) > +write_to_buffer(void *dst, unsigned int idx, bool do_64bit, u64 > value) > { > - if (do_64bit) { > - u64 *dst64 = (u64 *)dst; > - > - dst64[idx] = value; > - } else { > - u32 *dst32 = (u32 *)dst; > - > - dst32[idx] = (u32)value; > - } > + if (do_64bit) > + write_to_buffer_64(dst, idx, value); > + else > + write_to_buffer_32(dst, idx, value); > } > > static void > @@ -505,18 +510,23 @@ v3d_reset_performance_queries(struct > v3d_cpu_job *job) > } > > static void > -v3d_write_performance_query_result(struct v3d_cpu_job *job, void > *data, u32 query) > +v3d_write_performance_query_result(struct v3d_cpu_job *job, void > *data, > + unsigned int query) > { > - struct v3d_performance_query_info *performance_query = &job- > >performance_query; > - struct v3d_copy_query_results_info *copy = &job->copy; > + struct v3d_performance_query_info *performance_query = > + &job- > >performance_query; > struct v3d_file_priv *v3d_priv = job->base.file- > >driver_priv; > struct v3d_dev *v3d = job->base.v3d; > - struct v3d_perfmon *perfmon; > - u64 counter_values[V3D_MAX_COUNTERS]; > + unsigned int i, j, offset; > > - for (int i = 0; i < performance_query->nperfmons; i++) { > - perfmon = v3d_perfmon_find(v3d_priv, > - performance_query- > >queries[query].kperfmon_ids[i]); > + for (i = 0, offset = 0; > + i < performance_query->nperfmons; > + i++, offset += DRM_V3D_MAX_PERF_COUNTERS) { > + struct v3d_performance_query *q = > + &performance_query->queries[query]; Looks like we could move this before the loop. Otherwise this patch is: Reviewed-by: Iago Toral Quiroga <itoral@igalia.com> > + struct v3d_perfmon *perfmon; > + > + perfmon = v3d_perfmon_find(v3d_priv, q- > >kperfmon_ids[i]); > if (!perfmon) { > DRM_DEBUG("Failed to find perfmon."); > continue; > @@ -524,14 +534,18 @@ v3d_write_performance_query_result(struct > v3d_cpu_job *job, void *data, u32 quer > > v3d_perfmon_stop(v3d, perfmon, true); > > - memcpy(&counter_values[i * > DRM_V3D_MAX_PERF_COUNTERS], perfmon->values, > - perfmon->ncounters * sizeof(u64)); > + if (job->copy.do_64bit) { > + for (j = 0; j < perfmon->ncounters; j++) > + write_to_buffer_64(data, offset + j, > + perfmon- > >values[j]); > + } else { > + for (j = 0; j < perfmon->ncounters; j++) > + write_to_buffer_32(data, offset + j, > + perfmon- > >values[j]); > + } > > v3d_perfmon_put(perfmon); > } > - > - for (int i = 0; i < performance_query->ncounters; i++) > - write_to_buffer(data, i, copy->do_64bit, > counter_values[i]); > } > > static void
On 11/07/2024 13:31, Iago Toral wrote: > El mar, 09-07-2024 a las 17:34 +0100, Tvrtko Ursulin escribió: >> From: Tvrtko Ursulin <tvrtko.ursulin@igalia.com> >> >> Removing the intermediate buffer removes the last use of the >> V3D_MAX_COUNTERS define, which will enable further driver cleanup. >> >> While at it pull the 32 vs 64 bit copying decision outside the loop >> in >> order to reduce the number of conditional instructions. >> >> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com> >> --- >> drivers/gpu/drm/v3d/v3d_sched.c | 60 ++++++++++++++++++++----------- >> -- >> 1 file changed, 37 insertions(+), 23 deletions(-) >> >> diff --git a/drivers/gpu/drm/v3d/v3d_sched.c >> b/drivers/gpu/drm/v3d/v3d_sched.c >> index fc8730264386..77f795e38fad 100644 >> --- a/drivers/gpu/drm/v3d/v3d_sched.c >> +++ b/drivers/gpu/drm/v3d/v3d_sched.c >> @@ -421,18 +421,23 @@ v3d_reset_timestamp_queries(struct v3d_cpu_job >> *job) >> v3d_put_bo_vaddr(bo); >> } >> >> +static void write_to_buffer_32(u32 *dst, unsigned int idx, u32 >> value) >> +{ >> + dst[idx] = value; >> +} >> + >> +static void write_to_buffer_64(u64 *dst, unsigned int idx, u64 >> value) >> +{ >> + dst[idx] = value; >> +} >> + >> static void >> -write_to_buffer(void *dst, u32 idx, bool do_64bit, u64 value) >> +write_to_buffer(void *dst, unsigned int idx, bool do_64bit, u64 >> value) >> { >> - if (do_64bit) { >> - u64 *dst64 = (u64 *)dst; >> - >> - dst64[idx] = value; >> - } else { >> - u32 *dst32 = (u32 *)dst; >> - >> - dst32[idx] = (u32)value; >> - } >> + if (do_64bit) >> + write_to_buffer_64(dst, idx, value); >> + else >> + write_to_buffer_32(dst, idx, value); >> } >> >> static void >> @@ -505,18 +510,23 @@ v3d_reset_performance_queries(struct >> v3d_cpu_job *job) >> } >> >> static void >> -v3d_write_performance_query_result(struct v3d_cpu_job *job, void >> *data, u32 query) >> +v3d_write_performance_query_result(struct v3d_cpu_job *job, void >> *data, >> + unsigned int query) >> { >> - struct v3d_performance_query_info *performance_query = &job- >>> performance_query; >> - struct v3d_copy_query_results_info *copy = &job->copy; >> + struct v3d_performance_query_info *performance_query = >> + &job- >>> performance_query; >> struct v3d_file_priv *v3d_priv = job->base.file- >>> driver_priv; >> struct v3d_dev *v3d = job->base.v3d; >> - struct v3d_perfmon *perfmon; >> - u64 counter_values[V3D_MAX_COUNTERS]; >> + unsigned int i, j, offset; >> >> - for (int i = 0; i < performance_query->nperfmons; i++) { >> - perfmon = v3d_perfmon_find(v3d_priv, >> - performance_query- >>> queries[query].kperfmon_ids[i]); >> + for (i = 0, offset = 0; >> + i < performance_query->nperfmons; >> + i++, offset += DRM_V3D_MAX_PERF_COUNTERS) { >> + struct v3d_performance_query *q = >> + &performance_query->queries[query]; > > Looks like we could move this before the loop. Indeed! I will change it and re-send, either for v4 of the series, or single update if there will not be any other changes required. > Otherwise this patch is: > Reviewed-by: Iago Toral Quiroga <itoral@igalia.com> Thanks! Regards, Tvrtko >> + struct v3d_perfmon *perfmon; >> + >> + perfmon = v3d_perfmon_find(v3d_priv, q- >>> kperfmon_ids[i]); >> if (!perfmon) { >> DRM_DEBUG("Failed to find perfmon."); >> continue; >> @@ -524,14 +534,18 @@ v3d_write_performance_query_result(struct >> v3d_cpu_job *job, void *data, u32 quer >> >> v3d_perfmon_stop(v3d, perfmon, true); >> >> - memcpy(&counter_values[i * >> DRM_V3D_MAX_PERF_COUNTERS], perfmon->values, >> - perfmon->ncounters * sizeof(u64)); >> + if (job->copy.do_64bit) { >> + for (j = 0; j < perfmon->ncounters; j++) >> + write_to_buffer_64(data, offset + j, >> + perfmon- >>> values[j]); >> + } else { >> + for (j = 0; j < perfmon->ncounters; j++) >> + write_to_buffer_32(data, offset + j, >> + perfmon- >>> values[j]); >> + } >> >> v3d_perfmon_put(perfmon); >> } >> - >> - for (int i = 0; i < performance_query->ncounters; i++) >> - write_to_buffer(data, i, copy->do_64bit, >> counter_values[i]); >> } >> >> static void >
diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c index fc8730264386..77f795e38fad 100644 --- a/drivers/gpu/drm/v3d/v3d_sched.c +++ b/drivers/gpu/drm/v3d/v3d_sched.c @@ -421,18 +421,23 @@ v3d_reset_timestamp_queries(struct v3d_cpu_job *job) v3d_put_bo_vaddr(bo); } +static void write_to_buffer_32(u32 *dst, unsigned int idx, u32 value) +{ + dst[idx] = value; +} + +static void write_to_buffer_64(u64 *dst, unsigned int idx, u64 value) +{ + dst[idx] = value; +} + static void -write_to_buffer(void *dst, u32 idx, bool do_64bit, u64 value) +write_to_buffer(void *dst, unsigned int idx, bool do_64bit, u64 value) { - if (do_64bit) { - u64 *dst64 = (u64 *)dst; - - dst64[idx] = value; - } else { - u32 *dst32 = (u32 *)dst; - - dst32[idx] = (u32)value; - } + if (do_64bit) + write_to_buffer_64(dst, idx, value); + else + write_to_buffer_32(dst, idx, value); } static void @@ -505,18 +510,23 @@ v3d_reset_performance_queries(struct v3d_cpu_job *job) } static void -v3d_write_performance_query_result(struct v3d_cpu_job *job, void *data, u32 query) +v3d_write_performance_query_result(struct v3d_cpu_job *job, void *data, + unsigned int query) { - struct v3d_performance_query_info *performance_query = &job->performance_query; - struct v3d_copy_query_results_info *copy = &job->copy; + struct v3d_performance_query_info *performance_query = + &job->performance_query; struct v3d_file_priv *v3d_priv = job->base.file->driver_priv; struct v3d_dev *v3d = job->base.v3d; - struct v3d_perfmon *perfmon; - u64 counter_values[V3D_MAX_COUNTERS]; + unsigned int i, j, offset; - for (int i = 0; i < performance_query->nperfmons; i++) { - perfmon = v3d_perfmon_find(v3d_priv, - performance_query->queries[query].kperfmon_ids[i]); + for (i = 0, offset = 0; + i < performance_query->nperfmons; + i++, offset += DRM_V3D_MAX_PERF_COUNTERS) { + struct v3d_performance_query *q = + &performance_query->queries[query]; + struct v3d_perfmon *perfmon; + + perfmon = v3d_perfmon_find(v3d_priv, q->kperfmon_ids[i]); if (!perfmon) { DRM_DEBUG("Failed to find perfmon."); continue; @@ -524,14 +534,18 @@ v3d_write_performance_query_result(struct v3d_cpu_job *job, void *data, u32 quer v3d_perfmon_stop(v3d, perfmon, true); - memcpy(&counter_values[i * DRM_V3D_MAX_PERF_COUNTERS], perfmon->values, - perfmon->ncounters * sizeof(u64)); + if (job->copy.do_64bit) { + for (j = 0; j < perfmon->ncounters; j++) + write_to_buffer_64(data, offset + j, + perfmon->values[j]); + } else { + for (j = 0; j < perfmon->ncounters; j++) + write_to_buffer_32(data, offset + j, + perfmon->values[j]); + } v3d_perfmon_put(perfmon); } - - for (int i = 0; i < performance_query->ncounters; i++) - write_to_buffer(data, i, copy->do_64bit, counter_values[i]); } static void