Message ID | 20230807211849.49867-2-mcanal@igalia.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | drm/v3d: Expose GPU usage stats | expand |
On 08/07, Maíra Canal wrote: > This patch exposes the accumulated amount of active time per client > through the fdinfo infrastructure. The amount of active time is exposed > for each V3D queue: BIN, RENDER, CSD, TFU and CACHE_CLEAN. > > In order to calculate the amount of active time per client, a CPU clock > is used through the function local_clock(). The point where the jobs has > started is marked and is finally compared with the time that the job had > finished. > > Moreover, the number of jobs submitted to each queue is also exposed on > fdinfo through the identifier "v3d-jobs-<queue>". > > Co-developed-by: Jose Maria Casanova Crespo <jmcasanova@igalia.com> > Signed-off-by: Jose Maria Casanova Crespo <jmcasanova@igalia.com> > Signed-off-by: Maíra Canal <mcanal@igalia.com> > --- > drivers/gpu/drm/v3d/v3d_drv.c | 30 +++++++++++++++++++++++++++++- > drivers/gpu/drm/v3d/v3d_drv.h | 23 +++++++++++++++++++++++ > drivers/gpu/drm/v3d/v3d_gem.c | 1 + > drivers/gpu/drm/v3d/v3d_irq.c | 17 +++++++++++++++++ > drivers/gpu/drm/v3d/v3d_sched.c | 24 ++++++++++++++++++++++++ > 5 files changed, 94 insertions(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c > index ffbbe9d527d3..ca65c707da03 100644 > --- a/drivers/gpu/drm/v3d/v3d_drv.c > +++ b/drivers/gpu/drm/v3d/v3d_drv.c > @@ -19,6 +19,7 @@ > #include <linux/module.h> > #include <linux/of_platform.h> > #include <linux/platform_device.h> > +#include <linux/sched/clock.h> > #include <linux/reset.h> > > #include <drm/drm_drv.h> > @@ -111,6 +112,10 @@ v3d_open(struct drm_device *dev, struct drm_file *file) > v3d_priv->v3d = v3d; > > for (i = 0; i < V3D_MAX_QUEUES; i++) { > + v3d_priv->enabled_ns[i] = 0; > + v3d_priv->start_ns[i] = 0; > + v3d_priv->jobs_sent[i] = 0; > + > sched = &v3d->queue[i].sched; > drm_sched_entity_init(&v3d_priv->sched_entity[i], > DRM_SCHED_PRIORITY_NORMAL, &sched, > @@ -136,7 +141,29 @@ v3d_postclose(struct drm_device *dev, struct drm_file *file) > kfree(v3d_priv); > } > > -DEFINE_DRM_GEM_FOPS(v3d_drm_fops); > +static void v3d_show_fdinfo(struct drm_printer *p, struct drm_file *file) > +{ > + struct v3d_file_priv *file_priv = file->driver_priv; > + u64 timestamp = local_clock(); > + enum v3d_queue queue; > + > + for (queue = 0; queue < V3D_MAX_QUEUES; queue++) { > + drm_printf(p, "drm-engine-%s: \t%llu ns\n", > + v3d_queue_to_string(queue), > + file_priv->start_ns[queue] ? file_priv->enabled_ns[queue] > + + timestamp - file_priv->start_ns[queue] > + : file_priv->enabled_ns[queue]); > + > + drm_printf(p, "v3d-jobs-%s: \t%llu jobs\n", > + v3d_queue_to_string(queue), file_priv->jobs_sent[queue]); > + } > +} > + > +static const struct file_operations v3d_drm_fops = { > + .owner = THIS_MODULE, > + DRM_GEM_FOPS, > + .show_fdinfo = drm_show_fdinfo, > +}; Dunno where, but could you document somewhere what is the expected counting behavior in case of a GPU reset? > > /* DRM_AUTH is required on SUBMIT_CL for now, while we don't have GMP > * protection between clients. Note that render nodes would be > @@ -176,6 +203,7 @@ static const struct drm_driver v3d_drm_driver = { > .ioctls = v3d_drm_ioctls, > .num_ioctls = ARRAY_SIZE(v3d_drm_ioctls), > .fops = &v3d_drm_fops, > + .show_fdinfo = v3d_show_fdinfo, > > .name = DRIVER_NAME, > .desc = DRIVER_DESC, > diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h > index 7f664a4b2a75..7f2897e5b2cb 100644 > --- a/drivers/gpu/drm/v3d/v3d_drv.h > +++ b/drivers/gpu/drm/v3d/v3d_drv.h > @@ -21,6 +21,18 @@ struct reset_control; > > #define V3D_MAX_QUEUES (V3D_CACHE_CLEAN + 1) > > +static inline char *v3d_queue_to_string(enum v3d_queue queue) > +{ > + switch (queue) { > + case V3D_BIN: return "bin"; > + case V3D_RENDER: return "render"; > + case V3D_TFU: return "tfu"; > + case V3D_CSD: return "csd"; > + case V3D_CACHE_CLEAN: return "cache_clean"; > + } > + return "UNKNOWN"; > +} > + > struct v3d_queue_state { > struct drm_gpu_scheduler sched; > > @@ -167,6 +179,12 @@ struct v3d_file_priv { > } perfmon; > > struct drm_sched_entity sched_entity[V3D_MAX_QUEUES]; > + > + u64 start_ns[V3D_MAX_QUEUES]; > + > + u64 enabled_ns[V3D_MAX_QUEUES]; > + > + u64 jobs_sent[V3D_MAX_QUEUES]; > }; > > struct v3d_bo { > @@ -238,6 +256,11 @@ struct v3d_job { > */ > struct v3d_perfmon *perfmon; > > + /* File descriptor of the process that submitted the job that could be used > + * for collecting stats by process of GPU usage. > + */ > + struct drm_file *file; > + > /* Callback for the freeing of the job on refcount going to 0. */ > void (*free)(struct kref *ref); > }; > diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c > index 2e94ce788c71..40ed0c7c3fad 100644 > --- a/drivers/gpu/drm/v3d/v3d_gem.c > +++ b/drivers/gpu/drm/v3d/v3d_gem.c > @@ -415,6 +415,7 @@ v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv, > job = *container; > job->v3d = v3d; > job->free = free; > + job->file = file_priv; > > ret = drm_sched_job_init(&job->base, &v3d_priv->sched_entity[queue], > v3d_priv); > diff --git a/drivers/gpu/drm/v3d/v3d_irq.c b/drivers/gpu/drm/v3d/v3d_irq.c > index e714d5318f30..c898800ae9c2 100644 > --- a/drivers/gpu/drm/v3d/v3d_irq.c > +++ b/drivers/gpu/drm/v3d/v3d_irq.c > @@ -14,6 +14,7 @@ > */ > > #include <linux/platform_device.h> > +#include <linux/sched/clock.h> > > #include "v3d_drv.h" > #include "v3d_regs.h" > @@ -100,6 +101,10 @@ v3d_irq(int irq, void *arg) > if (intsts & V3D_INT_FLDONE) { > struct v3d_fence *fence = > to_v3d_fence(v3d->bin_job->base.irq_fence); > + struct v3d_file_priv *file = v3d->bin_job->base.file->driver_priv; > + > + file->enabled_ns[V3D_BIN] += local_clock() - file->start_ns[V3D_BIN]; > + file->start_ns[V3D_BIN] = 0; > > trace_v3d_bcl_irq(&v3d->drm, fence->seqno); > dma_fence_signal(&fence->base); > @@ -109,6 +114,10 @@ v3d_irq(int irq, void *arg) > if (intsts & V3D_INT_FRDONE) { > struct v3d_fence *fence = > to_v3d_fence(v3d->render_job->base.irq_fence); > + struct v3d_file_priv *file = v3d->render_job->base.file->driver_priv; > + > + file->enabled_ns[V3D_RENDER] += local_clock() - file->start_ns[V3D_RENDER]; > + file->start_ns[V3D_RENDER] = 0; > > trace_v3d_rcl_irq(&v3d->drm, fence->seqno); > dma_fence_signal(&fence->base); > @@ -118,6 +127,10 @@ v3d_irq(int irq, void *arg) > if (intsts & V3D_INT_CSDDONE) { > struct v3d_fence *fence = > to_v3d_fence(v3d->csd_job->base.irq_fence); > + struct v3d_file_priv *file = v3d->csd_job->base.file->driver_priv; > + > + file->enabled_ns[V3D_CSD] += local_clock() - file->start_ns[V3D_CSD]; > + file->start_ns[V3D_CSD] = 0; > > trace_v3d_csd_irq(&v3d->drm, fence->seqno); > dma_fence_signal(&fence->base); > @@ -154,6 +167,10 @@ v3d_hub_irq(int irq, void *arg) > if (intsts & V3D_HUB_INT_TFUC) { > struct v3d_fence *fence = > to_v3d_fence(v3d->tfu_job->base.irq_fence); > + struct v3d_file_priv *file = v3d->tfu_job->base.file->driver_priv; > + > + file->enabled_ns[V3D_TFU] += local_clock() - file->start_ns[V3D_TFU]; > + file->start_ns[V3D_TFU] = 0; > > trace_v3d_tfu_irq(&v3d->drm, fence->seqno); > dma_fence_signal(&fence->base); > diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c > index 06238e6d7f5c..b360709c0765 100644 > --- a/drivers/gpu/drm/v3d/v3d_sched.c > +++ b/drivers/gpu/drm/v3d/v3d_sched.c > @@ -18,6 +18,7 @@ > * semaphores to interlock between them. > */ > > +#include <linux/sched/clock.h> > #include <linux/kthread.h> > > #include "v3d_drv.h" > @@ -76,6 +77,7 @@ static struct dma_fence *v3d_bin_job_run(struct drm_sched_job *sched_job) > { > struct v3d_bin_job *job = to_bin_job(sched_job); > struct v3d_dev *v3d = job->base.v3d; > + struct v3d_file_priv *file = job->base.file->driver_priv; > struct drm_device *dev = &v3d->drm; > struct dma_fence *fence; > unsigned long irqflags; > @@ -107,6 +109,9 @@ static struct dma_fence *v3d_bin_job_run(struct drm_sched_job *sched_job) > trace_v3d_submit_cl(dev, false, to_v3d_fence(fence)->seqno, > job->start, job->end); > > + file->start_ns[V3D_BIN] = local_clock(); > + file->jobs_sent[V3D_BIN]++; > + > v3d_switch_perfmon(v3d, &job->base); > > /* Set the current and end address of the control list. > @@ -131,6 +136,7 @@ static struct dma_fence *v3d_render_job_run(struct drm_sched_job *sched_job) > { > struct v3d_render_job *job = to_render_job(sched_job); > struct v3d_dev *v3d = job->base.v3d; > + struct v3d_file_priv *file = job->base.file->driver_priv; > struct drm_device *dev = &v3d->drm; > struct dma_fence *fence; > > @@ -158,6 +164,9 @@ static struct dma_fence *v3d_render_job_run(struct drm_sched_job *sched_job) > trace_v3d_submit_cl(dev, true, to_v3d_fence(fence)->seqno, > job->start, job->end); > > + file->start_ns[V3D_RENDER] = local_clock(); > + file->jobs_sent[V3D_RENDER]++; > + > v3d_switch_perfmon(v3d, &job->base); > > /* XXX: Set the QCFG */ > @@ -176,6 +185,7 @@ v3d_tfu_job_run(struct drm_sched_job *sched_job) > { > struct v3d_tfu_job *job = to_tfu_job(sched_job); > struct v3d_dev *v3d = job->base.v3d; > + struct v3d_file_priv *file = job->base.file->driver_priv; > struct drm_device *dev = &v3d->drm; > struct dma_fence *fence; > > @@ -190,6 +200,9 @@ v3d_tfu_job_run(struct drm_sched_job *sched_job) > > trace_v3d_submit_tfu(dev, to_v3d_fence(fence)->seqno); > > + file->start_ns[V3D_TFU] = local_clock(); > + file->jobs_sent[V3D_TFU]++; > + > V3D_WRITE(V3D_TFU_IIA, job->args.iia); > V3D_WRITE(V3D_TFU_IIS, job->args.iis); > V3D_WRITE(V3D_TFU_ICA, job->args.ica); > @@ -213,6 +226,7 @@ v3d_csd_job_run(struct drm_sched_job *sched_job) > { > struct v3d_csd_job *job = to_csd_job(sched_job); > struct v3d_dev *v3d = job->base.v3d; > + struct v3d_file_priv *file = job->base.file->driver_priv; > struct drm_device *dev = &v3d->drm; > struct dma_fence *fence; > int i; > @@ -231,6 +245,9 @@ v3d_csd_job_run(struct drm_sched_job *sched_job) > > trace_v3d_submit_csd(dev, to_v3d_fence(fence)->seqno); > > + file->start_ns[V3D_CSD] = local_clock(); > + file->jobs_sent[V3D_CSD]++; > + > v3d_switch_perfmon(v3d, &job->base); > > for (i = 1; i <= 6; i++) > @@ -246,9 +263,16 @@ v3d_cache_clean_job_run(struct drm_sched_job *sched_job) > { > struct v3d_job *job = to_v3d_job(sched_job); > struct v3d_dev *v3d = job->v3d; > + struct v3d_file_priv *file = job->file->driver_priv; > + > + file->start_ns[V3D_CACHE_CLEAN] = local_clock(); > + file->jobs_sent[V3D_CACHE_CLEAN]++; > > v3d_clean_caches(v3d); > > + file->enabled_ns[V3D_CACHE_CLEAN] += local_clock() - file->start_ns[V3D_CACHE_CLEAN]; > + file->start_ns[V3D_CACHE_CLEAN] = 0; > + > return NULL; > } > > -- > 2.41.0 >
diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c index ffbbe9d527d3..ca65c707da03 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.c +++ b/drivers/gpu/drm/v3d/v3d_drv.c @@ -19,6 +19,7 @@ #include <linux/module.h> #include <linux/of_platform.h> #include <linux/platform_device.h> +#include <linux/sched/clock.h> #include <linux/reset.h> #include <drm/drm_drv.h> @@ -111,6 +112,10 @@ v3d_open(struct drm_device *dev, struct drm_file *file) v3d_priv->v3d = v3d; for (i = 0; i < V3D_MAX_QUEUES; i++) { + v3d_priv->enabled_ns[i] = 0; + v3d_priv->start_ns[i] = 0; + v3d_priv->jobs_sent[i] = 0; + sched = &v3d->queue[i].sched; drm_sched_entity_init(&v3d_priv->sched_entity[i], DRM_SCHED_PRIORITY_NORMAL, &sched, @@ -136,7 +141,29 @@ v3d_postclose(struct drm_device *dev, struct drm_file *file) kfree(v3d_priv); } -DEFINE_DRM_GEM_FOPS(v3d_drm_fops); +static void v3d_show_fdinfo(struct drm_printer *p, struct drm_file *file) +{ + struct v3d_file_priv *file_priv = file->driver_priv; + u64 timestamp = local_clock(); + enum v3d_queue queue; + + for (queue = 0; queue < V3D_MAX_QUEUES; queue++) { + drm_printf(p, "drm-engine-%s: \t%llu ns\n", + v3d_queue_to_string(queue), + file_priv->start_ns[queue] ? file_priv->enabled_ns[queue] + + timestamp - file_priv->start_ns[queue] + : file_priv->enabled_ns[queue]); + + drm_printf(p, "v3d-jobs-%s: \t%llu jobs\n", + v3d_queue_to_string(queue), file_priv->jobs_sent[queue]); + } +} + +static const struct file_operations v3d_drm_fops = { + .owner = THIS_MODULE, + DRM_GEM_FOPS, + .show_fdinfo = drm_show_fdinfo, +}; /* DRM_AUTH is required on SUBMIT_CL for now, while we don't have GMP * protection between clients. Note that render nodes would be @@ -176,6 +203,7 @@ static const struct drm_driver v3d_drm_driver = { .ioctls = v3d_drm_ioctls, .num_ioctls = ARRAY_SIZE(v3d_drm_ioctls), .fops = &v3d_drm_fops, + .show_fdinfo = v3d_show_fdinfo, .name = DRIVER_NAME, .desc = DRIVER_DESC, diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h index 7f664a4b2a75..7f2897e5b2cb 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.h +++ b/drivers/gpu/drm/v3d/v3d_drv.h @@ -21,6 +21,18 @@ struct reset_control; #define V3D_MAX_QUEUES (V3D_CACHE_CLEAN + 1) +static inline char *v3d_queue_to_string(enum v3d_queue queue) +{ + switch (queue) { + case V3D_BIN: return "bin"; + case V3D_RENDER: return "render"; + case V3D_TFU: return "tfu"; + case V3D_CSD: return "csd"; + case V3D_CACHE_CLEAN: return "cache_clean"; + } + return "UNKNOWN"; +} + struct v3d_queue_state { struct drm_gpu_scheduler sched; @@ -167,6 +179,12 @@ struct v3d_file_priv { } perfmon; struct drm_sched_entity sched_entity[V3D_MAX_QUEUES]; + + u64 start_ns[V3D_MAX_QUEUES]; + + u64 enabled_ns[V3D_MAX_QUEUES]; + + u64 jobs_sent[V3D_MAX_QUEUES]; }; struct v3d_bo { @@ -238,6 +256,11 @@ struct v3d_job { */ struct v3d_perfmon *perfmon; + /* File descriptor of the process that submitted the job that could be used + * for collecting stats by process of GPU usage. + */ + struct drm_file *file; + /* Callback for the freeing of the job on refcount going to 0. */ void (*free)(struct kref *ref); }; diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c index 2e94ce788c71..40ed0c7c3fad 100644 --- a/drivers/gpu/drm/v3d/v3d_gem.c +++ b/drivers/gpu/drm/v3d/v3d_gem.c @@ -415,6 +415,7 @@ v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv, job = *container; job->v3d = v3d; job->free = free; + job->file = file_priv; ret = drm_sched_job_init(&job->base, &v3d_priv->sched_entity[queue], v3d_priv); diff --git a/drivers/gpu/drm/v3d/v3d_irq.c b/drivers/gpu/drm/v3d/v3d_irq.c index e714d5318f30..c898800ae9c2 100644 --- a/drivers/gpu/drm/v3d/v3d_irq.c +++ b/drivers/gpu/drm/v3d/v3d_irq.c @@ -14,6 +14,7 @@ */ #include <linux/platform_device.h> +#include <linux/sched/clock.h> #include "v3d_drv.h" #include "v3d_regs.h" @@ -100,6 +101,10 @@ v3d_irq(int irq, void *arg) if (intsts & V3D_INT_FLDONE) { struct v3d_fence *fence = to_v3d_fence(v3d->bin_job->base.irq_fence); + struct v3d_file_priv *file = v3d->bin_job->base.file->driver_priv; + + file->enabled_ns[V3D_BIN] += local_clock() - file->start_ns[V3D_BIN]; + file->start_ns[V3D_BIN] = 0; trace_v3d_bcl_irq(&v3d->drm, fence->seqno); dma_fence_signal(&fence->base); @@ -109,6 +114,10 @@ v3d_irq(int irq, void *arg) if (intsts & V3D_INT_FRDONE) { struct v3d_fence *fence = to_v3d_fence(v3d->render_job->base.irq_fence); + struct v3d_file_priv *file = v3d->render_job->base.file->driver_priv; + + file->enabled_ns[V3D_RENDER] += local_clock() - file->start_ns[V3D_RENDER]; + file->start_ns[V3D_RENDER] = 0; trace_v3d_rcl_irq(&v3d->drm, fence->seqno); dma_fence_signal(&fence->base); @@ -118,6 +127,10 @@ v3d_irq(int irq, void *arg) if (intsts & V3D_INT_CSDDONE) { struct v3d_fence *fence = to_v3d_fence(v3d->csd_job->base.irq_fence); + struct v3d_file_priv *file = v3d->csd_job->base.file->driver_priv; + + file->enabled_ns[V3D_CSD] += local_clock() - file->start_ns[V3D_CSD]; + file->start_ns[V3D_CSD] = 0; trace_v3d_csd_irq(&v3d->drm, fence->seqno); dma_fence_signal(&fence->base); @@ -154,6 +167,10 @@ v3d_hub_irq(int irq, void *arg) if (intsts & V3D_HUB_INT_TFUC) { struct v3d_fence *fence = to_v3d_fence(v3d->tfu_job->base.irq_fence); + struct v3d_file_priv *file = v3d->tfu_job->base.file->driver_priv; + + file->enabled_ns[V3D_TFU] += local_clock() - file->start_ns[V3D_TFU]; + file->start_ns[V3D_TFU] = 0; trace_v3d_tfu_irq(&v3d->drm, fence->seqno); dma_fence_signal(&fence->base); diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c index 06238e6d7f5c..b360709c0765 100644 --- a/drivers/gpu/drm/v3d/v3d_sched.c +++ b/drivers/gpu/drm/v3d/v3d_sched.c @@ -18,6 +18,7 @@ * semaphores to interlock between them. */ +#include <linux/sched/clock.h> #include <linux/kthread.h> #include "v3d_drv.h" @@ -76,6 +77,7 @@ static struct dma_fence *v3d_bin_job_run(struct drm_sched_job *sched_job) { struct v3d_bin_job *job = to_bin_job(sched_job); struct v3d_dev *v3d = job->base.v3d; + struct v3d_file_priv *file = job->base.file->driver_priv; struct drm_device *dev = &v3d->drm; struct dma_fence *fence; unsigned long irqflags; @@ -107,6 +109,9 @@ static struct dma_fence *v3d_bin_job_run(struct drm_sched_job *sched_job) trace_v3d_submit_cl(dev, false, to_v3d_fence(fence)->seqno, job->start, job->end); + file->start_ns[V3D_BIN] = local_clock(); + file->jobs_sent[V3D_BIN]++; + v3d_switch_perfmon(v3d, &job->base); /* Set the current and end address of the control list. @@ -131,6 +136,7 @@ static struct dma_fence *v3d_render_job_run(struct drm_sched_job *sched_job) { struct v3d_render_job *job = to_render_job(sched_job); struct v3d_dev *v3d = job->base.v3d; + struct v3d_file_priv *file = job->base.file->driver_priv; struct drm_device *dev = &v3d->drm; struct dma_fence *fence; @@ -158,6 +164,9 @@ static struct dma_fence *v3d_render_job_run(struct drm_sched_job *sched_job) trace_v3d_submit_cl(dev, true, to_v3d_fence(fence)->seqno, job->start, job->end); + file->start_ns[V3D_RENDER] = local_clock(); + file->jobs_sent[V3D_RENDER]++; + v3d_switch_perfmon(v3d, &job->base); /* XXX: Set the QCFG */ @@ -176,6 +185,7 @@ v3d_tfu_job_run(struct drm_sched_job *sched_job) { struct v3d_tfu_job *job = to_tfu_job(sched_job); struct v3d_dev *v3d = job->base.v3d; + struct v3d_file_priv *file = job->base.file->driver_priv; struct drm_device *dev = &v3d->drm; struct dma_fence *fence; @@ -190,6 +200,9 @@ v3d_tfu_job_run(struct drm_sched_job *sched_job) trace_v3d_submit_tfu(dev, to_v3d_fence(fence)->seqno); + file->start_ns[V3D_TFU] = local_clock(); + file->jobs_sent[V3D_TFU]++; + V3D_WRITE(V3D_TFU_IIA, job->args.iia); V3D_WRITE(V3D_TFU_IIS, job->args.iis); V3D_WRITE(V3D_TFU_ICA, job->args.ica); @@ -213,6 +226,7 @@ v3d_csd_job_run(struct drm_sched_job *sched_job) { struct v3d_csd_job *job = to_csd_job(sched_job); struct v3d_dev *v3d = job->base.v3d; + struct v3d_file_priv *file = job->base.file->driver_priv; struct drm_device *dev = &v3d->drm; struct dma_fence *fence; int i; @@ -231,6 +245,9 @@ v3d_csd_job_run(struct drm_sched_job *sched_job) trace_v3d_submit_csd(dev, to_v3d_fence(fence)->seqno); + file->start_ns[V3D_CSD] = local_clock(); + file->jobs_sent[V3D_CSD]++; + v3d_switch_perfmon(v3d, &job->base); for (i = 1; i <= 6; i++) @@ -246,9 +263,16 @@ v3d_cache_clean_job_run(struct drm_sched_job *sched_job) { struct v3d_job *job = to_v3d_job(sched_job); struct v3d_dev *v3d = job->v3d; + struct v3d_file_priv *file = job->file->driver_priv; + + file->start_ns[V3D_CACHE_CLEAN] = local_clock(); + file->jobs_sent[V3D_CACHE_CLEAN]++; v3d_clean_caches(v3d); + file->enabled_ns[V3D_CACHE_CLEAN] += local_clock() - file->start_ns[V3D_CACHE_CLEAN]; + file->start_ns[V3D_CACHE_CLEAN] = 0; + return NULL; }