@@ -19,6 +19,7 @@
#include <linux/module.h>
#include <linux/of_platform.h>
#include <linux/platform_device.h>
#include <linux/reset.h>
+#include <linux/sched/clock.h>
#include <drm/drm_drv.h>
@@ -111,6 +112,10 @@ v3d_open(struct drm_device *dev, struct drm_file *file)
v3d_priv->v3d = v3d;
for (i = 0; i < V3D_MAX_QUEUES; i++) {
+ v3d_priv->enabled_ns[i] = 0;
+ v3d_priv->start_ns[i] = 0;
+ v3d_priv->jobs_sent[i] = 0;
+
sched = &v3d->queue[i].sched;
drm_sched_entity_init(&v3d_priv->sched_entity[i],
DRM_SCHED_PRIORITY_NORMAL, &sched,
@@ -136,7 +141,35 @@ v3d_postclose(struct drm_device *dev, struct drm_file *file)
kfree(v3d_priv);
}
-DEFINE_DRM_GEM_FOPS(v3d_drm_fops);
+static void v3d_show_fdinfo(struct drm_printer *p, struct drm_file *file)
+{
+ struct v3d_file_priv *file_priv = file->driver_priv;
+ u64 timestamp = local_clock();
+ enum v3d_queue queue;
+
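+ /* drm-engine-<name> is the standard DRM fdinfo key for time spent on
+ * an engine, in ns; v3d-jobs-<name> is a driver-specific job count.
+ */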
+ for (queue = 0; queue < V3D_MAX_QUEUES; queue++) {
+ /* Note that, in case of a GPU reset, the time spent in the failed
+ * attempt to execute the job is not accounted in the runtime.
+ */
+ drm_printf(p, "drm-engine-%s: \t%llu ns\n",
+ v3d_queue_to_string(queue),
+ file_priv->start_ns[queue] ? file_priv->enabled_ns[queue]
+ + timestamp - file_priv->start_ns[queue]
+ : file_priv->enabled_ns[queue]);
+
+ /* Note that we only count jobs that have completed, so jobs that were
+ * interrupted by a GPU reset are only counted once they complete after
+ * being resubmitted.
+ */
+ drm_printf(p, "v3d-jobs-%s: \t%llu jobs\n",
+ v3d_queue_to_string(queue), file_priv->jobs_sent[queue]);
+ }
+}
+
+static const struct file_operations v3d_drm_fops = {
+ .owner = THIS_MODULE,
+ DRM_GEM_FOPS,
+ .show_fdinfo = drm_show_fdinfo,
+};
/* DRM_AUTH is required on SUBMIT_CL for now, while we don't have GMP
* protection between clients. Note that render nodes would be
@@ -176,6 +209,7 @@ static const struct drm_driver v3d_drm_driver = {
.ioctls = v3d_drm_ioctls,
.num_ioctls = ARRAY_SIZE(v3d_drm_ioctls),
.fops = &v3d_drm_fops,
+ .show_fdinfo = v3d_show_fdinfo,
.name = DRIVER_NAME,
.desc = DRIVER_DESC,
@@ -21,6 +21,18 @@ struct reset_control;
#define V3D_MAX_QUEUES (V3D_CACHE_CLEAN + 1)
+static inline const char *v3d_queue_to_string(enum v3d_queue queue)
+{
+ switch (queue) {
+ case V3D_BIN: return "bin";
+ case V3D_RENDER: return "render";
+ case V3D_TFU: return "tfu";
+ case V3D_CSD: return "csd";
+ case V3D_CACHE_CLEAN: return "cache_clean";
+ }
+ return "UNKNOWN";
+}
+
struct v3d_queue_state {
struct drm_gpu_scheduler sched;
@@ -167,6 +179,12 @@ struct v3d_file_priv {
} perfmon;
struct drm_sched_entity sched_entity[V3D_MAX_QUEUES];
+
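+ /* local_clock() timestamp at which the job currently running on each
+ * queue started, or 0 if no job from this client is running there.
+ */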
+ u64 start_ns[V3D_MAX_QUEUES];
+
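+ /* Total runtime, in ns, accumulated by this client's completed jobs on
+ * each queue.
+ */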
+ u64 enabled_ns[V3D_MAX_QUEUES];
+
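+ /* Number of this client's jobs that have completed on each queue. */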
+ u64 jobs_sent[V3D_MAX_QUEUES];
};
struct v3d_bo {
@@ -238,6 +256,11 @@ struct v3d_job {
*/
struct v3d_perfmon *perfmon;
+ /* DRM file (client) that submitted the job; used to attribute GPU
+ * usage stats to the submitting process.
+ */
+ struct drm_file *file;
+
/* Callback for the freeing of the job on refcount going to 0. */
void (*free)(struct kref *ref);
};
@@ -415,6 +415,7 @@ v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv,
job = *container;
job->v3d = v3d;
job->free = free;
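+ /* Remember which client submitted the job so the completion IRQ
+ * handlers can credit its runtime and job count.
+ */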
+ job->file = file_priv;
ret = drm_sched_job_init(&job->base, &v3d_priv->sched_entity[queue],
v3d_priv);
@@ -14,6 +14,7 @@
*/
#include <linux/platform_device.h>
+#include <linux/sched/clock.h>
#include "v3d_drv.h"
#include "v3d_regs.h"
@@ -100,6 +101,11 @@ v3d_irq(int irq, void *arg)
if (intsts & V3D_INT_FLDONE) {
struct v3d_fence *fence =
to_v3d_fence(v3d->bin_job->base.irq_fence);
+ struct v3d_file_priv *file = v3d->bin_job->base.file->driver_priv;
+
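+ /* The bin job just completed: fold its runtime into this client's
+ * totals and mark the queue as idle for it.
+ */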
+ file->enabled_ns[V3D_BIN] += local_clock() - file->start_ns[V3D_BIN];
+ file->jobs_sent[V3D_BIN]++;
+ file->start_ns[V3D_BIN] = 0;
trace_v3d_bcl_irq(&v3d->drm, fence->seqno);
dma_fence_signal(&fence->base);
@@ -109,6 +115,11 @@ v3d_irq(int irq, void *arg)
if (intsts & V3D_INT_FRDONE) {
struct v3d_fence *fence =
to_v3d_fence(v3d->render_job->base.irq_fence);
+ struct v3d_file_priv *file = v3d->render_job->base.file->driver_priv;
+
+ file->enabled_ns[V3D_RENDER] += local_clock() - file->start_ns[V3D_RENDER];
+ file->jobs_sent[V3D_RENDER]++;
+ file->start_ns[V3D_RENDER] = 0;
trace_v3d_rcl_irq(&v3d->drm, fence->seqno);
dma_fence_signal(&fence->base);
@@ -118,6 +129,11 @@ v3d_irq(int irq, void *arg)
if (intsts & V3D_INT_CSDDONE) {
struct v3d_fence *fence =
to_v3d_fence(v3d->csd_job->base.irq_fence);
+ struct v3d_file_priv *file = v3d->csd_job->base.file->driver_priv;
+
+ file->enabled_ns[V3D_CSD] += local_clock() - file->start_ns[V3D_CSD];
+ file->jobs_sent[V3D_CSD]++;
+ file->start_ns[V3D_CSD] = 0;
trace_v3d_csd_irq(&v3d->drm, fence->seqno);
dma_fence_signal(&fence->base);
@@ -154,6 +170,11 @@ v3d_hub_irq(int irq, void *arg)
if (intsts & V3D_HUB_INT_TFUC) {
struct v3d_fence *fence =
to_v3d_fence(v3d->tfu_job->base.irq_fence);
+ struct v3d_file_priv *file = v3d->tfu_job->base.file->driver_priv;
+
+ file->enabled_ns[V3D_TFU] += local_clock() - file->start_ns[V3D_TFU];
+ file->jobs_sent[V3D_TFU]++;
+ file->start_ns[V3D_TFU] = 0;
trace_v3d_tfu_irq(&v3d->drm, fence->seqno);
dma_fence_signal(&fence->base);
@@ -18,6 +18,7 @@
* semaphores to interlock between them.
*/
#include <linux/kthread.h>
+#include <linux/sched/clock.h>
#include "v3d_drv.h"
@@ -76,6 +77,7 @@ static struct dma_fence *v3d_bin_job_run(struct drm_sched_job *sched_job)
{
struct v3d_bin_job *job = to_bin_job(sched_job);
struct v3d_dev *v3d = job->base.v3d;
+ struct v3d_file_priv *file = job->base.file->driver_priv;
struct drm_device *dev = &v3d->drm;
struct dma_fence *fence;
unsigned long irqflags;
@@ -107,6 +109,8 @@ static struct dma_fence *v3d_bin_job_run(struct drm_sched_job *sched_job)
trace_v3d_submit_cl(dev, false, to_v3d_fence(fence)->seqno,
job->start, job->end);
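+ /* Record when HW execution starts; the FLDONE interrupt handler folds
+ * the elapsed time into enabled_ns once the job completes.
+ */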
+ file->start_ns[V3D_BIN] = local_clock();
+
v3d_switch_perfmon(v3d, &job->base);
/* Set the current and end address of the control list.
@@ -131,6 +135,7 @@ static struct dma_fence *v3d_render_job_run(struct drm_sched_job *sched_job)
{
struct v3d_render_job *job = to_render_job(sched_job);
struct v3d_dev *v3d = job->base.v3d;
+ struct v3d_file_priv *file = job->base.file->driver_priv;
struct drm_device *dev = &v3d->drm;
struct dma_fence *fence;
@@ -158,6 +163,8 @@ static struct dma_fence *v3d_render_job_run(struct drm_sched_job *sched_job)
trace_v3d_submit_cl(dev, true, to_v3d_fence(fence)->seqno,
job->start, job->end);
+ file->start_ns[V3D_RENDER] = local_clock();
+
v3d_switch_perfmon(v3d, &job->base);
/* XXX: Set the QCFG */
@@ -176,6 +183,7 @@ v3d_tfu_job_run(struct drm_sched_job *sched_job)
{
struct v3d_tfu_job *job = to_tfu_job(sched_job);
struct v3d_dev *v3d = job->base.v3d;
+ struct v3d_file_priv *file = job->base.file->driver_priv;
struct drm_device *dev = &v3d->drm;
struct dma_fence *fence;
@@ -190,6 +198,8 @@ v3d_tfu_job_run(struct drm_sched_job *sched_job)
trace_v3d_submit_tfu(dev, to_v3d_fence(fence)->seqno);
+ file->start_ns[V3D_TFU] = local_clock();
+
V3D_WRITE(V3D_TFU_IIA, job->args.iia);
V3D_WRITE(V3D_TFU_IIS, job->args.iis);
V3D_WRITE(V3D_TFU_ICA, job->args.ica);
@@ -213,6 +223,7 @@ v3d_csd_job_run(struct drm_sched_job *sched_job)
{
struct v3d_csd_job *job = to_csd_job(sched_job);
struct v3d_dev *v3d = job->base.v3d;
+ struct v3d_file_priv *file = job->base.file->driver_priv;
struct drm_device *dev = &v3d->drm;
struct dma_fence *fence;
int i;
@@ -231,6 +242,8 @@ v3d_csd_job_run(struct drm_sched_job *sched_job)
trace_v3d_submit_csd(dev, to_v3d_fence(fence)->seqno);
+ file->start_ns[V3D_CSD] = local_clock();
+
v3d_switch_perfmon(v3d, &job->base);
for (i = 1; i <= 6; i++)
@@ -246,9 +259,16 @@ v3d_cache_clean_job_run(struct drm_sched_job *sched_job)
{
struct v3d_job *job = to_v3d_job(sched_job);
struct v3d_dev *v3d = job->v3d;
+ struct v3d_file_priv *file = job->file->driver_priv;
+
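+ /* Cache clean jobs complete synchronously within this function, so
+ * both the start and end timestamps are taken here rather than in an
+ * interrupt handler.
+ */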
+ file->start_ns[V3D_CACHE_CLEAN] = local_clock();
v3d_clean_caches(v3d);
+ file->enabled_ns[V3D_CACHE_CLEAN] += local_clock() - file->start_ns[V3D_CACHE_CLEAN];
+ file->jobs_sent[V3D_CACHE_CLEAN]++;
+ file->start_ns[V3D_CACHE_CLEAN] = 0;
+
return NULL;
}