diff mbox series

[5/6] drm/v3d: Add more tracepoints for V3D GPU rendering.

Message ID 20181201005759.28093-5-eric@anholt.net (mailing list archive)
State New, archived
Headers show
Series [1/6] drm/v3d: Document cache flushing ABI. | expand

Commit Message

Eric Anholt Dec. 1, 2018, 12:57 a.m. UTC
The core scheduler tells us when the job is pushed to the scheduler's
queue, and I had the job_run functions saying when they actually queue
the job to the hardware.  By adding tracepoints for the very top of
the ioctls and the IRQs signaling job completion, "perf record -a -e
v3d:.\* -e gpu_scheduler:.\* <job>; perf script" gets you a pretty
decent timeline.

Signed-off-by: Eric Anholt <eric@anholt.net>
---
 drivers/gpu/drm/v3d/v3d_gem.c   |   4 ++
 drivers/gpu/drm/v3d/v3d_irq.c   |  19 +++++-
 drivers/gpu/drm/v3d/v3d_trace.h | 101 ++++++++++++++++++++++++++++++++
 3 files changed, 121 insertions(+), 3 deletions(-)

Comments

Dave Emett Dec. 3, 2018, 5:41 p.m. UTC | #1
On Sat, 1 Dec 2018 at 00:58, Eric Anholt <eric@anholt.net> wrote:
>
> The core scheduler tells us when the job is pushed to the scheduler's
> queue, and I had the job_run functions saying when they actually queue
> the job to the hardware.  By adding tracepoints for the very top of
> the ioctls and the IRQs signaling job completion, "perf record -a -e
> v3d:.\* -e gpu_scheduler:.\* <job>; perf script" gets you a pretty
> decent timeline.
>
> Signed-off-by: Eric Anholt <eric@anholt.net>

Reviewed-by: Dave Emett <david.emett@broadcom.com>

> ---
>  drivers/gpu/drm/v3d/v3d_gem.c   |   4 ++
>  drivers/gpu/drm/v3d/v3d_irq.c   |  19 +++++-
>  drivers/gpu/drm/v3d/v3d_trace.h | 101 ++++++++++++++++++++++++++++++++
>  3 files changed, 121 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
> index 0bd6892e3044..2f82e2724b1f 100644
> --- a/drivers/gpu/drm/v3d/v3d_gem.c
> +++ b/drivers/gpu/drm/v3d/v3d_gem.c
> @@ -484,6 +484,8 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
>         struct drm_syncobj *sync_out;
>         int ret = 0;
>
> +       trace_v3d_submit_cl_ioctl(&v3d->drm, args->rcl_start, args->rcl_end);
> +
>         if (args->pad != 0) {
>                 DRM_INFO("pad must be zero: %d\n", args->pad);
>                 return -EINVAL;
> @@ -611,6 +613,8 @@ v3d_submit_tfu_ioctl(struct drm_device *dev, void *data,
>         int ret = 0;
>         int bo_count;
>
> +       trace_v3d_submit_tfu_ioctl(&v3d->drm, args->iia);
> +
>         job = kcalloc(1, sizeof(*job), GFP_KERNEL);
>         if (!job)
>                 return -ENOMEM;
> diff --git a/drivers/gpu/drm/v3d/v3d_irq.c b/drivers/gpu/drm/v3d/v3d_irq.c
> index dd7a7b0bd5a1..69338da70ddc 100644
> --- a/drivers/gpu/drm/v3d/v3d_irq.c
> +++ b/drivers/gpu/drm/v3d/v3d_irq.c
> @@ -15,6 +15,7 @@
>
>  #include "v3d_drv.h"
>  #include "v3d_regs.h"
> +#include "v3d_trace.h"
>
>  #define V3D_CORE_IRQS ((u32)(V3D_INT_OUTOMEM | \
>                              V3D_INT_FLDONE |   \
> @@ -88,12 +89,20 @@ v3d_irq(int irq, void *arg)
>         }
>
>         if (intsts & V3D_INT_FLDONE) {
> -               dma_fence_signal(v3d->bin_job->bin.done_fence);
> +               struct v3d_fence *fence =
> +                       to_v3d_fence(v3d->bin_job->bin.done_fence);
> +
> +               trace_v3d_bcl_irq(&v3d->drm, fence->seqno);
> +               dma_fence_signal(&fence->base);
>                 status = IRQ_HANDLED;
>         }
>
>         if (intsts & V3D_INT_FRDONE) {
> -               dma_fence_signal(v3d->render_job->render.done_fence);
> +               struct v3d_fence *fence =
> +                       to_v3d_fence(v3d->render_job->render.done_fence);
> +
> +               trace_v3d_rcl_irq(&v3d->drm, fence->seqno);
> +               dma_fence_signal(&fence->base);
>                 status = IRQ_HANDLED;
>         }
>
> @@ -119,7 +128,11 @@ v3d_hub_irq(int irq, void *arg)
>         V3D_WRITE(V3D_HUB_INT_CLR, intsts);
>
>         if (intsts & V3D_HUB_INT_TFUC) {
> -               dma_fence_signal(v3d->tfu_job->done_fence);
> +               struct v3d_fence *fence =
> +                       to_v3d_fence(v3d->tfu_job->done_fence);
> +
> +               trace_v3d_tfu_irq(&v3d->drm, fence->seqno);
> +               dma_fence_signal(&fence->base);
>                 status = IRQ_HANDLED;
>         }
>
> diff --git a/drivers/gpu/drm/v3d/v3d_trace.h b/drivers/gpu/drm/v3d/v3d_trace.h
> index f54ed9cd3444..edd984afa33f 100644
> --- a/drivers/gpu/drm/v3d/v3d_trace.h
> +++ b/drivers/gpu/drm/v3d/v3d_trace.h
> @@ -12,6 +12,28 @@
>  #define TRACE_SYSTEM v3d
>  #define TRACE_INCLUDE_FILE v3d_trace
>
> +TRACE_EVENT(v3d_submit_cl_ioctl,
> +           TP_PROTO(struct drm_device *dev, u32 ct1qba, u32 ct1qea),
> +           TP_ARGS(dev, ct1qba, ct1qea),
> +
> +           TP_STRUCT__entry(
> +                            __field(u32, dev)
> +                            __field(u32, ct1qba)
> +                            __field(u32, ct1qea)
> +                            ),
> +
> +           TP_fast_assign(
> +                          __entry->dev = dev->primary->index;
> +                          __entry->ct1qba = ct1qba;
> +                          __entry->ct1qea = ct1qea;
> +                          ),
> +
> +           TP_printk("dev=%u, RCL 0x%08x..0x%08x",
> +                     __entry->dev,
> +                     __entry->ct1qba,
> +                     __entry->ct1qea)
> +);
> +
>  TRACE_EVENT(v3d_submit_cl,
>             TP_PROTO(struct drm_device *dev, bool is_render,
>                      uint64_t seqno,
> @@ -42,6 +64,85 @@ TRACE_EVENT(v3d_submit_cl,
>                       __entry->ctnqea)
>  );
>
> +TRACE_EVENT(v3d_bcl_irq,
> +           TP_PROTO(struct drm_device *dev,
> +                    uint64_t seqno),
> +           TP_ARGS(dev, seqno),
> +
> +           TP_STRUCT__entry(
> +                            __field(u32, dev)
> +                            __field(u64, seqno)
> +                            ),
> +
> +           TP_fast_assign(
> +                          __entry->dev = dev->primary->index;
> +                          __entry->seqno = seqno;
> +                          ),
> +
> +           TP_printk("dev=%u, seqno=%llu",
> +                     __entry->dev,
> +                     __entry->seqno)
> +);
> +
> +TRACE_EVENT(v3d_rcl_irq,
> +           TP_PROTO(struct drm_device *dev,
> +                    uint64_t seqno),
> +           TP_ARGS(dev, seqno),
> +
> +           TP_STRUCT__entry(
> +                            __field(u32, dev)
> +                            __field(u64, seqno)
> +                            ),
> +
> +           TP_fast_assign(
> +                          __entry->dev = dev->primary->index;
> +                          __entry->seqno = seqno;
> +                          ),
> +
> +           TP_printk("dev=%u, seqno=%llu",
> +                     __entry->dev,
> +                     __entry->seqno)
> +);
> +
> +TRACE_EVENT(v3d_tfu_irq,
> +           TP_PROTO(struct drm_device *dev,
> +                    uint64_t seqno),
> +           TP_ARGS(dev, seqno),
> +
> +           TP_STRUCT__entry(
> +                            __field(u32, dev)
> +                            __field(u64, seqno)
> +                            ),
> +
> +           TP_fast_assign(
> +                          __entry->dev = dev->primary->index;
> +                          __entry->seqno = seqno;
> +                          ),
> +
> +           TP_printk("dev=%u, seqno=%llu",
> +                     __entry->dev,
> +                     __entry->seqno)
> +);
> +
> +TRACE_EVENT(v3d_submit_tfu_ioctl,
> +           TP_PROTO(struct drm_device *dev, u32 iia),
> +           TP_ARGS(dev, iia),
> +
> +           TP_STRUCT__entry(
> +                            __field(u32, dev)
> +                            __field(u32, iia)
> +                            ),
> +
> +           TP_fast_assign(
> +                          __entry->dev = dev->primary->index;
> +                          __entry->iia = iia;
> +                          ),
> +
> +           TP_printk("dev=%u, IIA 0x%08x",
> +                     __entry->dev,
> +                     __entry->iia)
> +);
> +
>  TRACE_EVENT(v3d_submit_tfu,
>             TP_PROTO(struct drm_device *dev,
>                      uint64_t seqno),
> --
> 2.20.0.rc1
>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
index 0bd6892e3044..2f82e2724b1f 100644
--- a/drivers/gpu/drm/v3d/v3d_gem.c
+++ b/drivers/gpu/drm/v3d/v3d_gem.c
@@ -484,6 +484,8 @@  v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
 	struct drm_syncobj *sync_out;
 	int ret = 0;
 
+	trace_v3d_submit_cl_ioctl(&v3d->drm, args->rcl_start, args->rcl_end);
+
 	if (args->pad != 0) {
 		DRM_INFO("pad must be zero: %d\n", args->pad);
 		return -EINVAL;
@@ -611,6 +613,8 @@  v3d_submit_tfu_ioctl(struct drm_device *dev, void *data,
 	int ret = 0;
 	int bo_count;
 
+	trace_v3d_submit_tfu_ioctl(&v3d->drm, args->iia);
+
 	job = kcalloc(1, sizeof(*job), GFP_KERNEL);
 	if (!job)
 		return -ENOMEM;
diff --git a/drivers/gpu/drm/v3d/v3d_irq.c b/drivers/gpu/drm/v3d/v3d_irq.c
index dd7a7b0bd5a1..69338da70ddc 100644
--- a/drivers/gpu/drm/v3d/v3d_irq.c
+++ b/drivers/gpu/drm/v3d/v3d_irq.c
@@ -15,6 +15,7 @@ 
 
 #include "v3d_drv.h"
 #include "v3d_regs.h"
+#include "v3d_trace.h"
 
 #define V3D_CORE_IRQS ((u32)(V3D_INT_OUTOMEM |	\
 			     V3D_INT_FLDONE |	\
@@ -88,12 +89,20 @@  v3d_irq(int irq, void *arg)
 	}
 
 	if (intsts & V3D_INT_FLDONE) {
-		dma_fence_signal(v3d->bin_job->bin.done_fence);
+		struct v3d_fence *fence =
+			to_v3d_fence(v3d->bin_job->bin.done_fence);
+
+		trace_v3d_bcl_irq(&v3d->drm, fence->seqno);
+		dma_fence_signal(&fence->base);
 		status = IRQ_HANDLED;
 	}
 
 	if (intsts & V3D_INT_FRDONE) {
-		dma_fence_signal(v3d->render_job->render.done_fence);
+		struct v3d_fence *fence =
+			to_v3d_fence(v3d->render_job->render.done_fence);
+
+		trace_v3d_rcl_irq(&v3d->drm, fence->seqno);
+		dma_fence_signal(&fence->base);
 		status = IRQ_HANDLED;
 	}
 
@@ -119,7 +128,11 @@  v3d_hub_irq(int irq, void *arg)
 	V3D_WRITE(V3D_HUB_INT_CLR, intsts);
 
 	if (intsts & V3D_HUB_INT_TFUC) {
-		dma_fence_signal(v3d->tfu_job->done_fence);
+		struct v3d_fence *fence =
+			to_v3d_fence(v3d->tfu_job->done_fence);
+
+		trace_v3d_tfu_irq(&v3d->drm, fence->seqno);
+		dma_fence_signal(&fence->base);
 		status = IRQ_HANDLED;
 	}
 
diff --git a/drivers/gpu/drm/v3d/v3d_trace.h b/drivers/gpu/drm/v3d/v3d_trace.h
index f54ed9cd3444..edd984afa33f 100644
--- a/drivers/gpu/drm/v3d/v3d_trace.h
+++ b/drivers/gpu/drm/v3d/v3d_trace.h
@@ -12,6 +12,28 @@ 
 #define TRACE_SYSTEM v3d
 #define TRACE_INCLUDE_FILE v3d_trace
 
+TRACE_EVENT(v3d_submit_cl_ioctl,
+	    TP_PROTO(struct drm_device *dev, u32 ct1qba, u32 ct1qea),
+	    TP_ARGS(dev, ct1qba, ct1qea),
+
+	    TP_STRUCT__entry(
+			     __field(u32, dev)
+			     __field(u32, ct1qba)
+			     __field(u32, ct1qea)
+			     ),
+
+	    TP_fast_assign(
+			   __entry->dev = dev->primary->index;
+			   __entry->ct1qba = ct1qba;
+			   __entry->ct1qea = ct1qea;
+			   ),
+
+	    TP_printk("dev=%u, RCL 0x%08x..0x%08x",
+		      __entry->dev,
+		      __entry->ct1qba,
+		      __entry->ct1qea)
+);
+
 TRACE_EVENT(v3d_submit_cl,
 	    TP_PROTO(struct drm_device *dev, bool is_render,
 		     uint64_t seqno,
@@ -42,6 +64,85 @@  TRACE_EVENT(v3d_submit_cl,
 		      __entry->ctnqea)
 );
 
+TRACE_EVENT(v3d_bcl_irq,
+	    TP_PROTO(struct drm_device *dev,
+		     uint64_t seqno),
+	    TP_ARGS(dev, seqno),
+
+	    TP_STRUCT__entry(
+			     __field(u32, dev)
+			     __field(u64, seqno)
+			     ),
+
+	    TP_fast_assign(
+			   __entry->dev = dev->primary->index;
+			   __entry->seqno = seqno;
+			   ),
+
+	    TP_printk("dev=%u, seqno=%llu",
+		      __entry->dev,
+		      __entry->seqno)
+);
+
+TRACE_EVENT(v3d_rcl_irq,
+	    TP_PROTO(struct drm_device *dev,
+		     uint64_t seqno),
+	    TP_ARGS(dev, seqno),
+
+	    TP_STRUCT__entry(
+			     __field(u32, dev)
+			     __field(u64, seqno)
+			     ),
+
+	    TP_fast_assign(
+			   __entry->dev = dev->primary->index;
+			   __entry->seqno = seqno;
+			   ),
+
+	    TP_printk("dev=%u, seqno=%llu",
+		      __entry->dev,
+		      __entry->seqno)
+);
+
+TRACE_EVENT(v3d_tfu_irq,
+	    TP_PROTO(struct drm_device *dev,
+		     uint64_t seqno),
+	    TP_ARGS(dev, seqno),
+
+	    TP_STRUCT__entry(
+			     __field(u32, dev)
+			     __field(u64, seqno)
+			     ),
+
+	    TP_fast_assign(
+			   __entry->dev = dev->primary->index;
+			   __entry->seqno = seqno;
+			   ),
+
+	    TP_printk("dev=%u, seqno=%llu",
+		      __entry->dev,
+		      __entry->seqno)
+);
+
+TRACE_EVENT(v3d_submit_tfu_ioctl,
+	    TP_PROTO(struct drm_device *dev, u32 iia),
+	    TP_ARGS(dev, iia),
+
+	    TP_STRUCT__entry(
+			     __field(u32, dev)
+			     __field(u32, iia)
+			     ),
+
+	    TP_fast_assign(
+			   __entry->dev = dev->primary->index;
+			   __entry->iia = iia;
+			   ),
+
+	    TP_printk("dev=%u, IIA 0x%08x",
+		      __entry->dev,
+		      __entry->iia)
+);
+
 TRACE_EVENT(v3d_submit_tfu,
 	    TP_PROTO(struct drm_device *dev,
 		     uint64_t seqno),