diff mbox

[02/21] drm/etnaviv: add uapi for perfmon feature

Message ID 20170609102123.2417-3-christian.gmeiner@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Christian Gmeiner June 9, 2017, 10:21 a.m. UTC
Sadly we can not read any registers via command stream so we need
to extend the drm_etnaviv_gem_submit struct with performance monitor
requests. Those requests gets process before and/or after the actual
submitted command stream.

The Vivante kernel driver has a special ioctl to read all perfmon
registers at once and return it. There is no connection between
a specifc command stream and the read perf values.

Signed-off-by: Christian Gmeiner <christian.gmeiner@gmail.com>
---
 include/uapi/drm/etnaviv_drm.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

Comments

Lucas Stach June 26, 2017, 12:56 p.m. UTC | #1
Am Freitag, den 09.06.2017, 12:21 +0200 schrieb Christian Gmeiner:
> Sadly we can not read any registers via command stream so we need
> to extend the drm_etnaviv_gem_submit struct with performance monitor
> requests. Those requests gets process before and/or after the actual
> submitted command stream.
> 
> The Vivante kernel driver has a special ioctl to read all perfmon
> registers at once and return it. There is no connection between
> a specifc command stream and the read perf values.
> 
> Signed-off-by: Christian Gmeiner <christian.gmeiner@gmail.com>
> ---
>  include/uapi/drm/etnaviv_drm.h | 16 ++++++++++++++++
>  1 file changed, 16 insertions(+)
> 
> diff --git a/include/uapi/drm/etnaviv_drm.h
> b/include/uapi/drm/etnaviv_drm.h
> index 9e488a0..b67a7dd 100644
> --- a/include/uapi/drm/etnaviv_drm.h
> +++ b/include/uapi/drm/etnaviv_drm.h
> @@ -150,6 +150,19 @@ struct drm_etnaviv_gem_submit_bo {
>  	__u64 presumed;       /* in/out, presumed buffer address */
>  };
>  
> +/* performance monitor request (pmr) */
> +#define ETNA_PM_PROCESS_PRE             0x0001
> +#define ETNA_PM_PROCESS_POST            0x0002
> +struct drm_etnaviv_gem_submit_pmr {
> +	__u32 flags;          /* in, when to process request
> (ETNA_PM_PROCESS_x) */
> +	__u8  domain;         /* in, pm domain */
> +	__u8  signal;         /* in, pm signal */

Are we sure that no domain will ever have more than 256 signals?

> +	__u16 pad[3];

Unnecessary large pad. A single u16 is enough to naturally align the
next member.

> +	__u32 sequence;       /* in, sequence number */
> +	__u32 read_offset;    /* in, offset from read_bo */
> +	__u32 read_idx;       /* in, index of read_bo buffer */
> +};
> +
>  /* Each cmdstream submit consists of a table of buffers involved,
> and
>   * one or more cmdstream buffers.  This allows for conditional
> execution
>   * (context-restore), and IB buffers needed for per tile/bin draw
> cmds.
> @@ -175,6 +188,9 @@ struct drm_etnaviv_gem_submit {
>  	__u64 stream;         /* in, ptr to cmdstream */
>  	__u32 flags;          /* in, mask of ETNA_SUBMIT_x */
>  	__s32 fence_fd;       /* in/out, fence fd (see
> ETNA_SUBMIT_FENCE_FD_x) */
> +	__u64 pmrs;           /* in, ptr to array of submit_pmr's */
> +	__u32 nr_pmrs;        /* in, number of submit_pmr's */
> +	__u32 pad;

This pad isn't needed.

>  };
>  
>  /* The normal way to synchronize with the GPU is just to CPU_PREP on
Christian Gmeiner July 2, 2017, 10:04 a.m. UTC | #2
2017-06-26 14:56 GMT+02:00 Lucas Stach <l.stach@pengutronix.de>:
> Am Freitag, den 09.06.2017, 12:21 +0200 schrieb Christian Gmeiner:
>> Sadly we can not read any registers via command stream so we need
>> to extend the drm_etnaviv_gem_submit struct with performance monitor
>> requests. Those requests gets process before and/or after the actual
>> submitted command stream.
>>
>> The Vivante kernel driver has a special ioctl to read all perfmon
>> registers at once and return it. There is no connection between
>> a specifc command stream and the read perf values.
>>
>> Signed-off-by: Christian Gmeiner <christian.gmeiner@gmail.com>
>> ---
>>  include/uapi/drm/etnaviv_drm.h | 16 ++++++++++++++++
>>  1 file changed, 16 insertions(+)
>>
>> diff --git a/include/uapi/drm/etnaviv_drm.h
>> b/include/uapi/drm/etnaviv_drm.h
>> index 9e488a0..b67a7dd 100644
>> --- a/include/uapi/drm/etnaviv_drm.h
>> +++ b/include/uapi/drm/etnaviv_drm.h
>> @@ -150,6 +150,19 @@ struct drm_etnaviv_gem_submit_bo {
>>       __u64 presumed;       /* in/out, presumed buffer address */
>>  };
>>
>> +/* performance monitor request (pmr) */
>> +#define ETNA_PM_PROCESS_PRE             0x0001
>> +#define ETNA_PM_PROCESS_POST            0x0002
>> +struct drm_etnaviv_gem_submit_pmr {
>> +     __u32 flags;          /* in, when to process request
>> (ETNA_PM_PROCESS_x) */
>> +     __u8  domain;         /* in, pm domain */
>> +     __u8  signal;         /* in, pm signal */
>
> Are we sure that no domain will ever have more than 256 signals?
>

My crystal ball is not working as expected so I don't know - but I will
change it to a __u16.

>> +     __u16 pad[3];
>
> Unnecessary large pad. A single u16 is enough to naturally align the
> next member.
>

Correct.

>> +     __u32 sequence;       /* in, sequence number */
>> +     __u32 read_offset;    /* in, offset from read_bo */
>> +     __u32 read_idx;       /* in, index of read_bo buffer */
>> +};
>> +
>>  /* Each cmdstream submit consists of a table of buffers involved,
>> and
>>   * one or more cmdstream buffers.  This allows for conditional
>> execution
>>   * (context-restore), and IB buffers needed for per tile/bin draw
>> cmds.
>> @@ -175,6 +188,9 @@ struct drm_etnaviv_gem_submit {
>>       __u64 stream;         /* in, ptr to cmdstream */
>>       __u32 flags;          /* in, mask of ETNA_SUBMIT_x */
>>       __s32 fence_fd;       /* in/out, fence fd (see
>> ETNA_SUBMIT_FENCE_FD_x) */
>> +     __u64 pmrs;           /* in, ptr to array of submit_pmr's */
>> +     __u32 nr_pmrs;        /* in, number of submit_pmr's */
>> +     __u32 pad;
>
> This pad isn't needed.
>

 * Pad the entire struct to a multiple of 64-bits if the structure contains
   64-bit types - the structure size will otherwise differ on 32-bit versus
   64-bit. Having a different structure size hurts when passing arrays of
   structures to the kernel, or if the kernel checks the structure size, which
   e.g. the drm core does.

$ pahole drivers/gpu/drm/etnaviv/etnaviv.o -C drm_etnaviv_gem_submit
struct drm_etnaviv_gem_submit {
        __u32                      fence;                /*     0     4 */
        __u32                      pipe;                 /*     4     4 */
        __u32                      exec_state;           /*     8     4 */
        __u32                      nr_bos;               /*    12     4 */
        __u32                      nr_relocs;            /*    16     4 */
        __u32                      stream_size;          /*    20     4 */
        __u64                      bos;                  /*    24     8 */
        __u64                      relocs;               /*    32     8 */
        __u64                      stream;               /*    40     8 */
        __u32                      flags;                /*    48     4 */
        __s32                      fence_fd;             /*    52     4 */
        __u64                      pmrs;                 /*    56     8 */
        /* --- cacheline 1 boundary (64 bytes) --- */
        __u32                      nr_pmrs;              /*    64     4 */
        __u32                      pad;                  /*    68     4 */

        /* size: 72, cachelines: 2, members: 14 */
        /* last cacheline: 8 bytes */
};


>>  };
>>
>>  /* The normal way to synchronize with the GPU is just to CPU_PREP on

greets
--
Christian Gmeiner, MSc

https://www.youtube.com/user/AloryOFFICIAL
https://soundcloud.com/christian-gmeiner
Lucas Stach July 3, 2017, 8:22 a.m. UTC | #3
Am Sonntag, den 02.07.2017, 12:04 +0200 schrieb Christian Gmeiner:
> 2017-06-26 14:56 GMT+02:00 Lucas Stach <l.stach@pengutronix.de>:
> > Am Freitag, den 09.06.2017, 12:21 +0200 schrieb Christian Gmeiner:
> >> Sadly we can not read any registers via command stream so we need
> >> to extend the drm_etnaviv_gem_submit struct with performance monitor
> >> requests. Those requests gets process before and/or after the actual
> >> submitted command stream.
> >>
> >> The Vivante kernel driver has a special ioctl to read all perfmon
> >> registers at once and return it. There is no connection between
> >> a specifc command stream and the read perf values.
> >>
> >> Signed-off-by: Christian Gmeiner <christian.gmeiner@gmail.com>
> >> ---
> >>  include/uapi/drm/etnaviv_drm.h | 16 ++++++++++++++++
> >>  1 file changed, 16 insertions(+)
> >>
> >> diff --git a/include/uapi/drm/etnaviv_drm.h
> >> b/include/uapi/drm/etnaviv_drm.h
> >> index 9e488a0..b67a7dd 100644
> >> --- a/include/uapi/drm/etnaviv_drm.h
> >> +++ b/include/uapi/drm/etnaviv_drm.h
> >> @@ -150,6 +150,19 @@ struct drm_etnaviv_gem_submit_bo {
> >>       __u64 presumed;       /* in/out, presumed buffer address */
> >>  };
> >>
> >> +/* performance monitor request (pmr) */
> >> +#define ETNA_PM_PROCESS_PRE             0x0001
> >> +#define ETNA_PM_PROCESS_POST            0x0002
> >> +struct drm_etnaviv_gem_submit_pmr {
> >> +     __u32 flags;          /* in, when to process request
> >> (ETNA_PM_PROCESS_x) */
> >> +     __u8  domain;         /* in, pm domain */
> >> +     __u8  signal;         /* in, pm signal */
> >
> > Are we sure that no domain will ever have more than 256 signals?
> >
> 
> My crystal ball is not working as expected so I don't know - but I will
> change it to a __u16.
> 
> >> +     __u16 pad[3];
> >
> > Unnecessary large pad. A single u16 is enough to naturally align the
> > next member.
> >
> 
> Correct.
> 
> >> +     __u32 sequence;       /* in, sequence number */
> >> +     __u32 read_offset;    /* in, offset from read_bo */
> >> +     __u32 read_idx;       /* in, index of read_bo buffer */
> >> +};
> >> +
> >>  /* Each cmdstream submit consists of a table of buffers involved,
> >> and
> >>   * one or more cmdstream buffers.  This allows for conditional
> >> execution
> >>   * (context-restore), and IB buffers needed for per tile/bin draw
> >> cmds.
> >> @@ -175,6 +188,9 @@ struct drm_etnaviv_gem_submit {
> >>       __u64 stream;         /* in, ptr to cmdstream */
> >>       __u32 flags;          /* in, mask of ETNA_SUBMIT_x */
> >>       __s32 fence_fd;       /* in/out, fence fd (see
> >> ETNA_SUBMIT_FENCE_FD_x) */
> >> +     __u64 pmrs;           /* in, ptr to array of submit_pmr's */
> >> +     __u32 nr_pmrs;        /* in, number of submit_pmr's */
> >> +     __u32 pad;
> >
> > This pad isn't needed.
> >
> 
>  * Pad the entire struct to a multiple of 64-bits if the structure contains
>    64-bit types - the structure size will otherwise differ on 32-bit versus
>    64-bit. Having a different structure size hurts when passing arrays of
>    structures to the kernel, or if the kernel checks the structure size, which
>    e.g. the drm core does.

Okay it is still not really necessary (we don't ever have pass an array
of struct drm_etnaviv_gem_submit and the worst that could happen is that
the DRM core needs to zero the added struct padding on 64bit), but it
also doesn't hurt and skipping the zero fill on 64bit sounds good, so
please keep as-is.

Regards,
Lucas
diff mbox

Patch

diff --git a/include/uapi/drm/etnaviv_drm.h b/include/uapi/drm/etnaviv_drm.h
index 9e488a0..b67a7dd 100644
--- a/include/uapi/drm/etnaviv_drm.h
+++ b/include/uapi/drm/etnaviv_drm.h
@@ -150,6 +150,19 @@  struct drm_etnaviv_gem_submit_bo {
 	__u64 presumed;       /* in/out, presumed buffer address */
 };
 
+/* performance monitor request (pmr) */
+#define ETNA_PM_PROCESS_PRE             0x0001
+#define ETNA_PM_PROCESS_POST            0x0002
+struct drm_etnaviv_gem_submit_pmr {
+	__u32 flags;          /* in, when to process request (ETNA_PM_PROCESS_x) */
+	__u8  domain;         /* in, pm domain */
+	__u8  signal;         /* in, pm signal */
+	__u16 pad[3];
+	__u32 sequence;       /* in, sequence number */
+	__u32 read_offset;    /* in, offset from read_bo */
+	__u32 read_idx;       /* in, index of read_bo buffer */
+};
+
 /* Each cmdstream submit consists of a table of buffers involved, and
  * one or more cmdstream buffers.  This allows for conditional execution
  * (context-restore), and IB buffers needed for per tile/bin draw cmds.
@@ -175,6 +188,9 @@  struct drm_etnaviv_gem_submit {
 	__u64 stream;         /* in, ptr to cmdstream */
 	__u32 flags;          /* in, mask of ETNA_SUBMIT_x */
 	__s32 fence_fd;       /* in/out, fence fd (see ETNA_SUBMIT_FENCE_FD_x) */
+	__u64 pmrs;           /* in, ptr to array of submit_pmr's */
+	__u32 nr_pmrs;        /* in, number of submit_pmr's */
+	__u32 pad;
 };
 
 /* The normal way to synchronize with the GPU is just to CPU_PREP on