diff mbox

[RFC,7/8] drm/i915: Add support for forwarding execbuffer tags in timestamp sample metadata

Message ID 1438754144-20435-8-git-send-email-sourab.gupta@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

sourab.gupta@intel.com Aug. 5, 2015, 5:55 a.m. UTC
From: Sourab Gupta <sourab.gupta@intel.com>

This patch enables userspace to specify tags (per workload), provided via
execbuffer ioctl, which could be added to timestamp samples, to help
associate samples with the corresponding workloads.

There may be multiple stages within a single context, from a userspace
perspective. An ability is needed to individually associate the samples
with their corresponding workloads (execbuffers), which may not be possible
solely with ctx_id or pid information.
This patch enables this mechanism.

Signed-off-by: Sourab Gupta <sourab.gupta@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h     |  2 ++
 drivers/gpu/drm/i915/i915_oa_perf.c | 20 +++++++++++++++++++-
 include/uapi/drm/i915_drm.h         |  8 +++++++-
 3 files changed, 28 insertions(+), 2 deletions(-)

Comments

Chris Wilson Aug. 5, 2015, 9:17 a.m. UTC | #1
On Wed, Aug 05, 2015 at 11:25:43AM +0530, sourab.gupta@intel.com wrote:
> @@ -555,10 +558,12 @@ static void forward_one_gen_pmu_sample(struct drm_i915_private *dev_priv,
>  	struct drm_i915_ts_node_ctx_id *ctx_info;
>  	struct drm_i915_ts_node_ring_id *ring_info;
>  	struct drm_i915_ts_node_pid *pid_info;
> +	struct drm_i915_ts_node_tag *tag_info;
>  	struct perf_raw_record raw;
>  
>  	BUILD_BUG_ON((TS_DATA_SIZE != 8) || (CTX_INFO_SIZE != 8) ||
> -			(RING_INFO_SIZE != 8) || (PID_INFO_SIZE != 8));
> +			(RING_INFO_SIZE != 8) || (PID_INFO_SIZE != 8) ||
> +			(TAG_INFO_SIZE != 8));

This is much more useful if each clause is independent. The error
message is then unambiguous and it looks neater.

>  	snapshot = dev_priv->gen_pmu.buffer.addr + node->offset;
>  	snapshot_size = TS_DATA_SIZE + CTX_INFO_SIZE;

> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 3dcc862..db91098 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -104,7 +104,8 @@ struct drm_i915_gen_pmu_attr {
>  	__u32 size;
>  	__u32 sample_ring:1,
>  		sample_pid:1,
> -		__reserved_1:30;
> +		sample_tag:1,
> +		__reserved_1:29;

Start each bitfield entry on its own line with __u32;
-Chris
Daniel Vetter Aug. 5, 2015, 9:29 a.m. UTC | #2
On Wed, Aug 05, 2015 at 10:17:55AM +0100, Chris Wilson wrote:
> On Wed, Aug 05, 2015 at 11:25:43AM +0530, sourab.gupta@intel.com wrote:
> > @@ -555,10 +558,12 @@ static void forward_one_gen_pmu_sample(struct drm_i915_private *dev_priv,
> >  	struct drm_i915_ts_node_ctx_id *ctx_info;
> >  	struct drm_i915_ts_node_ring_id *ring_info;
> >  	struct drm_i915_ts_node_pid *pid_info;
> > +	struct drm_i915_ts_node_tag *tag_info;
> >  	struct perf_raw_record raw;
> >  
> >  	BUILD_BUG_ON((TS_DATA_SIZE != 8) || (CTX_INFO_SIZE != 8) ||
> > -			(RING_INFO_SIZE != 8) || (PID_INFO_SIZE != 8));
> > +			(RING_INFO_SIZE != 8) || (PID_INFO_SIZE != 8) ||
> > +			(TAG_INFO_SIZE != 8));
> 
> This is much more useful if each clause is independent. The error
> message is then unambiguous and it looks neater.
> 
> >  	snapshot = dev_priv->gen_pmu.buffer.addr + node->offset;
> >  	snapshot_size = TS_DATA_SIZE + CTX_INFO_SIZE;
> 
> > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> > index 3dcc862..db91098 100644
> > --- a/include/uapi/drm/i915_drm.h
> > +++ b/include/uapi/drm/i915_drm.h
> > @@ -104,7 +104,8 @@ struct drm_i915_gen_pmu_attr {
> >  	__u32 size;
> >  	__u32 sample_ring:1,
> >  		sample_pid:1,
> > -		__reserved_1:30;
> > +		sample_tag:1,
> > +		__reserved_1:29;
> 
> Start each bitfield entry on its own line with __u32;

also no bitfields in uapi headers.
-Daniel

> -Chris
> 
> -- 
> Chris Wilson, Intel Open Source Technology Centre
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Robert Bragg Aug. 5, 2015, 1:59 p.m. UTC | #3
On Wed, Aug 5, 2015 at 10:29 AM, Daniel Vetter <daniel@ffwll.ch> wrote:
> On Wed, Aug 05, 2015 at 10:17:55AM +0100, Chris Wilson wrote:
>> On Wed, Aug 05, 2015 at 11:25:43AM +0530, sourab.gupta@intel.com wrote:
>> > @@ -555,10 +558,12 @@ static void forward_one_gen_pmu_sample(struct drm_i915_private *dev_priv,
>> >     struct drm_i915_ts_node_ctx_id *ctx_info;
>> >     struct drm_i915_ts_node_ring_id *ring_info;
>> >     struct drm_i915_ts_node_pid *pid_info;
>> > +   struct drm_i915_ts_node_tag *tag_info;
>> >     struct perf_raw_record raw;
>> >
>> >     BUILD_BUG_ON((TS_DATA_SIZE != 8) || (CTX_INFO_SIZE != 8) ||
>> > -                   (RING_INFO_SIZE != 8) || (PID_INFO_SIZE != 8));
>> > +                   (RING_INFO_SIZE != 8) || (PID_INFO_SIZE != 8) ||
>> > +                   (TAG_INFO_SIZE != 8));
>>
>> This is much more useful if each clause is independent. The error
>> message is then unambiguous and it looks neater.
>>
>> >     snapshot = dev_priv->gen_pmu.buffer.addr + node->offset;
>> >     snapshot_size = TS_DATA_SIZE + CTX_INFO_SIZE;
>>
>> > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
>> > index 3dcc862..db91098 100644
>> > --- a/include/uapi/drm/i915_drm.h
>> > +++ b/include/uapi/drm/i915_drm.h
>> > @@ -104,7 +104,8 @@ struct drm_i915_gen_pmu_attr {
>> >     __u32 size;
>> >     __u32 sample_ring:1,
>> >             sample_pid:1,
>> > -           __reserved_1:30;
>> > +           sample_tag:1,
>> > +           __reserved_1:29;
>>
>> Start each bitfield entry on its own line with __u32;
>
> also no bitfields in uapi headers.
> -Daniel

Ah, I had previously asked Sourab to pack the bitfields into the same
u64. I think we only get into undefined ABI territory if we have
multiple sequential bitfields in the structure where the compiler can
choose to combine them in some undefined way?

This follows the same pattern for bitfields seen in struct perf_event_attr.

I'm not sure we'll need lots of flags in our case though so perhaps it
would be fine to avoid the use of bitfields altogether here.

- Robert

>
>> -Chris
>>
>> --
>> Chris Wilson, Intel Open Source Technology Centre
>> _______________________________________________
>> Intel-gfx mailing list
>> Intel-gfx@lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
>
> --
> Daniel Vetter
> Software Engineer, Intel Corporation
> http://blog.ffwll.ch
Daniel Vetter Aug. 5, 2015, 3:25 p.m. UTC | #4
On Wed, Aug 05, 2015 at 02:59:03PM +0100, Robert Bragg wrote:
> On Wed, Aug 5, 2015 at 10:29 AM, Daniel Vetter <daniel@ffwll.ch> wrote:
> > On Wed, Aug 05, 2015 at 10:17:55AM +0100, Chris Wilson wrote:
> >> On Wed, Aug 05, 2015 at 11:25:43AM +0530, sourab.gupta@intel.com wrote:
> >> > @@ -555,10 +558,12 @@ static void forward_one_gen_pmu_sample(struct drm_i915_private *dev_priv,
> >> >     struct drm_i915_ts_node_ctx_id *ctx_info;
> >> >     struct drm_i915_ts_node_ring_id *ring_info;
> >> >     struct drm_i915_ts_node_pid *pid_info;
> >> > +   struct drm_i915_ts_node_tag *tag_info;
> >> >     struct perf_raw_record raw;
> >> >
> >> >     BUILD_BUG_ON((TS_DATA_SIZE != 8) || (CTX_INFO_SIZE != 8) ||
> >> > -                   (RING_INFO_SIZE != 8) || (PID_INFO_SIZE != 8));
> >> > +                   (RING_INFO_SIZE != 8) || (PID_INFO_SIZE != 8) ||
> >> > +                   (TAG_INFO_SIZE != 8));
> >>
> >> This is much more useful if each clause is independent. The error
> >> message is then unambiguous and it looks neater.
> >>
> >> >     snapshot = dev_priv->gen_pmu.buffer.addr + node->offset;
> >> >     snapshot_size = TS_DATA_SIZE + CTX_INFO_SIZE;
> >>
> >> > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> >> > index 3dcc862..db91098 100644
> >> > --- a/include/uapi/drm/i915_drm.h
> >> > +++ b/include/uapi/drm/i915_drm.h
> >> > @@ -104,7 +104,8 @@ struct drm_i915_gen_pmu_attr {
> >> >     __u32 size;
> >> >     __u32 sample_ring:1,
> >> >             sample_pid:1,
> >> > -           __reserved_1:30;
> >> > +           sample_tag:1,
> >> > +           __reserved_1:29;
> >>
> >> Start each bitfield entry on its own line with __u32;
> >
> > also no bitfields in uapi headers.
> > -Daniel
> 
> Ah, I had previously asked Sourab to pack the bitfields into the same
> u64. I think we only get into undefined ABI territory if we have
> multiple sequential bitfields in the structure where the compiler can
> choose to combine them in some undefined way?
> 
> This follows the same pattern for bitfields seen in struct perf_event_attr.
> 
> I'm not sure we'll need lots of flags in our case though so perhaps it
> would be fine to avoid the use of bitfields altogether here.

It might be uapi cargo culting, but I'm just not sure ;-) The other
problem with bitfields is that it's fickle to properly size the reserved
fields, and we need those to correctly reject unused flags. Otherwise
userspace might put garbage in there and extendability is out the window.
-Daniel
Robert Bragg Aug. 5, 2015, 4:48 p.m. UTC | #5
On Wed, Aug 5, 2015 at 4:25 PM, Daniel Vetter <daniel@ffwll.ch> wrote:
> On Wed, Aug 05, 2015 at 02:59:03PM +0100, Robert Bragg wrote:
>> On Wed, Aug 5, 2015 at 10:29 AM, Daniel Vetter <daniel@ffwll.ch> wrote:
>> > On Wed, Aug 05, 2015 at 10:17:55AM +0100, Chris Wilson wrote:
>> >> On Wed, Aug 05, 2015 at 11:25:43AM +0530, sourab.gupta@intel.com wrote:
>> >> > @@ -555,10 +558,12 @@ static void forward_one_gen_pmu_sample(struct drm_i915_private *dev_priv,
>> >> >     struct drm_i915_ts_node_ctx_id *ctx_info;
>> >> >     struct drm_i915_ts_node_ring_id *ring_info;
>> >> >     struct drm_i915_ts_node_pid *pid_info;
>> >> > +   struct drm_i915_ts_node_tag *tag_info;
>> >> >     struct perf_raw_record raw;
>> >> >
>> >> >     BUILD_BUG_ON((TS_DATA_SIZE != 8) || (CTX_INFO_SIZE != 8) ||
>> >> > -                   (RING_INFO_SIZE != 8) || (PID_INFO_SIZE != 8));
>> >> > +                   (RING_INFO_SIZE != 8) || (PID_INFO_SIZE != 8) ||
>> >> > +                   (TAG_INFO_SIZE != 8));
>> >>
>> >> This is much more useful if each clause is independent. The error
>> >> message is then unambiguous and it looks neater.
>> >>
>> >> >     snapshot = dev_priv->gen_pmu.buffer.addr + node->offset;
>> >> >     snapshot_size = TS_DATA_SIZE + CTX_INFO_SIZE;
>> >>
>> >> > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
>> >> > index 3dcc862..db91098 100644
>> >> > --- a/include/uapi/drm/i915_drm.h
>> >> > +++ b/include/uapi/drm/i915_drm.h
>> >> > @@ -104,7 +104,8 @@ struct drm_i915_gen_pmu_attr {
>> >> >     __u32 size;
>> >> >     __u32 sample_ring:1,
>> >> >             sample_pid:1,
>> >> > -           __reserved_1:30;
>> >> > +           sample_tag:1,
>> >> > +           __reserved_1:29;
>> >>
>> >> Start each bitfield entry on its own line with __u32;
>> >
>> > also no bitfields in uapi headers.
>> > -Daniel
>>
>> Ah, I had previously asked Sourab to pack the bitfields into the same
>> u64. I think we only get into undefined ABI territory if we have
>> multiple sequential bitfields in the structure where the compiler can
>> choose to combine them in some undefined way?
>>
>> This follows the same pattern for bitfields seen in struct perf_event_attr.
>>
>> I'm not sure we'll need lots of flags in our case though so perhaps it
>> would be fine to avoid the use of bitfields altogether here.
>
> It might be uapi cargo culting, but I'm just not sure ;-) The other
> problem with bitfields is that it's fickle to properly size the reserved
> fields, and we need those to correctly reject unused flags. Otherwise
> userspace might put garbage in there and extendability is out the window.

In my latest branch (sorry I haven't sent out a recent RFC myself as
I'm hoping to update public Gen Observability docs before I do that) I
ended up slightly generalizing and exporting perf_copy_attr() in
kernel/events/core.c to use the same tested code to help with this.
Core perf's approach to versioning + extending the attributes
structure seems pretty decent.

That said though, regarding unused/reserved fields, I realise now I did
miss an important check within the i915_oa code that core perf has,
which is to explicitly return -EINVAL if __reserved_1 != 0.

Maybe that should be taken as a case in point.

- Robert

> -Daniel
> --
> Daniel Vetter
> Software Engineer, Intel Corporation
> http://blog.ffwll.ch
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index f46687a..c3e823f 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1685,6 +1685,7 @@  struct i915_gen_pmu_node {
 	u32 ctx_id;
 	u32 ring;
 	u32 pid;
+	u32 tag;
 };
 
 extern const struct i915_oa_reg i915_oa_3d_mux_config_hsw[];
@@ -2020,6 +2021,7 @@  struct drm_i915_private {
 		struct work_struct event_destroy_work;
 #define I915_GEN_PMU_SAMPLE_RING		(1<<0)
 #define I915_GEN_PMU_SAMPLE_PID			(1<<1)
+#define I915_GEN_PMU_SAMPLE_TAG			(1<<2)
 		int sample_info_flags;
 	} gen_pmu;
 
diff --git a/drivers/gpu/drm/i915/i915_oa_perf.c b/drivers/gpu/drm/i915/i915_oa_perf.c
index f73d23c..e065e06 100644
--- a/drivers/gpu/drm/i915/i915_oa_perf.c
+++ b/drivers/gpu/drm/i915/i915_oa_perf.c
@@ -15,6 +15,7 @@ 
 #define CTX_INFO_SIZE sizeof(struct drm_i915_ts_node_ctx_id)
 #define RING_INFO_SIZE sizeof(struct drm_i915_ts_node_ring_id)
 #define PID_INFO_SIZE sizeof(struct drm_i915_ts_node_pid)
+#define TAG_INFO_SIZE sizeof(struct drm_i915_ts_node_tag)
 
 static u32 i915_oa_event_paranoid = true;
 
@@ -148,6 +149,8 @@  static void i915_gen_emit_ts_data(struct drm_i915_gem_request *req,
 		entry->ring = ring_id_mask(ring);
 	if (dev_priv->gen_pmu.sample_info_flags & I915_GEN_PMU_SAMPLE_PID)
 		entry->pid = current->pid;
+	if (dev_priv->gen_pmu.sample_info_flags & I915_GEN_PMU_SAMPLE_TAG)
+		entry->tag = tag;
 	i915_gem_request_assign(&entry->req, ring->outstanding_lazy_request);
 
 	spin_lock(&dev_priv->gen_pmu.lock);
@@ -555,10 +558,12 @@  static void forward_one_gen_pmu_sample(struct drm_i915_private *dev_priv,
 	struct drm_i915_ts_node_ctx_id *ctx_info;
 	struct drm_i915_ts_node_ring_id *ring_info;
 	struct drm_i915_ts_node_pid *pid_info;
+	struct drm_i915_ts_node_tag *tag_info;
 	struct perf_raw_record raw;
 
 	BUILD_BUG_ON((TS_DATA_SIZE != 8) || (CTX_INFO_SIZE != 8) ||
-			(RING_INFO_SIZE != 8) || (PID_INFO_SIZE != 8));
+			(RING_INFO_SIZE != 8) || (PID_INFO_SIZE != 8) ||
+			(TAG_INFO_SIZE != 8));
 
 	snapshot = dev_priv->gen_pmu.buffer.addr + node->offset;
 	snapshot_size = TS_DATA_SIZE + CTX_INFO_SIZE;
@@ -581,6 +586,13 @@  static void forward_one_gen_pmu_sample(struct drm_i915_private *dev_priv,
 		current_ptr = snapshot + snapshot_size;
 	}
 
+	if (dev_priv->gen_pmu.sample_info_flags & I915_GEN_PMU_SAMPLE_TAG) {
+		tag_info = (struct drm_i915_ts_node_tag *)current_ptr;
+		tag_info->tag = node->tag;
+		snapshot_size += TAG_INFO_SIZE;
+		current_ptr = snapshot + snapshot_size;
+	}
+
 	/* Note: the raw sample consists of a u32 size member and raw data. The
 	 * combined size of these two fields is required to be 8 byte aligned.
 	 * The size of raw data field is assumed to be 8 byte aligned already.
@@ -1031,6 +1043,9 @@  static int init_gen_pmu_buffer(struct perf_event *event)
 	if (dev_priv->gen_pmu.sample_info_flags & I915_GEN_PMU_SAMPLE_PID)
 		node_size += PID_INFO_SIZE;
 
+	if (dev_priv->gen_pmu.sample_info_flags & I915_GEN_PMU_SAMPLE_TAG)
+		node_size += TAG_INFO_SIZE;
+
 	/* size has to be aligned to 8 bytes */
 	node_size = ALIGN(node_size, 8);
 	dev_priv->gen_pmu.buffer.node_size = node_size;
@@ -1652,6 +1667,9 @@  static int i915_gen_event_init(struct perf_event *event)
 	if (gen_attr.sample_pid)
 		dev_priv->gen_pmu.sample_info_flags |= I915_GEN_PMU_SAMPLE_PID;
 
+	if (gen_attr.sample_tag)
+		dev_priv->gen_pmu.sample_info_flags |= I915_GEN_PMU_SAMPLE_TAG;
+
 	/* To avoid the complexity of having to accurately filter
 	 * data and marshal to the appropriate client
 	 * we currently only allow exclusive access */
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 3dcc862..db91098 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -104,7 +104,8 @@  struct drm_i915_gen_pmu_attr {
 	__u32 size;
 	__u32 sample_ring:1,
 		sample_pid:1,
-		__reserved_1:30;
+		sample_tag:1,
+		__reserved_1:29;
 };
 
 /* Header for PERF_RECORD_DEVICE type events */
@@ -169,6 +170,11 @@  struct drm_i915_ts_node_pid {
 	__u32 pad;
 };
 
+struct drm_i915_ts_node_tag {
+	__u32 tag;
+	__u32 pad;
+};
+
 /* Each region is a minimum of 16k, and there are at most 255 of them.
  */
 #define I915_NR_TEX_REGIONS 255	/* table size 2k - maximum due to use