Message ID | 1436950306-14147-9-git-send-email-sourab.gupta@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Wed, Jul 15, 2015 at 02:21:46PM +0530, sourab.gupta@intel.com wrote: > From: Sourab Gupta <sourab.gupta@intel.com> > > This patch adds support for retrieving MMIO register values alongwith > timestamps and forwarding them to userspace through perf. > The userspace can request upto 8 MMIO register values to be dumped. > The addresses of upto 8 MMIO registers can be passed through perf attr > config. The registers are checked against a whitelist before passing them > on. The commands to dump the values of these MMIO registers are then > inserted into the ring alongwith commands to dump the timestamps. > > Signed-off-by: Sourab Gupta <sourab.gupta@intel.com> > --- > drivers/gpu/drm/i915/i915_drv.h | 2 + > drivers/gpu/drm/i915/i915_oa_perf.c | 87 ++++++++++++++++++++++++++++++++++--- > include/uapi/drm/i915_drm.h | 10 ++++- > 3 files changed, 92 insertions(+), 7 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index f2fe8d0..e114175 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -2017,7 +2017,9 @@ struct drm_i915_private { > #define I915_GEN_PMU_SAMPLE_RING (1<<0) > #define I915_GEN_PMU_SAMPLE_PID (1<<1) > #define I915_GEN_PMU_SAMPLE_TAG (1<<2) > +#define I915_GEN_PMU_SAMPLE_MMIO (1<<3) > int sample_info_flags; > + u32 mmio_list[8]; > } gen_pmu; > > void (*insert_profile_cmd[I915_PROFILE_MAX]) > diff --git a/drivers/gpu/drm/i915/i915_oa_perf.c b/drivers/gpu/drm/i915/i915_oa_perf.c > index 1cc16ef..a9d44e0 100644 > --- a/drivers/gpu/drm/i915/i915_oa_perf.c > +++ b/drivers/gpu/drm/i915/i915_oa_perf.c > @@ -113,8 +113,8 @@ void i915_gen_insert_cmd_ts(struct intel_ringbuffer *ringbuf, u32 ctx_id, > struct drm_i915_gem_object *obj = dev_priv->gen_pmu.buffer.obj; > struct i915_gen_pmu_node *entry; > unsigned long lock_flags; > - u32 addr = 0; > - int ret; > + u32 mmio_addr, addr = 0; > + int ret, i; > > entry = kzalloc(sizeof(*entry), GFP_KERNEL); > if (entry == NULL) { > @@ -150,6 +150,7 @@ void i915_gen_insert_cmd_ts(struct intel_ringbuffer *ringbuf, u32 ctx_id, > spin_unlock_irqrestore(&dev_priv->gen_pmu.lock, lock_flags); > > addr = i915_gem_obj_ggtt_offset(obj) + entry->offset; > + mmio_addr = addr + sizeof(struct drm_i915_ts_data); > > if (ring->id == RCS) { > ret = intel_ring_begin(ring, 6); > @@ -177,6 +178,25 @@ void i915_gen_insert_cmd_ts(struct intel_ringbuffer *ringbuf, u32 ctx_id, > intel_ring_emit(ring, 0); /* imm high, must be zero */ > intel_ring_advance(ring); > } > + for (i = 0; i < 8; i++) { > + if (0 == dev_priv->gen_pmu.mmio_list[i]) > + break; > + > + addr = mmio_addr + > + i * sizeof(dev_priv->gen_pmu.mmio_list[i]); > + > + ret = intel_ring_begin(ring, 4); > + if (ret) > + return; > + > + intel_ring_emit(ring, > + MI_STORE_REGISTER_MEM(1) | > + MI_SRM_LRM_GLOBAL_GTT); > + intel_ring_emit(ring, dev_priv->gen_pmu.mmio_list[i]); > + intel_ring_emit(ring, addr); > + intel_ring_emit(ring, MI_NOOP); > + intel_ring_advance(ring); Premature optimisation, but can we store the register array with a single command? for (i = 0; i < 8; i++) { if (0 == dev_priv->gen_pmu.mmio_list[i]) break; ret = intel_ring_begin(ring, 2*i + 2); if (ret) return; intel_ring_emit(MI_STORE_REGISTER_MEM(i)); for (i = 0; i < 8; i++) { if (0 == dev_priv->gen_pmu.mmio_list[i]) break; intel_ring_emit(dev_priv->gen_pmu.mmio_list[i]); intel_ring_emit(addr + i*4); } intel_ring_emit(MI_NOOP); You should also consider ensuring that we have a command barrier between these and 3d operations. If only we had engine->emit_flush(I915_COMMAND_BARRIER). > +/* Some embargoed entries missing from whitelist */ > +static const struct register_whitelist { > + uint64_t offset; > + uint32_t size; > + /* supported gens, 0x10 for 4, 0x30 for 4 and 5, etc. */ > + uint32_t gen_bitmask; > +} whitelist[] = { > + { GEN6_GT_GFX_RC6, 4, GEN_RANGE(7, 9) }, > + { GEN6_GT_GFX_RC6p, 4, GEN_RANGE(7, 9) }, > +}; > + > +static int check_mmio_whitelist(struct drm_i915_private *dev_priv, > + struct drm_i915_gen_pmu_attr *gen_attr) Just what I came looking for. > +{ > + struct register_whitelist const *entry = whitelist; > + int i, count; > + > + for (count = 0; count < 8; count++) { hardcoded value, ARRAY_SIZE(gen_attr->mmio_list) > + if (!gen_attr->mmio_list[count]) > + break; > + > + for (i = 0; i < ARRAY_SIZE(whitelist); i++, entry++) { Interesting choice of continuation in the ABI. Seems a litte forced. Move the whitelist into the function, no one else should think of accessing it (right?), then just use whitelist[i]. > + if (entry->offset == gen_attr->mmio_list[count] && > + (1 << INTEL_INFO(dev_priv->dev)->gen & dev_priv->dev->dev_priv->info, you have to be kidding me. INTEL_INFO(dev_priv) is the natural form. > + entry->gen_bitmask)) > + break; > + } > + > + if (i == ARRAY_SIZE(whitelist)) > + return -EINVAL; > + } > + return 0; > +} > + > static int i915_gen_event_init(struct perf_event *event) > { > struct drm_i915_private *dev_priv = > @@ -1674,6 +1740,17 @@ static int i915_gen_event_init(struct perf_event *event) > if (gen_attr.sample_tag) > dev_priv->gen_pmu.sample_info_flags |= I915_GEN_PMU_SAMPLE_TAG; > > + if (gen_attr.sample_mmio) { > + ret = check_mmio_whitelist(dev_priv, &gen_attr); > + if (ret) > + return ret; Global hw state should only be inspectable by root (unless the system has relaxed perf permissions). > + dev_priv->gen_pmu.sample_info_flags |= > + I915_GEN_PMU_SAMPLE_MMIO; > + memcpy(dev_priv->gen_pmu.mmio_list, gen_attr.mmio_list, > + sizeof(dev_priv->gen_pmu.mmio_list)); > + } > + > /* To avoid the complexity of having to accurately filter > * data and marshal to the appropriate client > * we currently only allow exclusive access */ > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h > index 7ab4972..65bc39d 100644 > --- a/include/uapi/drm/i915_drm.h > +++ b/include/uapi/drm/i915_drm.h > @@ -81,7 +81,7 @@ > > #define I915_OA_ATTR_SIZE_VER0 32 /* sizeof first published struct */ > > -#define I915_GEN_PMU_ATTR_SIZE_VER0 8 /* sizeof first published struct */ > +#define I915_GEN_PMU_ATTR_SIZE_VER0 40 /* sizeof first published struct */ > > typedef struct _drm_i915_oa_attr { > __u32 size; > @@ -105,7 +105,9 @@ struct drm_i915_gen_pmu_attr { > __u32 sample_ring:1, > sample_pid:1, > sample_tag:1, > - __reserved_1:29; > + sample_mmio:1, > + __reserved_1:28; > + __u32 mmio_list[8]; > }; seems like having a BUILD_BUG_ON(sizeof(struct _drm_i915_oa_attr) != I915_GEN_PMU_ATTR_SIZE_VER0) is in order. -Chris
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index f2fe8d0..e114175 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2017,7 +2017,9 @@ struct drm_i915_private { #define I915_GEN_PMU_SAMPLE_RING (1<<0) #define I915_GEN_PMU_SAMPLE_PID (1<<1) #define I915_GEN_PMU_SAMPLE_TAG (1<<2) +#define I915_GEN_PMU_SAMPLE_MMIO (1<<3) int sample_info_flags; + u32 mmio_list[8]; } gen_pmu; void (*insert_profile_cmd[I915_PROFILE_MAX]) diff --git a/drivers/gpu/drm/i915/i915_oa_perf.c b/drivers/gpu/drm/i915/i915_oa_perf.c index 1cc16ef..a9d44e0 100644 --- a/drivers/gpu/drm/i915/i915_oa_perf.c +++ b/drivers/gpu/drm/i915/i915_oa_perf.c @@ -113,8 +113,8 @@ void i915_gen_insert_cmd_ts(struct intel_ringbuffer *ringbuf, u32 ctx_id, struct drm_i915_gem_object *obj = dev_priv->gen_pmu.buffer.obj; struct i915_gen_pmu_node *entry; unsigned long lock_flags; - u32 addr = 0; - int ret; + u32 mmio_addr, addr = 0; + int ret, i; entry = kzalloc(sizeof(*entry), GFP_KERNEL); if (entry == NULL) { @@ -150,6 +150,7 @@ void i915_gen_insert_cmd_ts(struct intel_ringbuffer *ringbuf, u32 ctx_id, spin_unlock_irqrestore(&dev_priv->gen_pmu.lock, lock_flags); addr = i915_gem_obj_ggtt_offset(obj) + entry->offset; + mmio_addr = addr + sizeof(struct drm_i915_ts_data); if (ring->id == RCS) { ret = intel_ring_begin(ring, 6); @@ -177,6 +178,25 @@ void i915_gen_insert_cmd_ts(struct intel_ringbuffer *ringbuf, u32 ctx_id, intel_ring_emit(ring, 0); /* imm high, must be zero */ intel_ring_advance(ring); } + for (i = 0; i < 8; i++) { + if (0 == dev_priv->gen_pmu.mmio_list[i]) + break; + + addr = mmio_addr + + i * sizeof(dev_priv->gen_pmu.mmio_list[i]); + + ret = intel_ring_begin(ring, 4); + if (ret) + return; + + intel_ring_emit(ring, + MI_STORE_REGISTER_MEM(1) | + MI_SRM_LRM_GLOBAL_GTT); + intel_ring_emit(ring, dev_priv->gen_pmu.mmio_list[i]); + intel_ring_emit(ring, addr); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); + } obj->base.write_domain = I915_GEM_DOMAIN_RENDER; i915_vma_move_to_active(i915_gem_obj_to_ggtt(obj), ring); @@ -556,7 +576,7 @@ static void forward_one_gen_pmu_sample(struct drm_i915_private *dev_priv, { struct perf_sample_data data; struct perf_event *event = dev_priv->gen_pmu.exclusive_event; - int ts_size, snapshot_size; + int ts_size, mmio_size, snapshot_size; u8 *snapshot, *current_ptr; struct drm_i915_ts_node_ctx_id *ctx_info; struct drm_i915_ts_node_ring_id *ring_info; @@ -565,10 +585,17 @@ static void forward_one_gen_pmu_sample(struct drm_i915_private *dev_priv, struct perf_raw_record raw; ts_size = sizeof(struct drm_i915_ts_data); - snapshot_size = ts_size + sizeof(*ctx_info); + + if (dev_priv->gen_pmu.sample_info_flags & I915_GEN_PMU_SAMPLE_MMIO) + mmio_size = sizeof(struct drm_i915_mmio_data); + else + mmio_size = 0; + snapshot = dev_priv->gen_pmu.buffer.addr + node->offset; + snapshot_size = ts_size + mmio_size + sizeof(*ctx_info); - ctx_info = (struct drm_i915_ts_node_ctx_id *)(snapshot + ts_size); + ctx_info = (struct drm_i915_ts_node_ctx_id *) + (snapshot + mmio_size + ts_size); ctx_info->ctx_id = node->ctx_id; current_ptr = snapshot + snapshot_size; @@ -1053,6 +1080,9 @@ static int init_gen_pmu_buffer(struct perf_event *event) if (dev_priv->gen_pmu.sample_info_flags & I915_GEN_PMU_SAMPLE_TAG) node_size += sizeof(struct drm_i915_ts_node_tag); + if (dev_priv->gen_pmu.sample_info_flags & I915_GEN_PMU_SAMPLE_MMIO) + node_size += sizeof(struct drm_i915_mmio_data); + /* size has to be aligned to 8 bytes (required by relevant gpu cmds) */ node_size = ALIGN(node_size, 8); dev_priv->gen_pmu.buffer.node_size = node_size; @@ -1648,6 +1678,42 @@ err_size: goto out; } +#define GEN_RANGE(l, h) GENMASK(h, l) + +/* Some embargoed entries missing from whitelist */ +static const struct register_whitelist { + uint64_t offset; + uint32_t size; + /* supported gens, 0x10 for 4, 0x30 for 4 and 5, etc. */ + uint32_t gen_bitmask; +} whitelist[] = { + { GEN6_GT_GFX_RC6, 4, GEN_RANGE(7, 9) }, + { GEN6_GT_GFX_RC6p, 4, GEN_RANGE(7, 9) }, +}; + +static int check_mmio_whitelist(struct drm_i915_private *dev_priv, + struct drm_i915_gen_pmu_attr *gen_attr) +{ + struct register_whitelist const *entry = whitelist; + int i, count; + + for (count = 0; count < 8; count++) { + if (!gen_attr->mmio_list[count]) + break; + + for (i = 0; i < ARRAY_SIZE(whitelist); i++, entry++) { + if (entry->offset == gen_attr->mmio_list[count] && + (1 << INTEL_INFO(dev_priv->dev)->gen & + entry->gen_bitmask)) + break; + } + + if (i == ARRAY_SIZE(whitelist)) + return -EINVAL; + } + return 0; +} + static int i915_gen_event_init(struct perf_event *event) { struct drm_i915_private *dev_priv = @@ -1674,6 +1740,17 @@ static int i915_gen_event_init(struct perf_event *event) if (gen_attr.sample_tag) dev_priv->gen_pmu.sample_info_flags |= I915_GEN_PMU_SAMPLE_TAG; + if (gen_attr.sample_mmio) { + ret = check_mmio_whitelist(dev_priv, &gen_attr); + if (ret) + return ret; + + dev_priv->gen_pmu.sample_info_flags |= + I915_GEN_PMU_SAMPLE_MMIO; + memcpy(dev_priv->gen_pmu.mmio_list, gen_attr.mmio_list, + sizeof(dev_priv->gen_pmu.mmio_list)); + } + /* To avoid the complexity of having to accurately filter * data and marshal to the appropriate client * we currently only allow exclusive access */ diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 7ab4972..65bc39d 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -81,7 +81,7 @@ #define I915_OA_ATTR_SIZE_VER0 32 /* sizeof first published struct */ -#define I915_GEN_PMU_ATTR_SIZE_VER0 8 /* sizeof first published struct */ +#define I915_GEN_PMU_ATTR_SIZE_VER0 40 /* sizeof first published struct */ typedef struct _drm_i915_oa_attr { __u32 size; @@ -105,7 +105,9 @@ struct drm_i915_gen_pmu_attr { __u32 sample_ring:1, sample_pid:1, sample_tag:1, - __reserved_1:29; + sample_mmio:1, + __reserved_1:28; + __u32 mmio_list[8]; }; /* Header for PERF_RECORD_DEVICE type events */ @@ -155,6 +157,10 @@ struct drm_i915_ts_data { __u32 ts_high; }; +struct drm_i915_mmio_data { + __u32 mmio[8]; +}; + struct drm_i915_ts_node_ctx_id { __u32 ctx_id; __u32 pad;