diff mbox

[RFC,1/8] drm/i915: Add a new PMU for handling non-OA counter data profiling requests

Message ID 1436950306-14147-2-git-send-email-sourab.gupta@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

sourab.gupta@intel.com July 15, 2015, 8:51 a.m. UTC
From: Sourab Gupta <sourab.gupta@intel.com>

The current perf PMU driver is specific for collection of OA counter
statistics (which may be done in a periodic or asynchronous way). Since
this enables us (and limits us) to render ring, we have no means for
collection of data pertaining to other rings.

To overcome this limitation, we need to have a new PMU driver which enables
data collection for other rings also (in a non-OA specific mode).
This patch adds a new perf PMU to i915 device private, for handling
profiling requests for non-OA counter data.This data may encompass
timestamps, mmio register values, etc. for the relevant ring.
The new perf PMU will serve these purposes, without constraining itself to
type of data being dumped (which may restrict the user to specific ring
like in case of OA counters).

The patch introduces this PMU driver alongwith its associated callbacks.

Signed-off-by: Sourab Gupta <sourab.gupta@intel.com>
---
 drivers/gpu/drm/i915/i915_dma.c     |   2 +
 drivers/gpu/drm/i915/i915_drv.h     |  18 +++
 drivers/gpu/drm/i915/i915_oa_perf.c | 220 ++++++++++++++++++++++++++++++++++++
 3 files changed, 240 insertions(+)
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 0553f20..4b91504 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -822,6 +822,7 @@  int i915_driver_load(struct drm_device *dev, unsigned long flags)
 	 * otherwise i915_oa_context_pin_notify() will lock an un-initialized
 	 * spinlock, upsetting lockdep checks */
 	i915_oa_pmu_register(dev);
+	i915_gen_pmu_register(dev);
 
 	intel_pm_setup(dev);
 
@@ -1072,6 +1073,7 @@  int i915_driver_unload(struct drm_device *dev)
 		return ret;
 	}
 
+	i915_gen_pmu_unregister(dev);
 	i915_oa_pmu_unregister(dev);
 	intel_power_domains_fini(dev_priv);
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 9409b4a..7e8e77b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1985,6 +1985,20 @@  struct drm_i915_private {
 		int sample_info_flags;
 	} oa_pmu;
 
+	struct {
+		struct pmu pmu;
+		spinlock_t lock;
+		struct hrtimer timer;
+		struct pt_regs dummy_regs;
+		struct perf_event *exclusive_event;
+		bool event_active;
+
+		struct {
+			struct drm_i915_gem_object *obj;
+			u8 *addr;
+		} buffer;
+	} gen_pmu;
+
 	void (*insert_profile_cmd[I915_PROFILE_MAX])
 		(struct intel_ringbuffer *ringbuf, u32 ctx_id, int tag);
 #endif
@@ -3292,10 +3306,14 @@  int i915_parse_cmds(struct intel_engine_cs *ring,
 /* i915_oa_perf.c */
 #ifdef CONFIG_PERF_EVENTS
 extern void i915_oa_pmu_register(struct drm_device *dev);
+extern void i915_gen_pmu_register(struct drm_device *dev);
 extern void i915_oa_pmu_unregister(struct drm_device *dev);
+extern void i915_gen_pmu_unregister(struct drm_device *dev);
 #else
 static inline void i915_oa_pmu_register(struct drm_device *dev) {}
+static inline void i915_gen_pmu_register(struct drm_device *dev) {}
 static inline void i915_oa_pmu_unregister(struct drm_device *dev) {}
+static inline void i915_gen_pmu_unregister(struct drm_device *dev) {}
 #endif
 
 /* i915_suspend.c */
diff --git a/drivers/gpu/drm/i915/i915_oa_perf.c b/drivers/gpu/drm/i915/i915_oa_perf.c
index 839ebb4..ab965b4 100644
--- a/drivers/gpu/drm/i915/i915_oa_perf.c
+++ b/drivers/gpu/drm/i915/i915_oa_perf.c
@@ -408,6 +408,13 @@  void forward_oa_rcs_snapshots_work(struct work_struct *__work)
 	}
 }
 
+static void gen_pmu_flush_snapshots(struct drm_i915_private *dev_priv)
+{
+	WARN_ON(!dev_priv->gen_pmu.buffer.addr);
+
+	/* TODO: routine for forwarding snapshots to userspace */
+}
+
 static void
 oa_rcs_buffer_destroy(struct drm_i915_private *i915)
 {
@@ -561,6 +568,35 @@  out:
 	spin_unlock_irqrestore(&dev_priv->oa_pmu.lock, lock_flags);
 }
 
+static void gen_buffer_destroy(struct drm_i915_private *i915)
+{
+	unsigned long lock_flags;
+
+	mutex_lock(&i915->dev->struct_mutex);
+	vunmap(i915->gen_pmu.buffer.addr);
+	i915_gem_object_ggtt_unpin(i915->gen_pmu.buffer.obj);
+	drm_gem_object_unreference(&i915->gen_pmu.buffer.obj->base);
+	mutex_unlock(&i915->dev->struct_mutex);
+
+	spin_lock_irqsave(&i915->gen_pmu.lock, lock_flags);
+	i915->gen_pmu.buffer.obj = NULL;
+	i915->gen_pmu.buffer.addr = NULL;
+	spin_unlock_irqrestore(&i915->gen_pmu.lock, lock_flags);
+}
+
+static void i915_gen_event_destroy(struct perf_event *event)
+{
+	struct drm_i915_private *i915 =
+		container_of(event->pmu, typeof(*i915), gen_pmu.pmu);
+
+	WARN_ON(event->parent);
+
+	gen_buffer_destroy(i915);
+
+	BUG_ON(i915->gen_pmu.exclusive_event != event);
+	i915->gen_pmu.exclusive_event = NULL;
+}
+
 static int alloc_obj(struct drm_i915_private *dev_priv,
 				struct drm_i915_gem_object **obj)
 {
@@ -720,6 +756,40 @@  static int init_oa_rcs_buffer(struct perf_event *event)
 	return 0;
 }
 
+static int init_gen_pmu_buffer(struct perf_event *event)
+{
+	struct drm_i915_private *dev_priv =
+		container_of(event->pmu, typeof(*dev_priv), gen_pmu.pmu);
+	struct drm_i915_gem_object *bo;
+	int ret;
+
+	BUG_ON(dev_priv->gen_pmu.buffer.obj);
+
+	ret = alloc_obj(dev_priv, &bo);
+	if (ret)
+		return ret;
+
+	dev_priv->gen_pmu.buffer.obj = bo;
+
+	dev_priv->gen_pmu.buffer.addr = vmap_oa_buffer(bo);
+
+	DRM_DEBUG_DRIVER("Gen PMU Buffer initialized, vaddr = %p",
+			 dev_priv->gen_pmu.buffer.addr);
+
+	return 0;
+}
+
+static enum hrtimer_restart hrtimer_sample_gen(struct hrtimer *hrtimer)
+{
+	struct drm_i915_private *i915 =
+		container_of(hrtimer, typeof(*i915), gen_pmu.timer);
+
+	gen_pmu_flush_snapshots(i915);
+
+	hrtimer_forward_now(hrtimer, ns_to_ktime(PERIOD));
+	return HRTIMER_RESTART;
+}
+
 static enum hrtimer_restart hrtimer_sample(struct hrtimer *hrtimer)
 {
 	struct drm_i915_private *i915 =
@@ -1232,6 +1302,111 @@  static int i915_oa_event_event_idx(struct perf_event *event)
 	return 0;
 }
 
+static int i915_gen_event_init(struct perf_event *event)
+{
+	struct drm_i915_private *dev_priv =
+		container_of(event->pmu, typeof(*dev_priv), gen_pmu.pmu);
+	int ret = 0;
+
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	/* To avoid the complexity of having to accurately filter
+	 * data and marshal to the appropriate client
+	 * we currently only allow exclusive access */
+	if (dev_priv->gen_pmu.buffer.obj)
+		return -EBUSY;
+
+	/*
+	 * We need to check for CAP_SYS_ADMIN capability as we profile all
+	 * the running contexts
+	 */
+	if (!capable(CAP_SYS_ADMIN))
+			return -EACCES;
+
+	ret = init_gen_pmu_buffer(event);
+	if (ret)
+		return ret;
+
+	BUG_ON(dev_priv->gen_pmu.exclusive_event);
+	dev_priv->gen_pmu.exclusive_event = event;
+
+	event->destroy = i915_gen_event_destroy;
+
+	return 0;
+}
+
+static void i915_gen_event_start(struct perf_event *event, int flags)
+{
+	struct drm_i915_private *dev_priv =
+		container_of(event->pmu, typeof(*dev_priv), gen_pmu.pmu);
+	unsigned long lock_flags;
+
+	spin_lock_irqsave(&dev_priv->gen_pmu.lock, lock_flags);
+
+	dev_priv->gen_pmu.event_active = true;
+
+	spin_unlock_irqrestore(&dev_priv->gen_pmu.lock, lock_flags);
+
+	__hrtimer_start_range_ns(&dev_priv->gen_pmu.timer, ns_to_ktime(PERIOD),
+					0, HRTIMER_MODE_REL_PINNED, 0);
+
+	event->hw.state = 0;
+}
+
+static void i915_gen_event_stop(struct perf_event *event, int flags)
+{
+	struct drm_i915_private *dev_priv =
+		container_of(event->pmu, typeof(*dev_priv), gen_pmu.pmu);
+	unsigned long lock_flags;
+
+	spin_lock_irqsave(&dev_priv->gen_pmu.lock, lock_flags);
+	dev_priv->gen_pmu.event_active = false;
+
+	spin_unlock_irqrestore(&dev_priv->gen_pmu.lock, lock_flags);
+
+	hrtimer_cancel(&dev_priv->gen_pmu.timer);
+	gen_pmu_flush_snapshots(dev_priv);
+
+	event->hw.state = PERF_HES_STOPPED;
+}
+
+static int i915_gen_event_add(struct perf_event *event, int flags)
+{
+	if (flags & PERF_EF_START)
+		i915_gen_event_start(event, flags);
+
+	return 0;
+}
+
+static void i915_gen_event_del(struct perf_event *event, int flags)
+{
+	i915_gen_event_stop(event, flags);
+}
+
+static void i915_gen_event_read(struct perf_event *event)
+{
+	struct drm_i915_private *i915 =
+		container_of(event->pmu, typeof(*i915), gen_pmu.pmu);
+
+	/* XXX: What counter would be useful here? */
+	local64_set(&event->count, 0);
+}
+
+static int i915_gen_event_flush(struct perf_event *event)
+{
+	struct drm_i915_private *i915 =
+		container_of(event->pmu, typeof(*i915), gen_pmu.pmu);
+
+	gen_pmu_flush_snapshots(i915);
+	return 0;
+}
+
+static int i915_gen_event_event_idx(struct perf_event *event)
+{
+	return 0;
+}
+
 void i915_oa_context_pin_notify(struct drm_i915_private *dev_priv,
 				struct intel_context *context)
 {
@@ -1360,3 +1535,48 @@  void i915_oa_pmu_unregister(struct drm_device *dev)
 	perf_pmu_unregister(&i915->oa_pmu.pmu);
 	i915->oa_pmu.pmu.event_init = NULL;
 }
+
+void i915_gen_pmu_register(struct drm_device *dev)
+{
+	struct drm_i915_private *i915 = to_i915(dev);
+
+	if (!(IS_HASWELL(dev) || IS_VALLEYVIEW(dev) || IS_BROADWELL(dev)))
+		return;
+
+	i915->gen_pmu.dummy_regs = *task_pt_regs(current);
+
+	hrtimer_init(&i915->gen_pmu.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	i915->gen_pmu.timer.function = hrtimer_sample_gen;
+
+	spin_lock_init(&i915->gen_pmu.lock);
+
+	i915->gen_pmu.pmu.capabilities  = PERF_PMU_CAP_IS_DEVICE;
+
+	/* Effectively disallow opening an event with a specific pid
+	 * since we aren't interested in processes running on the cpu...
+	 */
+	i915->gen_pmu.pmu.task_ctx_nr   = perf_invalid_context;
+
+	i915->gen_pmu.pmu.event_init    = i915_gen_event_init;
+	i915->gen_pmu.pmu.add	       = i915_gen_event_add;
+	i915->gen_pmu.pmu.del	       = i915_gen_event_del;
+	i915->gen_pmu.pmu.start	       = i915_gen_event_start;
+	i915->gen_pmu.pmu.stop	       = i915_gen_event_stop;
+	i915->gen_pmu.pmu.read	       = i915_gen_event_read;
+	i915->gen_pmu.pmu.flush	       = i915_gen_event_flush;
+	i915->gen_pmu.pmu.event_idx     = i915_gen_event_event_idx;
+
+	if (perf_pmu_register(&i915->gen_pmu.pmu, "i915_gen", -1))
+		i915->gen_pmu.pmu.event_init = NULL;
+}
+
+void i915_gen_pmu_unregister(struct drm_device *dev)
+{
+	struct drm_i915_private *i915 = to_i915(dev);
+
+	if (i915->gen_pmu.pmu.event_init == NULL)
+		return;
+
+	perf_pmu_unregister(&i915->gen_pmu.pmu);
+	i915->gen_pmu.pmu.event_init = NULL;
+}