@@ -90,23 +90,16 @@ static unsigned int event_enabled_bit(struct perf_event *event)
return config_enabled_bit(event->attr.config);
}
-static bool pmu_needs_timer(struct drm_i915_private *i915, bool gpu_active)
+static bool
+__pmu_needs_timer(struct drm_i915_private *i915, u64 enable, bool gpu_active)
{
- u64 enable;
-
- /*
- * Only some counters need the sampling timer.
- *
- * We start with a bitmask of all currently enabled events.
- */
- enable = i915->pmu.enable;
-
/*
- * Mask out all the ones which do not need the timer, or in
+ * Mask out all events which do not need the timer, or in
* other words keep all the ones that could need the timer.
*/
enable &= config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY) |
config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY) |
+ config_enabled_mask(I915_PMU_RC6_RESIDENCY) |
ENGINE_SAMPLE_MASK;
/*
@@ -130,6 +123,11 @@ static bool pmu_needs_timer(struct drm_i915_private *i915, bool gpu_active)
return enable;
}
+static bool pmu_needs_timer(struct drm_i915_private *i915, bool gpu_active)
+{
+ return __pmu_needs_timer(i915, i915->pmu.enable, gpu_active);
+}
+
void i915_pmu_gt_parked(struct drm_i915_private *i915)
{
if (!i915->pmu.base.event_init)
@@ -181,20 +179,20 @@ update_sample(struct i915_pmu_sample *sample, u32 unit, u32 val)
sample->cur += mul_u32_u32(val, unit);
}
-static void engines_sample(struct drm_i915_private *dev_priv)
+static bool engines_sample(struct drm_i915_private *dev_priv)
{
struct intel_engine_cs *engine;
enum intel_engine_id id;
bool fw = false;
if ((dev_priv->pmu.enable & ENGINE_SAMPLE_MASK) == 0)
- return;
+ return false;
if (!dev_priv->gt.awake)
- return;
+ return false;
if (!intel_runtime_pm_get_if_in_use(dev_priv))
- return;
+ return false;
for_each_engine(engine, dev_priv, id) {
u32 current_seqno = intel_engine_get_seqno(engine);
@@ -225,10 +223,51 @@ static void engines_sample(struct drm_i915_private *dev_priv)
if (fw)
intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
- intel_runtime_pm_put(dev_priv);
+ return true;
+}
+
+static u64 read_rc6_residency(struct drm_i915_private *i915)
+{
+ u64 val;
+
+ val = intel_rc6_residency_ns(i915, IS_VALLEYVIEW(i915) ?
+ VLV_GT_RENDER_RC6 : GEN6_GT_GFX_RC6);
+ if (HAS_RC6p(i915))
+ val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p);
+ if (HAS_RC6pp(i915))
+ val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp);
+
+ return val;
+}
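
The helper sums the residency of every RC6 state the platform exposes: base RC6, plus the deeper RC6p/RC6pp states where the hardware supports them. The same totals can be cross-checked from userspace through the driver's sysfs attributes; a minimal sketch, assuming the GPU is card0 and tolerating missing files on platforms without the deeper states:

    /* Userspace cross-check of the summed RC6 residency via sysfs.
     * Illustrative only; assumes the i915 device is card0. The
     * rc6p/rc6pp files exist only where the hardware has those states.
     */
    #include <stdio.h>

    static long read_ms(const char *path)
    {
    	FILE *f = fopen(path, "r");
    	long v = 0;

    	if (!f)
    		return 0; /* state not supported on this platform */
    	if (fscanf(f, "%ld", &v) != 1)
    		v = 0;
    	fclose(f);
    	return v;
    }

    int main(void)
    {
    	long total = 0;

    	total += read_ms("/sys/class/drm/card0/power/rc6_residency_ms");
    	total += read_ms("/sys/class/drm/card0/power/rc6p_residency_ms");
    	total += read_ms("/sys/class/drm/card0/power/rc6pp_residency_ms");
    	printf("total RC6 residency: %ld ms\n", total);
    	return 0;
    }
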
+
+static void
+update_rc6_sample(struct drm_i915_private *i915, u64 val, bool locked)
+{
+ unsigned long flags;
+
+ if (!locked)
+ spin_lock_irqsave(&i915->pmu.lock, flags);
+
+ /*
+ * Update stored RC6 counter only if it is greater than the current
+ * value. This deals with periods of runtime suspend during which we
+ * estimate the RC6 residency and do not want to overshoot the real
+ * value read once the device is woken up.
+ */
+ if (val > i915->pmu.sample[__I915_SAMPLE_RC6].cur)
+ i915->pmu.sample[__I915_SAMPLE_RC6].cur = val;
+
+ /* We don't need to sample RC6 from the timer any more. */
+ i915->pmu.timer_enabled =
+ __pmu_needs_timer(i915,
+ i915->pmu.enable & ~config_enabled_mask(I915_PMU_RC6_RESIDENCY),
+ READ_ONCE(i915->gt.awake));
+
+ if (!locked)
+ spin_unlock_irqrestore(&i915->pmu.lock, flags);
}
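
The interplay between the timer's estimate and this clamp is easiest to see with numbers: while runtime suspended the timer inflates the counter by PERIOD per tick, and if the first real read after resume comes in below that estimate, the clamp keeps the counter monotonic, which perf requires. A standalone simulation (not driver code; the PERIOD value is assumed):

    /* Standalone simulation of the estimate-then-clamp scheme; not
     * driver code. Shows the counter never moving backwards even when
     * the real post-resume read is below the accumulated estimate.
     */
    #include <stdio.h>
    #include <stdint.h>

    #define PERIOD 5000000ULL	/* ns; assumed sampling period */

    static uint64_t rc6_cur;

    static void store_sample(uint64_t val)
    {
    	if (val > rc6_cur)	/* same clamp as update_rc6_sample() */
    		rc6_cur = val;
    }

    int main(void)
    {
    	rc6_cur = 100 * PERIOD;		/* last real read before suspend */

    	/* Three timer ticks while runtime suspended: pure estimate. */
    	for (int i = 0; i < 3; i++)
    		rc6_cur += PERIOD;	/* now 103 * PERIOD */

    	/* Real read on resume is lower than the estimate... */
    	store_sample(102 * PERIOD);

    	/* ...but the clamp holds the counter at 103 * PERIOD. */
    	printf("%llu\n", (unsigned long long)rc6_cur);
    	return 0;
    }
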
-static void frequency_sample(struct drm_i915_private *dev_priv)
+static bool others_sample(struct drm_i915_private *dev_priv, bool pm)
{
if (dev_priv->pmu.enable &
config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY)) {
@@ -236,10 +275,10 @@ static void frequency_sample(struct drm_i915_private *dev_priv)
val = dev_priv->gt_pm.rps.cur_freq;
if (dev_priv->gt.awake &&
- intel_runtime_pm_get_if_in_use(dev_priv)) {
+ (pm || intel_runtime_pm_get_if_in_use(dev_priv))) {
+ pm = true;
val = intel_get_cagf(dev_priv,
I915_READ_NOTRACE(GEN6_RPSTAT1));
- intel_runtime_pm_put(dev_priv);
}
update_sample(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_ACT],
@@ -252,18 +291,48 @@ static void frequency_sample(struct drm_i915_private *dev_priv)
intel_gpu_freq(dev_priv,
dev_priv->gt_pm.rps.cur_freq));
}
+
+ if (dev_priv->pmu.enable &
+ config_enabled_mask(I915_PMU_RC6_RESIDENCY)) {
+ if (pm || intel_runtime_pm_get_if_in_use(dev_priv)) {
+ update_rc6_sample(dev_priv,
+ read_rc6_residency(dev_priv),
+ false);
+ pm = true;
+ } else {
+ unsigned long flags;
+
+ /*
+ * When the device is runtime suspended we assume RC6
+ * residency increases by the sampling timer period.
+ */
+ spin_lock_irqsave(&dev_priv->pmu.lock, flags);
+ dev_priv->pmu.sample[__I915_SAMPLE_RC6].cur += PERIOD;
+ spin_unlock_irqrestore(&dev_priv->pmu.lock, flags);
+ }
+ }
+
+ return pm;
}
static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
{
struct drm_i915_private *i915 =
container_of(hrtimer, struct drm_i915_private, pmu.timer);
+ bool pm;
if (!READ_ONCE(i915->pmu.timer_enabled))
return HRTIMER_NORESTART;
- engines_sample(i915);
- frequency_sample(i915);
+ pm = engines_sample(i915);
+ pm = others_sample(i915, pm);
+
+ if (pm)
+ intel_runtime_pm_put(i915);
+
+ if (!READ_ONCE(i915->pmu.timer_enabled))
+ return HRTIMER_NORESTART;
hrtimer_forward_now(hrtimer, ns_to_ktime(PERIOD));
return HRTIMER_RESTART;
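
Two details of the handler are worth spelling out. First, the samplers thread a single runtime-PM reference through: engines_sample() reports whether it took (and kept) one, others_sample() reuses or acquires it, and the handler drops it exactly once. Second, timer_enabled is re-checked after sampling because update_rc6_sample() can clear it mid-callback once a real RC6 read makes the timer unnecessary. For reference, a minimal module-style sketch of the self-rearming hrtimer pattern used here (hypothetical module, names assumed):

    /* Minimal kernel-module sketch of the self-rearming hrtimer
     * pattern above; hypothetical module, not part of this patch.
     */
    #include <linux/module.h>
    #include <linux/hrtimer.h>
    #include <linux/ktime.h>

    #define SAMPLE_PERIOD_NS 5000000ULL	/* assumed, mirrors PERIOD */

    static struct hrtimer sample_timer;
    static bool sampling_enabled = true;

    static enum hrtimer_restart sample_cb(struct hrtimer *t)
    {
    	if (!READ_ONCE(sampling_enabled))
    		return HRTIMER_NORESTART;	/* let the timer die */

    	/* ... take samples here ... */

    	hrtimer_forward_now(t, ns_to_ktime(SAMPLE_PERIOD_NS));
    	return HRTIMER_RESTART;			/* re-arm relative to now */
    }

    static int __init sample_init(void)
    {
    	hrtimer_init(&sample_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
    	sample_timer.function = sample_cb;
    	hrtimer_start(&sample_timer, ns_to_ktime(SAMPLE_PERIOD_NS),
    		      HRTIMER_MODE_REL);
    	return 0;
    }

    static void __exit sample_exit(void)
    {
    	WRITE_ONCE(sampling_enabled, false);
    	hrtimer_cancel(&sample_timer);
    }

    module_init(sample_init);
    module_exit(sample_exit);
    MODULE_LICENSE("GPL");
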
@@ -415,7 +484,7 @@ static int i915_pmu_event_init(struct perf_event *event)
return 0;
}
-static u64 __i915_pmu_event_read(struct perf_event *event)
+static u64 __i915_pmu_event_read(struct perf_event *event, bool locked)
{
struct drm_i915_private *i915 =
container_of(event->pmu, typeof(*i915), pmu.base);
@@ -453,18 +522,26 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
val = count_interrupts(i915);
break;
case I915_PMU_RC6_RESIDENCY:
- intel_runtime_pm_get(i915);
- val = intel_rc6_residency_ns(i915,
- IS_VALLEYVIEW(i915) ?
- VLV_GT_RENDER_RC6 :
- GEN6_GT_GFX_RC6);
- if (HAS_RC6p(i915))
- val += intel_rc6_residency_ns(i915,
- GEN6_GT_GFX_RC6p);
- if (HAS_RC6pp(i915))
- val += intel_rc6_residency_ns(i915,
- GEN6_GT_GFX_RC6pp);
- intel_runtime_pm_put(i915);
+ if (intel_runtime_pm_get_if_in_use(i915)) {
+ update_rc6_sample(i915,
+ read_rc6_residency(i915),
+ locked);
+ intel_runtime_pm_put(i915);
+ } else {
+ unsigned long flags;
+
+ /*
+ * If we failed to read the actual value, start
+ * the timer which will estimate it while the
+ * device is suspended.
+ */
+ if (!locked)
+ spin_lock_irqsave(&i915->pmu.lock, flags);
+ __i915_pmu_maybe_start_timer(i915);
+ if (!locked)
+ spin_unlock_irqrestore(&i915->pmu.lock, flags);
+ }
+ val = i915->pmu.sample[__I915_SAMPLE_RC6].cur;
break;
}
}
@@ -479,7 +556,7 @@ static void i915_pmu_event_read(struct perf_event *event)
again:
prev = local64_read(&hwc->prev_count);
- new = __i915_pmu_event_read(event);
+ new = __i915_pmu_event_read(event, false);
if (local64_cmpxchg(&hwc->prev_count, prev, new) != prev)
goto again;
@@ -534,7 +611,7 @@ static void i915_pmu_enable(struct perf_event *event)
* for all listeners. Even when the event was already enabled and has
* an existing non-zero value.
*/
- local64_set(&event->hw.prev_count, __i915_pmu_event_read(event));
+ local64_set(&event->hw.prev_count, __i915_pmu_event_read(event, true));
spin_unlock_irqrestore(&i915->pmu.lock, flags);
}
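
The new locked parameter exists because this path can be entered with i915->pmu.lock already held: i915_pmu_enable() takes the lock before priming prev_count, and Linux spinlocks are not recursive, so update_rc6_sample() and the timer-start path must be told not to take it again. In the abstract, the pattern looks like this (illustrative fragment, not from the patch):

    #include <linux/spinlock.h>
    #include <linux/types.h>

    static spinlock_t state_lock;	/* stands in for i915->pmu.lock */

    /* Callers that already hold the lock pass locked = true, since
     * re-acquiring a held spinlock on the same CPU would deadlock.
     */
    static void update_state(u64 val, bool locked)
    {
    	unsigned long flags;

    	if (!locked)
    		spin_lock_irqsave(&state_lock, flags);

    	/* ... modify state protected by state_lock ... */

    	if (!locked)
    		spin_unlock_irqrestore(&state_lock, flags);
    }
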
@@ -27,6 +27,7 @@
enum {
__I915_SAMPLE_FREQ_ACT = 0,
__I915_SAMPLE_FREQ_REQ,
+ __I915_SAMPLE_RC6,
__I915_NUM_PMU_SAMPLERS
};
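
With the series applied, the counter can be exercised end to end from userspace through the perf interface. A hedged sketch, assuming a single i915 device, CPU0 online, and the libdrm uapi headers installed; the i915 PMU is system-wide, so it wants pid = -1 and a concrete cpu:

    /* End-to-end userspace read of the RC6 residency counter via perf.
     * Illustrative sketch only.
     */
    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>
    #include <unistd.h>
    #include <sys/syscall.h>
    #include <linux/perf_event.h>
    #include <drm/i915_drm.h>	/* I915_PMU_RC6_RESIDENCY */

    int main(void)
    {
    	struct perf_event_attr attr;
    	uint64_t count;
    	int fd, type;
    	FILE *f;

    	/* Dynamic PMUs register their type id in sysfs. */
    	f = fopen("/sys/bus/event_source/devices/i915/type", "r");
    	if (!f)
    		return 1;
    	if (fscanf(f, "%d", &type) != 1) {
    		fclose(f);
    		return 1;
    	}
    	fclose(f);

    	memset(&attr, 0, sizeof(attr));
    	attr.type = type;
    	attr.size = sizeof(attr);
    	attr.config = I915_PMU_RC6_RESIDENCY;

    	/* System-wide (uncore) event: pid = -1, cpu = 0. */
    	fd = syscall(SYS_perf_event_open, &attr, -1, 0, -1, 0);
    	if (fd < 0)
    		return 1;

    	sleep(1);	/* accumulate one second of residency */
    	if (read(fd, &count, sizeof(count)) == sizeof(count))
    		printf("RC6 residency: %llu ns\n",
    		       (unsigned long long)count);
    	close(fd);
    	return 0;
    }
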