@@ -16,7 +16,8 @@
#define ENGINE_SAMPLE_MASK \
(BIT(I915_SAMPLE_BUSY) | \
BIT(I915_SAMPLE_WAIT) | \
- BIT(I915_SAMPLE_SEMA))
+ BIT(I915_SAMPLE_SEMA) | \
+ BIT(I915_SAMPLE_QUEUED))
#define ENGINE_SAMPLE_BITS (1 << I915_PMU_SAMPLE_BITS)
@@ -162,6 +163,12 @@ add_sample(struct i915_pmu_sample *sample, u32 val)
sample->cur += val;
}
+static void
+add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul)
+{
+ sample->cur += mul_u32_u32(val, mul);
+}
+
static void
engines_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
{
@@ -205,6 +212,11 @@ engines_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
if (val & RING_WAIT_SEMAPHORE)
add_sample(&engine->pmu.sample[I915_SAMPLE_SEMA],
period_ns);
+
+ if (engine->pmu.enable & BIT(I915_SAMPLE_QUEUED))
+ add_sample_mult(&engine->pmu.sample[I915_SAMPLE_QUEUED],
+ atomic_read(&engine->request_stats.queued),
+ period_ns);
}
if (fw)
@@ -213,12 +225,6 @@ engines_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
intel_runtime_pm_put(dev_priv);
}
-static void
-add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul)
-{
- sample->cur += mul_u32_u32(val, mul);
-}
-
static void
frequency_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
{
@@ -324,6 +330,7 @@ engine_event_status(struct intel_engine_cs *engine,
switch (sample) {
case I915_SAMPLE_BUSY:
case I915_SAMPLE_WAIT:
+ case I915_SAMPLE_QUEUED:
break;
case I915_SAMPLE_SEMA:
if (INTEL_GEN(engine->i915) < 6)
@@ -541,6 +548,15 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
val = ktime_to_ns(intel_engine_get_busy_time(engine));
} else {
val = engine->pmu.sample[sample].cur;
+
+ if (sample == I915_SAMPLE_QUEUED) {
+ BUILD_BUG_ON(NSEC_PER_SEC %
+ I915_SAMPLE_QUEUED_DIVISOR);
+ /* to qd */
+ val = div_u64(val,
+ NSEC_PER_SEC /
+ I915_SAMPLE_QUEUED_DIVISOR);
+ }
}
} else {
switch (event->attr.config) {
@@ -797,6 +813,16 @@ static const struct attribute_group *i915_pmu_attr_groups[] = {
{ \
.sample = (__sample), \
.name = (__name), \
+ .suffix = "unit", \
+ .value = "ns", \
+}
+
+#define __engine_event_scale(__sample, __name, __scale) \
+{ \
+ .sample = (__sample), \
+ .name = (__name), \
+ .suffix = "scale", \
+ .value = (__scale), \
}
static struct i915_ext_attribute *
@@ -824,6 +850,9 @@ add_pmu_attr(struct perf_pmu_events_attr *attr, const char *name,
return ++attr;
}
+/* No brackets or quotes below please. */
+#define I915_SAMPLE_QUEUED_SCALE 0.001
+
static struct attribute **
create_event_attributes(struct drm_i915_private *i915)
{
@@ -840,10 +869,14 @@ create_event_attributes(struct drm_i915_private *i915)
static const struct {
enum drm_i915_pmu_engine_sample sample;
char *name;
+ char *suffix;
+ char *value;
} engine_events[] = {
__engine_event(I915_SAMPLE_BUSY, "busy"),
__engine_event(I915_SAMPLE_SEMA, "sema"),
__engine_event(I915_SAMPLE_WAIT, "wait"),
+ __engine_event_scale(I915_SAMPLE_QUEUED, "queued",
+ __stringify(I915_SAMPLE_QUEUED_SCALE)),
};
unsigned int count = 0;
struct perf_pmu_events_attr *pmu_attr = NULL, *pmu_iter;
@@ -853,6 +886,9 @@ create_event_attributes(struct drm_i915_private *i915)
enum intel_engine_id id;
unsigned int i;
+ BUILD_BUG_ON(I915_SAMPLE_QUEUED_DIVISOR !=
+ (1 / I915_SAMPLE_QUEUED_SCALE));
+
/* Count how many counters we will be exposing. */
for (i = 0; i < ARRAY_SIZE(events); i++) {
if (!config_status(i915, events[i].config))
@@ -930,13 +966,15 @@ create_event_attributes(struct drm_i915_private *i915)
engine->instance,
engine_events[i].sample));
- str = kasprintf(GFP_KERNEL, "%s-%s.unit",
- engine->name, engine_events[i].name);
+ str = kasprintf(GFP_KERNEL, "%s-%s.%s",
+ engine->name, engine_events[i].name,
+ engine_events[i].suffix);
if (!str)
goto err;
*attr_iter++ = &pmu_iter->attr.attr;
- pmu_iter = add_pmu_attr(pmu_iter, str, "ns");
+ pmu_iter = add_pmu_attr(pmu_iter, str,
+ engine_events[i].value);
}
}
@@ -455,7 +455,7 @@ struct intel_engine_cs {
*
* Our internal timer stores the current counters in this field.
*/
-#define I915_ENGINE_SAMPLE_MAX (I915_SAMPLE_SEMA + 1)
+#define I915_ENGINE_SAMPLE_MAX (I915_SAMPLE_QUEUED + 1)
struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_MAX];
} pmu;
@@ -110,9 +110,13 @@ enum drm_i915_gem_engine_class {
enum drm_i915_pmu_engine_sample {
I915_SAMPLE_BUSY = 0,
I915_SAMPLE_WAIT = 1,
- I915_SAMPLE_SEMA = 2
+ I915_SAMPLE_SEMA = 2,
+ I915_SAMPLE_QUEUED = 3
};
+ /* Divide counter value by divisor to get the real value. */
+#define I915_SAMPLE_QUEUED_DIVISOR (1000)
+
#define I915_PMU_SAMPLE_BITS (4)
#define I915_PMU_SAMPLE_MASK (0xf)
#define I915_PMU_SAMPLE_INSTANCE_BITS (8)
@@ -133,6 +137,9 @@ enum drm_i915_pmu_engine_sample {
#define I915_PMU_ENGINE_SEMA(class, instance) \
__I915_PMU_ENGINE(class, instance, I915_SAMPLE_SEMA)
+#define I915_PMU_ENGINE_QUEUED(class, instance) \
+ __I915_PMU_ENGINE(class, instance, I915_SAMPLE_QUEUED)
+
#define __I915_PMU_OTHER(x) (__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x))
#define I915_PMU_ACTUAL_FREQUENCY __I915_PMU_OTHER(0)