@@ -60,12 +60,23 @@ static void wa_init_start(struct i915_wa_list *wal, const char *name, const char
#define WA_LIST_CHUNK (1 << 4)
+/*
+ * Some of the i915 code like perf OA tries to whitelist registers on demand.
+ * Such code adds to the wal->list, but that would not work because the list
+ * is compacted below by wa_init_finish. While _wa_add does have code to grow
+ * the list, it does not seem to take the compaction into consideration. Leave
+ * 8 entries free during the compaction until a better mechanism can be put in
+ * place.
+ */
+#define WA_LIST_DYNAMIC_ENTRIES 8
+
static void wa_init_finish(struct i915_wa_list *wal)
{
/* Trim unused entries. */
if (!IS_ALIGNED(wal->count, WA_LIST_CHUNK)) {
+ size_t size = wal->count + WA_LIST_DYNAMIC_ENTRIES;
struct i915_wa *list = kmemdup(wal->list,
- wal->count * sizeof(*list),
+ size * sizeof(*list),
GFP_KERNEL);
if (list) {
@@ -81,10 +92,50 @@ static void wa_init_finish(struct i915_wa_list *wal)
wal->wa_count, wal->name, wal->engine_name);
}
+/*
+ * Binary-search @wal for the entry whose register offset equals that of @reg.
+ * The lookup is only meaningful while wal->list is ordered by ascending
+ * offset. Returns the index of the matching entry, or -1 if there is none.
+ */
+static int _wa_index(struct i915_wa_list *wal, i915_reg_t reg)
+{
+	const unsigned int key = i915_mmio_reg_offset(reg);
+	int lo = 0;
+	int hi = wal->count;
+
+	/* Both the key and the stored offsets include the R/W flags. */
+	while (lo < hi) {
+		const int probe = lo + (hi - lo) / 2;
+		const unsigned int cur =
+			i915_mmio_reg_offset(wal->list[probe].reg);
+
+		if (cur == key)
+			return probe;
+
+		if (cur < key)
+			lo = probe + 1;
+		else
+			hi = probe;
+	}
+
+	return -1;
+}
+
+/*
+ * Drop the entry for @reg, with @flags folded into its offset, from @wal.
+ * Entries after it move down one slot so the list stays contiguous, and the
+ * vacated tail slot is left zeroed. Does nothing if no entry matches.
+ */
+static void _wa_remove(struct i915_wa_list *wal, i915_reg_t reg, u32 flags)
+{
+	struct i915_wa *list = wal->list;
+	unsigned int last;
+	int pos;
+
+	/* Stored offsets include the access flags, so look up with them. */
+	reg.reg |= flags;
+
+	pos = _wa_index(wal, reg);
+	if (pos < 0)
+		return;
+
+	/* A hit implies wal->count >= 1, so this cannot wrap. */
+	last = wal->count - 1;
+
+	/* Close the gap, then clear the slot that fell off the end. */
+	memmove(&list[pos], &list[pos + 1], (last - pos) * sizeof(*list));
+	memset(&list[last], 0, sizeof(*list));
+
+	wal->count = last;
+}
+
static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa)
{
- unsigned int addr = i915_mmio_reg_offset(wa->reg);
- unsigned int start = 0, end = wal->count;
+ int index;
const unsigned int grow = WA_LIST_CHUNK;
struct i915_wa *wa_;
@@ -106,30 +157,23 @@ static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa)
wal->list = list;
}
- while (start < end) {
- unsigned int mid = start + (end - start) / 2;
-
- if (i915_mmio_reg_offset(wal->list[mid].reg) < addr) {
- start = mid + 1;
- } else if (i915_mmio_reg_offset(wal->list[mid].reg) > addr) {
- end = mid;
- } else {
- wa_ = &wal->list[mid];
-
- if ((wa->clr | wa_->clr) && !(wa->clr & ~wa_->clr)) {
- DRM_ERROR("Discarding overwritten w/a for reg %04x (clear: %08x, set: %08x)\n",
- i915_mmio_reg_offset(wa_->reg),
- wa_->clr, wa_->set);
+ index = _wa_index(wal, wa->reg);
+ if (index >= 0) {
+ wa_ = &wal->list[index];
- wa_->set &= ~wa->clr;
- }
+ if ((wa->clr | wa_->clr) && !(wa->clr & ~wa_->clr)) {
+ DRM_ERROR("Discarding overwritten w/a for reg %04x (clear: %08x, set: %08x)\n",
+ i915_mmio_reg_offset(wa_->reg),
+ wa_->clr, wa_->set);
- wal->wa_count++;
- wa_->set |= wa->set;
- wa_->clr |= wa->clr;
- wa_->read |= wa->read;
- return;
+ wa_->set &= ~wa->clr;
}
+
+ wal->wa_count++;
+ wa_->set |= wa->set;
+ wa_->clr |= wa->clr;
+ wa_->read |= wa->read;
+ return;
}
wal->wa_count++;
@@ -1264,7 +1308,8 @@ void intel_gt_init_workarounds(struct drm_i915_private *i915)
}
static enum forcewake_domains
-wal_get_fw_for_rmw(struct intel_uncore *uncore, const struct i915_wa_list *wal)
+wal_get_fw(struct intel_uncore *uncore, const struct i915_wa_list *wal,
+ unsigned int op)
{
enum forcewake_domains fw = 0;
struct i915_wa *wa;
@@ -1273,8 +1318,7 @@ wal_get_fw_for_rmw(struct intel_uncore *uncore, const struct i915_wa_list *wal)
for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
fw |= intel_uncore_forcewake_for_reg(uncore,
wa->reg,
- FW_REG_READ |
- FW_REG_WRITE);
+ op);
return fw;
}
@@ -1304,7 +1348,7 @@ wa_list_apply(struct intel_uncore *uncore, const struct i915_wa_list *wal)
if (!wal->count)
return;
- fw = wal_get_fw_for_rmw(uncore, wal);
+ fw = wal_get_fw(uncore, wal, FW_REG_READ | FW_REG_WRITE);
spin_lock_irqsave(&uncore->lock, flags);
intel_uncore_forcewake_get__locked(uncore, fw);
@@ -1616,25 +1660,32 @@ void intel_engine_init_whitelist(struct intel_engine_cs *engine)
+/**
+ * intel_engine_apply_whitelist - program the engine's NONPRIV slots
+ * @engine: engine whose whitelist is written to the hardware
+ *
+ * Writes the offset of every engine->whitelist entry into consecutive
+ * RING_FORCE_TO_NONPRIV slots and points all remaining slots at RING_NOPID.
+ * This runs even when the whitelist is empty, so that entries removed at
+ * runtime are also cleared from the hardware.
+ *
+ * The writes use the raw _fw accessors under uncore->lock, with forcewake
+ * held for the duration.
+ */
void intel_engine_apply_whitelist(struct intel_engine_cs *engine)
{
+	enum forcewake_domains fw = 0;
	const struct i915_wa_list *wal = &engine->whitelist;
	struct intel_uncore *uncore = engine->uncore;
	const u32 base = engine->mmio_base;
	struct i915_wa *wa;
+	unsigned long flags;
	unsigned int i;
-	if (!wal->count)
-		return;
+	/*
+	 * Forcewake has to cover the registers that are actually written,
+	 * i.e. the RING_FORCE_TO_NONPRIV slots. The whitelist entries only
+	 * provide the values, and the list may be empty, so deriving the
+	 * domains from them could leave the writes below without forcewake.
+	 */
+	for (i = 0; i < RING_MAX_NONPRIV_SLOTS; i++)
+		fw |= intel_uncore_forcewake_for_reg(uncore,
+						     RING_FORCE_TO_NONPRIV(base, i),
+						     FW_REG_WRITE);
+
+	spin_lock_irqsave(&uncore->lock, flags);
+	intel_uncore_forcewake_get__locked(uncore, fw);
	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
-		intel_uncore_write(uncore,
-				   RING_FORCE_TO_NONPRIV(base, i),
-				   i915_mmio_reg_offset(wa->reg));
+		intel_uncore_write_fw(uncore,
+				      RING_FORCE_TO_NONPRIV(base, i),
+				      i915_mmio_reg_offset(wa->reg));
	/* And clear the rest just in case of garbage */
	for (; i < RING_MAX_NONPRIV_SLOTS; i++)
-		intel_uncore_write(uncore,
-				   RING_FORCE_TO_NONPRIV(base, i),
-				   i915_mmio_reg_offset(RING_NOPID(base)));
+		intel_uncore_write_fw(uncore,
+				      RING_FORCE_TO_NONPRIV(base, i),
+				      i915_mmio_reg_offset(RING_NOPID(base)));
+
+	intel_uncore_forcewake_put__locked(uncore, fw);
+	spin_unlock_irqrestore(&uncore->lock, flags);
}
static void
@@ -1954,6 +2005,36 @@ void intel_engine_init_workarounds(struct intel_engine_cs *engine)
wa_init_finish(wal);
}
+/*
+ * Pass each of the @count entries in @reg to whitelist_reg_ext() for
+ * @engine's whitelist, then reprogram the hardware through
+ * intel_engine_apply_whitelist(). Does nothing if @engine or @reg is NULL.
+ */
+void intel_engine_allow_user_register_access(struct intel_engine_cs *engine,
+					     struct i915_whitelist_reg *reg,
+					     u32 count)
+{
+	u32 n;
+
+	if (!engine || !reg)
+		return;
+
+	for (n = 0; n < count; n++)
+		whitelist_reg_ext(&engine->whitelist, reg[n].reg, reg[n].flags);
+
+	intel_engine_apply_whitelist(engine);
+}
+
+/*
+ * Remove each of the @count entries in @reg from @engine's whitelist via
+ * _wa_remove(), then reprogram the hardware through
+ * intel_engine_apply_whitelist(). Does nothing if @engine or @reg is NULL.
+ */
+void intel_engine_deny_user_register_access(struct intel_engine_cs *engine,
+					    struct i915_whitelist_reg *reg,
+					    u32 count)
+{
+	u32 n;
+
+	if (!engine || !reg)
+		return;
+
+	for (n = 0; n < count; n++)
+		_wa_remove(&engine->whitelist, reg[n].reg, reg[n].flags);
+
+	intel_engine_apply_whitelist(engine);
+}
+
void intel_engine_apply_workarounds(struct intel_engine_cs *engine)
{
wa_list_apply(engine->uncore, &engine->wa_list);
@@ -37,4 +37,11 @@ void intel_engine_apply_workarounds(struct intel_engine_cs *engine);
int intel_engine_verify_workarounds(struct intel_engine_cs *engine,
const char *from);
+void intel_engine_allow_user_register_access(struct intel_engine_cs *engine,
+ struct i915_whitelist_reg *reg,
+ u32 count);
+void intel_engine_deny_user_register_access(struct intel_engine_cs *engine,
+ struct i915_whitelist_reg *reg,
+ u32 count);
+
#endif
@@ -11,6 +11,11 @@
#include "i915_reg.h"
+struct i915_whitelist_reg { /* one register to whitelist on demand */
+ i915_reg_t reg; /* register to be whitelisted */
+ u32 flags; /* RING_FORCE_TO_NONPRIV_ACCESS_* flags applied to @reg */
+};
+
struct i915_wa {
i915_reg_t reg;
u32 clr;
@@ -1347,12 +1347,59 @@ free_noa_wait(struct i915_perf_stream *stream)
i915_vma_unpin_and_release(&stream->noa_wait, 0);
}
+/* OA report trigger registers whitelisted RW when gen > 8 (gen12 has its own table). */
+static struct i915_whitelist_reg gen9_oa_wl_regs[] = {
+	{
+		.reg = OAREPORTTRIG2,
+		.flags = RING_FORCE_TO_NONPRIV_ACCESS_RW,
+	},
+	{
+		.reg = OAREPORTTRIG6,
+		.flags = RING_FORCE_TO_NONPRIV_ACCESS_RW,
+	},
+};
+
+/* OAG report trigger registers whitelisted RW on gen12. */
+static struct i915_whitelist_reg gen12_oa_wl_regs[] = {
+	{
+		.reg = GEN12_OAG_OAREPORTTRIG2,
+		.flags = RING_FORCE_TO_NONPRIV_ACCESS_RW,
+	},
+	{
+		.reg = GEN12_OAG_OAREPORTTRIG6,
+		.flags = RING_FORCE_TO_NONPRIV_ACCESS_RW,
+	},
+};
+
+/*
+ * Whitelist the OA report trigger registers on the stream's engine.
+ * Gen12 uses the OAG table and any other gen above 8 uses the gen9 table.
+ * Gen8 and older are left untouched.
+ */
+static void intel_engine_apply_oa_whitelist(struct i915_perf_stream *stream)
+{
+	struct drm_i915_private *i915 = stream->perf->i915;
+	struct i915_whitelist_reg *regs;
+	u32 count;
+
+	if (IS_GEN(i915, 12)) {
+		regs = gen12_oa_wl_regs;
+		count = ARRAY_SIZE(gen12_oa_wl_regs);
+	} else if (INTEL_GEN(i915) > 8) {
+		regs = gen9_oa_wl_regs;
+		count = ARRAY_SIZE(gen9_oa_wl_regs);
+	} else {
+		return;
+	}
+
+	intel_engine_allow_user_register_access(stream->engine, regs, count);
+}
+
+/*
+ * Remove the OA report trigger registers from the whitelist of the stream's
+ * engine. Gen12 uses the OAG table and any other gen above 8 uses the gen9
+ * table. Gen8 and older are left untouched.
+ */
+static void intel_engine_remove_oa_whitelist(struct i915_perf_stream *stream)
+{
+	struct drm_i915_private *i915 = stream->perf->i915;
+	struct i915_whitelist_reg *regs;
+	u32 count;
+
+	if (IS_GEN(i915, 12)) {
+		regs = gen12_oa_wl_regs;
+		count = ARRAY_SIZE(gen12_oa_wl_regs);
+	} else if (INTEL_GEN(i915) > 8) {
+		regs = gen9_oa_wl_regs;
+		count = ARRAY_SIZE(gen9_oa_wl_regs);
+	} else {
+		return;
+	}
+
+	intel_engine_deny_user_register_access(stream->engine, regs, count);
+}
+
static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
{
struct i915_perf *perf = stream->perf;
BUG_ON(stream != perf->exclusive_stream);
+ if (stream->oa_whitelisted)
+ intel_engine_remove_oa_whitelist(stream);
+
/*
* Unset exclusive_stream first, it will be checked while disabling
* the metric set on gen8+.
@@ -1448,7 +1495,8 @@ static void gen8_init_oa_buffer(struct i915_perf_stream *stream)
* bit."
*/
intel_uncore_write(uncore, GEN8_OABUFFER, gtt_offset |
- OABUFFER_SIZE_16M | GEN8_OABUFFER_MEM_SELECT_GGTT);
+ OABUFFER_SIZE_16M | GEN8_OABUFFER_MEM_SELECT_GGTT |
+ GEN7_OABUFFER_EDGE_TRIGGER);
intel_uncore_write(uncore, GEN8_OATAILPTR, gtt_offset & GEN8_OATAILPTR_MASK);
/* Mark that we need updated tail pointers to read from... */
@@ -1501,7 +1549,8 @@ static void gen12_init_oa_buffer(struct i915_perf_stream *stream)
* bit."
*/
intel_uncore_write(uncore, GEN12_OAG_OABUFFER, gtt_offset |
- OABUFFER_SIZE_16M | GEN8_OABUFFER_MEM_SELECT_GGTT);
+ OABUFFER_SIZE_16M | GEN8_OABUFFER_MEM_SELECT_GGTT |
+ GEN7_OABUFFER_EDGE_TRIGGER);
intel_uncore_write(uncore, GEN12_OAG_OATAILPTR,
gtt_offset & GEN12_OAG_OATAILPTR_MASK);
@@ -3470,6 +3519,22 @@ i915_perf_open_ioctl_locked(struct i915_perf *perf,
if (!(param->flags & I915_PERF_FLAG_DISABLED))
i915_perf_enable_locked(stream);
+ /*
+ * OA whitelist allows non-privileged access to some OA counters for
+ * triggering reports into the OA buffer. This is only allowed if
+ * perf_stream_paranoid is set to 0 by the sysadmin.
+ *
+ * We want to make sure this is almost the last thing we do before
+ * returning the stream fd. If we do end up checking for errors in code
+ * that follows this, we MUST call intel_engine_remove_oa_whitelist in
+ * the error handling path to remove the whitelisted registers.
+ */
+ if (!i915_perf_stream_paranoid &&
+ props->sample_flags & SAMPLE_OA_REPORT) {
+ intel_engine_apply_oa_whitelist(stream);
+ stream->oa_whitelisted = true;
+ }
+
/* Take a reference on the driver that will be kept with stream_fd
* until its release.
*/
@@ -4441,8 +4506,13 @@ int i915_perf_ioctl_version(void)
*
* 5: Add DRM_I915_PERF_PROP_POLL_OA_PERIOD parameter that controls the
* interval for the hrtimer used to check for OA data.
+ *
+ * 6: Whitelist OATRIGGER registers to allow user to trigger reports
+ * into the OA buffer. This applies only to gen9+. The feature can
+ * only be accessed if perf_stream_paranoid is set to 0 by a privileged
+ * user.
*/
- return 5;
+ return 6;
}
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
@@ -311,6 +311,11 @@ struct i915_perf_stream {
* buffer should be checked for available data.
*/
u64 poll_oa_period;
+
+ /**
+ * @oa_whitelisted: Indicates that the oa registers are whitelisted.
+ */
+ bool oa_whitelisted;
};
/**