@@ -2990,6 +2990,11 @@ intel_info(const struct drm_i915_private *dev_priv)
#define HAS_DECOUPLED_MMIO(dev_priv) (INTEL_INFO(dev_priv)->has_decoupled_mmio)
+/*
+ * NEEDS_CSR_GT_PERF_WA - should GT activity hold POWER_DOMAIN_MODESET?
+ *
+ * Non-zero only when all of the following hold: the device reports CSR
+ * support (HAS_CSR), it is a SKL GT3 or GT4 part, and csr.dmc_payload is
+ * set. @dev_priv is expanded several times, so pass an expression without
+ * side effects.
+ *
+ * NOTE(review): csr.dmc_payload is re-read on every evaluation. Callers
+ * that pair a power-domain get and put on this predicate presumably rely
+ * on it not changing in between - confirm against how the payload is
+ * loaded.
+ */
+#define NEEDS_CSR_GT_PERF_WA(dev_priv) \
+ (HAS_CSR(dev_priv) && \
+ (IS_SKL_GT3(dev_priv) || IS_SKL_GT4(dev_priv)) && \
+ (dev_priv)->csr.dmc_payload)
+
#include "i915_trace.h"
static inline bool intel_scanout_needs_vtd_wa(struct drm_i915_private *dev_priv)
@@ -3200,7 +3200,11 @@ i915_gem_idle_work_handler(struct work_struct *work)
if (INTEL_GEN(dev_priv) >= 6)
gen6_rps_idle(dev_priv);
+
intel_runtime_pm_put(dev_priv);
+
+ if (NEEDS_CSR_GT_PERF_WA(dev_priv))
+ intel_display_power_put(dev_priv, POWER_DOMAIN_MODESET);
out_unlock:
mutex_unlock(&dev->struct_mutex);
@@ -873,6 +873,23 @@ static void i915_gem_mark_busy(const struct intel_engine_cs *engine)
GEM_BUG_ON(!dev_priv->gt.active_requests);
+ /*
+ * It seems that the DMC likes to transition between the DC states
+ * a lot when there are no connected displays (no active power
+ * domains) during simple command submission.
+ *
+ * This frantic activity on DC states has a terrible impact on the
+ * performance of the overall chip with huge latencies observed in
+ * the interrupt handlers and elsewhere. Simple tests like
+ * igt/gem_latency -n 0 are slowed down by a factor of eight.
+ *
+ * Work around it by grabbing a modeset display power domain whilst
+ * there is any GT activity; the matching put is issued from the GEM
+ * idle work handler. This seems to be effective in making the DMC
+ * keep its paws off the chip.
+ */
+ if (NEEDS_CSR_GT_PERF_WA(dev_priv))
+ intel_display_power_get(dev_priv, POWER_DOMAIN_MODESET);
+
intel_runtime_pm_get_noresume(dev_priv);
dev_priv->gt.awake = true;