diff mbox

[v3,3/3] drm/i915: Only grab correct forcewake for the engine with execlists

Message ID 1460044560-30582-1-git-send-email-tvrtko.ursulin@linux.intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Tvrtko Ursulin April 7, 2016, 3:56 p.m. UTC
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Rather than blindly waking up all forcewake domains on command
submission, we can teach each engine which domain (or domains)
it actually needs to take.

On platforms with multiple forcewake domains like VLV, CHV, SKL
and BXT, this has the potential of lowering the GPU and CPU
power use and submission latency.

To implement it we add a function named
intel_uncore_forcewake_for_reg whose purpose is to query which
forcewake domains need to be taken to read or write a specific
register with raw mmio accessors.

This enables the execlists engine setup to query which
forcewake domains are relevant per engine on the currently
running platform.

v2:
  * Kerneldoc.
  * Split from intel_uncore.c macro extraction, WARN_ON,
    no warns on old platforms. (Chris Wilson)

v3:
  * Single domain per engine, mention all registers,
    bi-directional function and a new name, fix handling
    of gen6 and gen7 writes. (Chris Wilson)

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h         |   7 +++
 drivers/gpu/drm/i915/intel_lrc.c        |  27 ++++++--
 drivers/gpu/drm/i915/intel_lrc.h        |   1 +
 drivers/gpu/drm/i915/intel_ringbuffer.h |   1 +
 drivers/gpu/drm/i915/intel_uncore.c     | 108 ++++++++++++++++++++++++++++++++
 5 files changed, 139 insertions(+), 5 deletions(-)

Comments

Chris Wilson April 12, 2016, 1:18 p.m. UTC | #1
On Thu, Apr 07, 2016 at 04:56:00PM +0100, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> Rather than blindly waking up all forcewake domains on command
> submission, we can teach each engine what is (or are) the correct
> one to take.
> 
> On platforms with multiple forcewake domains like VLV, CHV, SKL
> and BXT, this has the potential of lowering the GPU and CPU
> power use and submission latency.
> 
> To implement it we add a function named
> intel_uncore_forcewake_for_reg whose purpose is to query which
> forcewake domains need to be taken to read or write a specific
> register with raw mmio accessors.
> 
> These enables the execlists engine setup  to query which
> forcewake domains are relevant per engine on the currently
> running platform.
> 
> v2:
>   * Kerneldoc.
>   * Split from intel_uncore.c macro extraction, WARN_ON,
>     no warns on old platforms. (Chris Wilson)
> 
> v3:
>   * Single domain per engine, mention all registers,
>     bi-directional function and a new name, fix handling
>     of gen6 and gen7 writes. (Chris Wilson)
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>

> +/**
> + * intel_uncore_forcewake_for_reg - which forcewake domains are needed to access
> + * 				    a register
> + * @dev_priv: pointer to struct drm_i915_private
> + * @reg: register in question
> + * @op: operation bitmask of FW_REG_READ and/or FW_REG_WRITE
> + *
> + * Returns a set of forcewake domains required to be taken with for example
> + * intel_uncore_forcewake_get for the specified register to be accessible in the
> + * specified mode (read, write or read/write) with raw mmio accessors.
> + *
> + * NOTE: On Gen6 and Gen7 write forcewake domain (FORCEWAKE_RENDER) requires the
> + * callers to do FIFO management on their own or risk losing writes.
> + */
> +enum forcewake_domains
> +intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv,
> +			       i915_reg_t reg, unsigned int op)
> +{
> +	enum forcewake_domains fw_domains = 0;
> +
> +	WARN_ON(!op);
> +
> +	if (op & FW_REG_READ)
> +		fw_domains = intel_uncore_forcewake_for_read(dev_priv, reg);
> +
> +	if (op & FW_REG_WRITE)
> +		fw_domains |= intel_uncore_forcewake_for_write(dev_priv, reg);
> +
> +	return fw_domains;
> +}

Like it, like it a lot.
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
-Chris
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4ebd3ff02803..a3f2fb201758 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -633,6 +633,13 @@  enum forcewake_domains {
 			 FORCEWAKE_MEDIA)
 };
 
+#define FW_REG_READ  (1) /* op bit: register will be read */
+#define FW_REG_WRITE (2) /* op bit: register will be written */
+
+enum forcewake_domains
+intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv,
+			       i915_reg_t reg, unsigned int op);
+
 struct intel_uncore_funcs {
 	void (*force_wake_get)(struct drm_i915_private *dev_priv,
 							enum forcewake_domains domains);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index a1db6a02cf23..31445aa3429b 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -418,6 +418,7 @@  static void execlists_submit_requests(struct drm_i915_gem_request *rq0,
 				      struct drm_i915_gem_request *rq1)
 {
 	struct drm_i915_private *dev_priv = rq0->i915;
+	unsigned int fw_domains = rq0->engine->fw_domains;
 
 	execlists_update_context(rq0);
 
@@ -425,11 +426,11 @@  static void execlists_submit_requests(struct drm_i915_gem_request *rq0,
 		execlists_update_context(rq1);
 
 	spin_lock_irq(&dev_priv->uncore.lock);
-	intel_uncore_forcewake_get__locked(dev_priv, FORCEWAKE_ALL);
+	intel_uncore_forcewake_get__locked(dev_priv, fw_domains);
 
 	execlists_elsp_write(rq0, rq1);
 
-	intel_uncore_forcewake_put__locked(dev_priv, FORCEWAKE_ALL);
+	intel_uncore_forcewake_put__locked(dev_priv, fw_domains);
 	spin_unlock_irq(&dev_priv->uncore.lock);
 }
 
@@ -552,7 +553,7 @@  static void intel_lrc_irq_handler(unsigned long data)
 	unsigned int csb_read = 0, i;
 	unsigned int submit_contexts = 0;
 
-	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
+	intel_uncore_forcewake_get(dev_priv, engine->fw_domains);
 
 	status_pointer = I915_READ_FW(RING_CONTEXT_STATUS_PTR(engine));
 
@@ -577,7 +578,7 @@  static void intel_lrc_irq_handler(unsigned long data)
 		      _MASKED_FIELD(GEN8_CSB_READ_PTR_MASK,
 				    engine->next_context_status_buffer << 8));
 
-	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
+	intel_uncore_forcewake_put(dev_priv, engine->fw_domains);
 
 	spin_lock(&engine->execlist_lock);
 
@@ -2077,7 +2078,9 @@  logical_ring_default_irqs(struct intel_engine_cs *engine, unsigned shift)
 static int
 logical_ring_init(struct drm_device *dev, struct intel_engine_cs *engine)
 {
-	struct intel_context *dctx = to_i915(dev)->kernel_context;
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct intel_context *dctx = dev_priv->kernel_context;
+	enum forcewake_domains fw_domains;
 	int ret;
 
 	/* Intentionally left blank. */
@@ -2099,6 +2102,20 @@  logical_ring_init(struct drm_device *dev, struct intel_engine_cs *engine)
 
 	logical_ring_init_platform_invariants(engine);
 
+	fw_domains = intel_uncore_forcewake_for_reg(dev_priv,
+						    RING_ELSP(engine),
+						    FW_REG_WRITE);
+
+	fw_domains |= intel_uncore_forcewake_for_reg(dev_priv,
+						     RING_CONTEXT_STATUS_PTR(engine),
+						     FW_REG_READ | FW_REG_WRITE);
+
+	fw_domains |= intel_uncore_forcewake_for_reg(dev_priv,
+						     RING_CONTEXT_STATUS_BUF_BASE(engine),
+						     FW_REG_READ);
+
+	engine->fw_domains = fw_domains;
+
 	ret = i915_cmd_parser_init_ring(engine);
 	if (ret)
 		goto error;
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 0b0853eee91e..8de1ea536ad4 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -34,6 +34,7 @@ 
 #define	  CTX_CTRL_INHIBIT_SYN_CTX_SWITCH	(1 << 3)
 #define	  CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT	(1 << 0)
 #define   CTX_CTRL_RS_CTX_ENABLE                (1 << 1)
+#define RING_CONTEXT_STATUS_BUF_BASE(ring)	_MMIO((ring)->mmio_base + 0x370)
 #define RING_CONTEXT_STATUS_BUF_LO(ring, i)	_MMIO((ring)->mmio_base + 0x370 + (i) * 8)
 #define RING_CONTEXT_STATUS_BUF_HI(ring, i)	_MMIO((ring)->mmio_base + 0x370 + (i) * 8 + 4)
 #define RING_CONTEXT_STATUS_PTR(ring)		_MMIO((ring)->mmio_base + 0x3a0)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 18074ab55f61..3a11705222fc 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -270,6 +270,7 @@  struct  intel_engine_cs {
 	spinlock_t execlist_lock; /* used inside tasklet, use spin_lock_bh */
 	struct list_head execlist_queue;
 	struct list_head execlist_retired_req_list;
+	unsigned int fw_domains;
 	unsigned int next_context_status_buffer;
 	unsigned int idle_lite_restore_wa;
 	bool disable_lite_restore_wa;
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index b77bdf4a47f6..a465368f5edf 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -1766,3 +1766,111 @@  intel_uncore_arm_unclaimed_mmio_detection(struct drm_i915_private *dev_priv)
 
 	return false;
 }
+
+static enum forcewake_domains
+intel_uncore_forcewake_for_read(struct drm_i915_private *dev_priv,
+				i915_reg_t reg)
+{
+	enum forcewake_domains fw_domains;
+
+	if (intel_vgpu_active(dev_priv->dev)) /* report no domains under vGPU */
+		return 0;
+
+	switch (INTEL_INFO(dev_priv)->gen) {
+	case 9:
+		fw_domains = __gen9_reg_read_fw_domains(i915_mmio_reg_offset(reg));
+		break;
+	case 8:
+		if (IS_CHERRYVIEW(dev_priv))
+			fw_domains = __chv_reg_read_fw_domains(i915_mmio_reg_offset(reg));
+		else /* non-CHV gen8 uses the gen6 read lookup */
+			fw_domains = __gen6_reg_read_fw_domains(i915_mmio_reg_offset(reg));
+		break;
+	case 7:
+	case 6:
+		if (IS_VALLEYVIEW(dev_priv))
+			fw_domains = __vlv_reg_read_fw_domains(i915_mmio_reg_offset(reg));
+		else
+			fw_domains = __gen6_reg_read_fw_domains(i915_mmio_reg_offset(reg));
+		break;
+	default:
+		MISSING_CASE(INTEL_INFO(dev_priv)->gen); /* fall through */
+	case 5: /* forcewake was introduced with gen6 */
+	case 4:
+	case 3:
+	case 2:
+		return 0;
+	}
+
+	WARN_ON(fw_domains & ~dev_priv->uncore.fw_domains); /* bits outside uncore.fw_domains */
+
+	return fw_domains;
+}
+
+static enum forcewake_domains
+intel_uncore_forcewake_for_write(struct drm_i915_private *dev_priv,
+				 i915_reg_t reg)
+{
+	enum forcewake_domains fw_domains;
+
+	if (intel_vgpu_active(dev_priv->dev)) /* report no domains under vGPU */
+		return 0;
+
+	switch (INTEL_INFO(dev_priv)->gen) {
+	case 9:
+		fw_domains = __gen9_reg_write_fw_domains(i915_mmio_reg_offset(reg));
+		break;
+	case 8:
+		if (IS_CHERRYVIEW(dev_priv))
+			fw_domains = __chv_reg_write_fw_domains(i915_mmio_reg_offset(reg));
+		else
+			fw_domains = __gen8_reg_write_fw_domains(i915_mmio_reg_offset(reg));
+		break;
+	case 7:
+	case 6:
+		fw_domains = FORCEWAKE_RENDER; /* regardless of the register offset */
+		break;
+	default:
+		MISSING_CASE(INTEL_INFO(dev_priv)->gen); /* fall through */
+	case 5: /* no forcewake domains reported for gen2-gen5 */
+	case 4:
+	case 3:
+	case 2:
+		return 0;
+	}
+
+	WARN_ON(fw_domains & ~dev_priv->uncore.fw_domains); /* bits outside uncore.fw_domains */
+
+	return fw_domains;
+}
+
+/**
+ * intel_uncore_forcewake_for_reg - which forcewake domains are needed to access
+ * 				    a register
+ * @dev_priv: pointer to struct drm_i915_private
+ * @reg: register in question
+ * @op: operation bitmask of FW_REG_READ and/or FW_REG_WRITE
+ *
+ * Returns the set of forcewake domains that must be taken (for example with
+ * intel_uncore_forcewake_get) for the specified register to be accessible in
+ * the specified mode (read, write or read/write) with raw mmio accessors.
+ *
+ * NOTE: On Gen6 and Gen7 the write forcewake domain (FORCEWAKE_RENDER)
+ * requires callers to do FIFO management on their own or risk losing writes.
+ */
+enum forcewake_domains
+intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv,
+			       i915_reg_t reg, unsigned int op)
+{
+	enum forcewake_domains fw_domains = 0;
+
+	WARN_ON(!op); /* neither FW_REG_READ nor FW_REG_WRITE requested */
+
+	if (op & FW_REG_READ)
+		fw_domains = intel_uncore_forcewake_for_read(dev_priv, reg);
+
+	if (op & FW_REG_WRITE) /* OR in, so read+write yields the union */
+		fw_domains |= intel_uncore_forcewake_for_write(dev_priv, reg);
+
+	return fw_domains;
+}