diff mbox

[37/53] drm/i915/bdw: Implement context switching (somewhat)

Message ID 1402673891-14618-38-git-send-email-oscar.mateo@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

oscar.mateo@intel.com June 13, 2014, 3:37 p.m. UTC
From: Ben Widawsky <benjamin.widawsky@intel.com>

A context switch occurs by submitting a context descriptor to the
ExecList Submission Port. Given that we can now initialize a context,
it's possible to begin implementing the context switch by creating the
descriptor and submitting it to ELSP (actually two, since the ELSP
has two ports).

The context object must be mapped in the GGTT, which means it must exist
in the 0-4GB graphics VA range.

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>

v2: This code has changed quite a lot in various rebases. Of particular
importance is that now we use the globally unique Submission ID to send
to the hardware. Also, context pages are now pinned unconditionally to
GGTT, so there is no need to bind them.

v3: Use LRCA[31:12] as hwCtxId[19:0]. This guarantees that the HW context
ID we submit to the ELSP is globally unique and != 0 (Bspec requirements
of the software use-only bits of the Context ID in the Context Descriptor
Format) without the hassle of the previous submission Id construction.
Also, re-add the ELSP posting read (it was dropped somewhere during the
rebases).

v4:
- Squash with "drm/i915/bdw: Add forcewake lock around ELSP writes" (BSPEC
  says: "SW must set Force Wakeup bit to prevent GT from entering C6 while
  ELSP writes are in progress") as noted by Thomas Daniel
  (thomas.daniel@intel.com).
- Rename functions and use an execlists/intel_execlists_ namespace.
- The BUG_ON only checked that the LRCA was <32 bits, but it didn't make
  sure that it was properly aligned. Spotted by Alistair Mcaulay
  <alistair.mcaulay@intel.com>.

Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
---
 drivers/gpu/drm/i915/intel_lrc.c | 112 ++++++++++++++++++++++++++++++++++++++-
 drivers/gpu/drm/i915/intel_lrc.h |   1 +
 2 files changed, 112 insertions(+), 1 deletion(-)

Comments

Chris Wilson June 13, 2014, 5 p.m. UTC | #1
On Fri, Jun 13, 2014 at 04:37:55PM +0100, oscar.mateo@intel.com wrote:
> +static void execlists_elsp_write(struct intel_engine_cs *ring,
> +				 struct drm_i915_gem_object *ctx_obj0,
> +				 struct drm_i915_gem_object *ctx_obj1)
> +{
> +	struct drm_i915_private *dev_priv = ring->dev->dev_private;
> +	uint64_t temp = 0;
> +	uint32_t desc[4];
> +
> +	/* XXX: You must always write both descriptors in the order below. */
> +	if (ctx_obj1)
> +		temp = execlists_ctx_descriptor(ctx_obj1);
> +	else
> +		temp = 0;
> +	desc[1] = (u32)(temp >> 32);
> +	desc[0] = (u32)temp;
> +
> +	temp = execlists_ctx_descriptor(ctx_obj0);
> +	desc[3] = (u32)(temp >> 32);
> +	desc[2] = (u32)temp;
> +
> +	/* Set Force Wakeup bit to prevent GT from entering C6 while
> +	 * ELSP writes are in progress */
> +	gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL);
> +
> +	I915_WRITE(RING_ELSP(ring), desc[1]);
> +	I915_WRITE(RING_ELSP(ring), desc[0]);
> +	I915_WRITE(RING_ELSP(ring), desc[3]);
> +	/* The context is automatically loaded after the following */
> +	I915_WRITE(RING_ELSP(ring), desc[2]);
> +
> +	/* ELSP is a write only register, so this serves as a posting read */

I can see that is a POSTING_READ, so say something like
/* ELSP is a wo reg, so use another nearby reg for posting instead */

> +	POSTING_READ(RING_EXECLIST_STATUS(ring));
> +
> +	/* Release Force Wakeup */

Redundant, the clue is in the function name.

> +	gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);
> +}
-Chris
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index c9a5e00..4e8268c 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -47,6 +47,7 @@ 
 #define GEN8_LR_CONTEXT_ALIGN 4096
 
 #define RING_ELSP(ring)			((ring)->mmio_base+0x230)
+#define RING_EXECLIST_STATUS(ring)	((ring)->mmio_base+0x234)
 #define RING_CONTEXT_CONTROL(ring)	((ring)->mmio_base+0x244)
 
 #define CTX_LRI_HEADER_0		0x01
@@ -78,6 +79,26 @@ 
 #define CTX_R_PWR_CLK_STATE		0x42
 #define CTX_GPGPU_CSR_BASE_ADDRESS	0x44
 
+#define GEN8_CTX_VALID (1<<0)	/* flag bits ORed into the descriptor */
+#define GEN8_CTX_FORCE_PD_RESTORE (1<<1)
+#define GEN8_CTX_FORCE_RESTORE (1<<2)
+#define GEN8_CTX_L3LLC_COHERENT (1<<5)
+#define GEN8_CTX_PRIVILEGE (1<<8)
+enum {	/* context modes, shifted by GEN8_CTX_MODE_SHIFT */
+	ADVANCED_CONTEXT=0,
+	LEGACY_CONTEXT,
+	ADVANCED_AD_CONTEXT,
+	LEGACY_64B_CONTEXT
+};
+#define GEN8_CTX_MODE_SHIFT 3	/* position of the mode enum above */
+enum {	/* NOTE(review): presumably fault modes; unused in this patch */
+	FAULT_AND_HANG=0,
+	FAULT_AND_HALT, /* Debug only */
+	FAULT_AND_STREAM,
+	FAULT_AND_CONTINUE /* Unsupported */
+};
+#define GEN8_CTX_ID_SHIFT 32	/* context ID goes in the upper dword */
+
 bool intel_enable_execlists(struct drm_device *dev)
 {
 	if (!i915.enable_execlists)
@@ -86,6 +107,94 @@  bool intel_enable_execlists(struct drm_device *dev)
 	return HAS_LOGICAL_RING_CONTEXTS(dev) && USES_PPGTT(dev);
 }
 
+u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj)
+{
+	u32 lrca = i915_gem_obj_ggtt_offset(ctx_obj);
+
+	/* The LRCA (the context's GGTT offset) is required to be 4K aligned,
+	 * so LRCA[31:12] is returned as a globally unique 20-bit context ID */
+	return lrca >> 12;
+}
+
+static uint64_t execlists_ctx_descriptor(struct drm_i915_gem_object *ctx_obj)
+{
+	uint64_t desc;
+	uint64_t lrca = i915_gem_obj_ggtt_offset(ctx_obj);
+	BUG_ON(lrca & 0xFFFFFFFF00000FFFULL);	/* <4GB and 4K aligned */
+
+	desc = GEN8_CTX_VALID;
+	desc |= LEGACY_CONTEXT << GEN8_CTX_MODE_SHIFT;
+	desc |= GEN8_CTX_L3LLC_COHERENT;
+	desc |= GEN8_CTX_PRIVILEGE;
+	desc |= lrca;	/* occupies bits 31:12, clear of the flags */
+	desc |= (u64)intel_execlists_ctx_id(ctx_obj) << GEN8_CTX_ID_SHIFT;
+
+	/* TODO: WaDisableLiteRestore (set GEN8_CTX_FORCE_RESTORE, below) when
+	 * we start using semaphore signalling between Command Streamers */
+	/* desc |= GEN8_CTX_FORCE_RESTORE; */
+
+	return desc;
+}
+
+static void execlists_elsp_write(struct intel_engine_cs *ring,
+				 struct drm_i915_gem_object *ctx_obj0,
+				 struct drm_i915_gem_object *ctx_obj1)
+{
+	struct drm_i915_private *dev_priv = ring->dev->dev_private;
+	uint64_t temp = 0;
+	uint32_t desc[4];
+
+	/* XXX: Both descriptors must always be written, in the order below. */
+	if (ctx_obj1)
+		temp = execlists_ctx_descriptor(ctx_obj1);
+	else
+		temp = 0;	/* no second context: all-zero descriptor */
+	desc[1] = (u32)(temp >> 32);
+	desc[0] = (u32)temp;
+
+	temp = execlists_ctx_descriptor(ctx_obj0);
+	desc[3] = (u32)(temp >> 32);
+	desc[2] = (u32)temp;
+
+	/* Set Force Wakeup bit to prevent GT from entering C6 while
+	 * ELSP writes are in progress */
+	gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL);
+
+	I915_WRITE(RING_ELSP(ring), desc[1]);
+	I915_WRITE(RING_ELSP(ring), desc[0]);
+	I915_WRITE(RING_ELSP(ring), desc[3]);
+	/* The context is automatically loaded after this last write */
+	I915_WRITE(RING_ELSP(ring), desc[2]);
+
+	/* ELSP is write-only, so use a nearby register for the posting read */
+	POSTING_READ(RING_EXECLIST_STATUS(ring));
+
+	/* ELSP writes are done, so the forcewake reference can be dropped */
+	gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);
+}
+
+static int execlists_submit_context(struct intel_engine_cs *ring,
+				    struct intel_context *to0, u32 tail0,
+				    struct intel_context *to1, u32 tail1)
+{
+	struct drm_i915_gem_object *ctx_obj0;
+	struct drm_i915_gem_object *ctx_obj1 = NULL;
+
+	ctx_obj0 = to0->engine[ring->id].obj;
+	BUG_ON(!ctx_obj0);
+	BUG_ON(!i915_gem_obj_is_pinned(ctx_obj0));
+
+	if (to1) {	/* the second context is optional */
+		ctx_obj1 = to1->engine[ring->id].obj;
+		BUG_ON(!ctx_obj1);
+		BUG_ON(!i915_gem_obj_is_pinned(ctx_obj1));
+	}
+
+	execlists_elsp_write(ring, ctx_obj0, ctx_obj1);
+
+	return 0;	/* only return value; tail0/tail1 are not used */
+}
+
 static inline struct intel_ringbuffer *
 logical_ringbuf_get(struct intel_engine_cs *ring, struct intel_context *ctx)
 {
@@ -763,7 +872,8 @@  static void gen8_set_seqno(struct intel_engine_cs *ring, u32 seqno)
 static void gen8_submit_ctx(struct intel_engine_cs *ring,
 			    struct intel_context *ctx, u32 value)
 {
-	DRM_ERROR("Execlists still not ready!\n");
+	/* FIXME: we submit without even checking that the ELSP is ready */
+	execlists_submit_context(ring, ctx, value, NULL, 0);
 }
 
 static int gen8_emit_request(struct intel_engine_cs *ring,
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 0cb7cb5..eeb90ec 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -41,6 +41,7 @@  int intel_lr_context_deferred_create(struct intel_context *ctx,
 				     struct intel_engine_cs *ring);
 
 /* Execlists */
+u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj);
 bool intel_enable_execlists(struct drm_device *dev);
 
 #endif /* _INTEL_LRC_H_ */