diff mbox

drm/i915: Map the execlists context regs once during pinning

Message ID 1432127557-17281-1-git-send-email-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show

Commit Message

Chris Wilson May 20, 2015, 1:12 p.m. UTC
When we pin the execlists context on queuing, it is the ideal time to map
the register page that we need to update when we submit the request to
the hardware (and keep it around for future requests).

This avoids having to do an atomic kmap on every submission. On the
other hand, it does depend upon correct request construction.

v2: Rebase
v3: Rebase
v4: Rebase

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c         |  10 --
 drivers/gpu/drm/i915/i915_gem_gtt.c     |   1 +
 drivers/gpu/drm/i915/intel_lrc.c        | 216 ++++++++++++++------------------
 drivers/gpu/drm/i915/intel_lrc.h        |   2 -
 drivers/gpu/drm/i915/intel_ringbuffer.h |   1 +
 5 files changed, 95 insertions(+), 135 deletions(-)

Comments

Chris Wilson May 20, 2015, 1:16 p.m. UTC | #1
On Wed, May 20, 2015 at 02:12:37PM +0100, Chris Wilson wrote:
> When we pin the execlists context on queuing, it is the ideal time to map
> the register page that we need to update when we submit the request to
> the hardware (and keep it around for future requests).
> 
> This avoids having to do an atomic kmap on every submission. On the
> other hand, it does depend upon correct request construction.

Obviously this is the wrong patch for this thread ;-)
-Chris
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 3f0b85a22eeb..b3cf3fa5716a 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2653,13 +2653,6 @@  void i915_gem_request_free(struct kref *req_ref)
 	struct intel_context *ctx = req->ctx;
 
 	if (ctx) {
-		if (i915.enable_execlists) {
-			struct intel_engine_cs *ring = req->ring;
-
-			if (ctx != ring->default_context)
-				intel_lr_context_unpin(ring, ctx);
-		}
-
 		i915_gem_context_unreference(ctx);
 	}
 
@@ -2767,9 +2760,6 @@  static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
 				execlist_link);
 		list_del(&submit_req->execlist_link);
 
-		if (submit_req->ctx != ring->default_context)
-			intel_lr_context_unpin(ring, submit_req->ctx);
-
 		i915_gem_request_unreference(submit_req);
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 450418b0111b..54fd17ddf0f0 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -915,6 +915,7 @@  static int gen8_alloc_va_range(struct i915_address_space *vm,
 	}
 
 	free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
+	ppgtt->pd_dirty_rings = ~0;
 	return 0;
 
 err_out:
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index d6d3fc2bb77b..c8ded8b05af5 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -212,9 +212,6 @@  enum {
 };
 #define GEN8_CTX_ID_SHIFT 32
 
-static int intel_lr_context_pin(struct intel_engine_cs *ring,
-		struct intel_context *ctx);
-
 /**
  * intel_sanitize_enable_execlists() - sanitize i915.enable_execlists
  * @dev: DRM device.
@@ -330,57 +327,39 @@  static void execlists_elsp_write(struct intel_engine_cs *ring,
 	spin_unlock(&dev_priv->uncore.lock);
 }
 
-static int execlists_update_context(struct drm_i915_gem_object *ctx_obj,
-				    struct drm_i915_gem_object *ring_obj,
-				    struct i915_hw_ppgtt *ppgtt,
-				    u32 tail)
+static struct drm_i915_gem_object *
+execlists_update_context(struct intel_engine_cs *ring,
+			 struct intel_context *ctx,
+			 u32 tail)
 {
-	struct page *page;
-	uint32_t *reg_state;
-
-	page = i915_gem_object_get_page(ctx_obj, 1);
-	reg_state = kmap_atomic(page);
-
-	reg_state[CTX_RING_TAIL+1] = tail;
-	reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset_view(ring_obj, NULL);
+	struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
+	struct i915_hw_ppgtt *ppgtt = ctx->ppgtt;
 
 	/* True PPGTT with dynamic page allocation: update PDP registers and
 	 * point the unallocated PDPs to the scratch page
 	 */
-	if (ppgtt) {
-		ASSIGN_CTX_PDP(ppgtt, reg_state, 3);
-		ASSIGN_CTX_PDP(ppgtt, reg_state, 2);
-		ASSIGN_CTX_PDP(ppgtt, reg_state, 1);
-		ASSIGN_CTX_PDP(ppgtt, reg_state, 0);
+	if (ppgtt && ppgtt->pd_dirty_rings & intel_ring_flag(ring)) {
+		ASSIGN_CTX_PDP(ppgtt, ringbuf->regs, 3);
+		ASSIGN_CTX_PDP(ppgtt, ringbuf->regs, 2);
+		ASSIGN_CTX_PDP(ppgtt, ringbuf->regs, 1);
+		ASSIGN_CTX_PDP(ppgtt, ringbuf->regs, 0);
+		ppgtt->pd_dirty_rings &= ~intel_ring_flag(ring);
 	}
 
-	kunmap_atomic(reg_state);
-
-	return 0;
+	ringbuf->regs[CTX_RING_TAIL+1] = tail;
+	return ctx->engine[ring->id].state;
 }
 
 static void execlists_submit_contexts(struct intel_engine_cs *ring,
 				      struct intel_context *to0, u32 tail0,
 				      struct intel_context *to1, u32 tail1)
 {
-	struct drm_i915_gem_object *ctx_obj0 = to0->engine[ring->id].state;
-	struct intel_ringbuffer *ringbuf0 = to0->engine[ring->id].ringbuf;
-	struct drm_i915_gem_object *ctx_obj1 = NULL;
-	struct intel_ringbuffer *ringbuf1 = NULL;
-
-	BUG_ON(!ctx_obj0);
-
-	execlists_update_context(ctx_obj0, ringbuf0->obj, to0->ppgtt, tail0);
+	struct drm_i915_gem_object *c0, *c1;
 
-	if (to1) {
-		ringbuf1 = to1->engine[ring->id].ringbuf;
-		ctx_obj1 = to1->engine[ring->id].state;
-		BUG_ON(!ctx_obj1);
+	c0 = execlists_update_context(ring, to0, tail0);
+	c1 = to1 ? execlists_update_context(ring, to1, tail1) : NULL;
 
-		execlists_update_context(ctx_obj1, ringbuf1->obj, to1->ppgtt, tail1);
-	}
-
-	execlists_elsp_write(ring, ctx_obj0, ctx_obj1);
+	execlists_elsp_write(ring, c0, c1);
 }
 
 static void execlists_context_unqueue(struct intel_engine_cs *ring)
@@ -547,25 +526,15 @@  static int execlists_context_queue(struct intel_engine_cs *ring,
 	struct drm_i915_gem_request *cursor;
 	int num_elements = 0;
 
-	if (to != ring->default_context)
-		intel_lr_context_pin(ring, to);
+	if (WARN_ON(request == NULL))
+		return -ENODEV;
+
+	if (WARN_ON(to->engine[ring->id].pin_count == 0))
+		return -ENODEV;
+
+	i915_gem_request_reference(request);
+	WARN_ON(to != request->ctx);
 
-	if (!request) {
-		/*
-		 * If there isn't a request associated with this submission,
-		 * create one as a temporary holder.
-		 */
-		request = kzalloc(sizeof(*request), GFP_KERNEL);
-		if (request == NULL)
-			return -ENOMEM;
-		request->ring = ring;
-		request->ctx = to;
-		kref_init(&request->ref);
-		i915_gem_context_reference(request->ctx);
-	} else {
-		i915_gem_request_reference(request);
-		WARN_ON(to != request->ctx);
-	}
 	request->tail = tail;
 
 	spin_lock_irq(&ring->execlist_lock);
@@ -654,16 +623,47 @@  static int execlists_move_to_gpu(struct intel_ringbuffer *ringbuf,
 	return logical_ring_invalidate_all_caches(ringbuf, ctx);
 }
 
+static int intel_lr_context_pin(struct intel_engine_cs *ring,
+				struct intel_context *ctx)
+{
+	struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
+	struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
+	struct i915_vma *vma;
+
+	WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
+	if (ctx->engine[ring->id].pin_count++)
+		return 0;
+
+	vma = i915_gem_obj_ggtt_pin(ctx_obj, GEN8_LR_CONTEXT_ALIGN, 0);
+	if (IS_ERR(vma))
+		goto reset_pin_count;
+
+	ctx->engine[ring->id].vma = vma;
+
+	vma = intel_pin_and_map_ringbuffer_obj(ring->dev, ringbuf);
+	if (IS_ERR(vma))
+		goto unpin_ctx_obj;
+
+	ringbuf->regs = kmap(i915_gem_object_get_page(ctx_obj, 1));
+	ringbuf->regs[CTX_RING_BUFFER_START+1] = vma->node.start;
+	return 0;
+
+unpin_ctx_obj:
+	i915_vma_unpin(ctx->engine[ring->id].vma);
+	ctx->engine[ring->id].vma = NULL;
+reset_pin_count:
+	ctx->engine[ring->id].pin_count = 0;
+	return PTR_ERR(vma);
+}
+
 int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request,
 					    struct intel_context *ctx)
 {
 	int ret;
 
-	if (ctx != request->ring->default_context) {
-		ret = intel_lr_context_pin(request->ring, ctx);
-		if (ret)
-			return ret;
-	}
+	ret = intel_lr_context_pin(request->ring, ctx);
+	if (ret)
+		return ret;
 
 	request->ringbuf = ctx->engine[request->ring->id].ringbuf;
 	request->ctx     = ctx;
@@ -913,29 +913,43 @@  int intel_execlists_submission(struct drm_device *dev, struct drm_file *file,
 	return 0;
 }
 
+static void intel_lr_context_unpin(struct intel_engine_cs *ring,
+				   struct intel_context *ctx)
+{
+	struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
+
+	if (--ctx->engine[ring->id].pin_count)
+		return;
+
+	kunmap(i915_gem_object_get_page(ctx->engine[ring->id].state, 1));
+	ringbuf->regs = NULL;
+
+	intel_unpin_ringbuffer_obj(ringbuf);
+
+	i915_vma_unpin(ctx->engine[ring->id].vma);
+	ctx->engine[ring->id].vma = NULL;
+}
+
 void intel_execlists_retire_requests(struct intel_engine_cs *ring)
 {
-	struct drm_i915_gem_request *req, *tmp;
-	struct list_head retired_list;
+	struct list_head list;
 
 	WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
 	if (list_empty(&ring->execlist_retired_req_list))
 		return;
 
-	INIT_LIST_HEAD(&retired_list);
 	spin_lock_irq(&ring->execlist_lock);
-	list_replace_init(&ring->execlist_retired_req_list, &retired_list);
+	list_replace_init(&ring->execlist_retired_req_list, &list);
 	spin_unlock_irq(&ring->execlist_lock);
 
-	list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) {
-		struct intel_context *ctx = req->ctx;
-		struct drm_i915_gem_object *ctx_obj =
-				ctx->engine[ring->id].state;
+	while (!list_empty(&list)) {
+		struct drm_i915_gem_request *rq;
+
+		rq = list_first_entry(&list, typeof(*rq), execlist_link);
+		list_del(&rq->execlist_link);
 
-		if (ctx_obj && (ctx != ring->default_context))
-			intel_lr_context_unpin(ring, ctx);
-		list_del(&req->execlist_link);
-		i915_gem_request_unreference(req);
+		intel_lr_context_unpin(ring, rq->ctx);
+		i915_gem_request_unreference(rq);
 	}
 }
 
@@ -978,52 +992,6 @@  int logical_ring_flush_all_caches(struct intel_ringbuffer *ringbuf,
 	return 0;
 }
 
-static int intel_lr_context_pin(struct intel_engine_cs *ring,
-		struct intel_context *ctx)
-{
-	struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
-	struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
-	struct i915_vma *vma;
-
-	WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
-	if (ctx->engine[ring->id].pin_count++ == 0) {
-		vma = i915_gem_obj_ggtt_pin(ctx_obj,
-					    GEN8_LR_CONTEXT_ALIGN, 0);
-		if (IS_ERR(vma))
-			goto reset_pin_count;
-
-		ctx->engine[ring->id].vma = vma;
-
-		vma = intel_pin_and_map_ringbuffer_obj(ring->dev, ringbuf);
-		if (IS_ERR(vma))
-			goto unpin_ctx_obj;
-	}
-
-	return 0;
-
-unpin_ctx_obj:
-	i915_vma_unpin(ctx->engine[ring->id].vma);
-	ctx->engine[ring->id].pin_count = 0;
-reset_pin_count:
-	ctx->engine[ring->id].vma = NULL;
-	return PTR_ERR(vma);
-}
-
-void intel_lr_context_unpin(struct intel_engine_cs *ring,
-		struct intel_context *ctx)
-{
-	struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
-	struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
-
-	if (ctx_obj) {
-		WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
-		if (--ctx->engine[ring->id].pin_count == 0) {
-			intel_unpin_ringbuffer_obj(ringbuf);
-			i915_vma_unpin(ctx->engine[ring->id].vma);
-		}
-	}
-}
-
 static int intel_logical_ring_workarounds_emit(struct intel_engine_cs *ring,
 					       struct intel_context *ctx)
 {
@@ -1780,13 +1748,15 @@  populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_o
 	reg_state[CTX_PDP0_UDW] = GEN8_RING_PDP_UDW(ring, 0);
 	reg_state[CTX_PDP0_LDW] = GEN8_RING_PDP_LDW(ring, 0);
 
-	/* With dynamic page allocation, PDPs may not be allocated at this point,
-	 * Point the unallocated PDPs to the scratch page
+	/* With dynamic page allocation, PDPs may not be allocated at this
+	 * point. Point the unallocated PDPs to the scratch page.
 	 */
 	ASSIGN_CTX_PDP(ppgtt, reg_state, 3);
 	ASSIGN_CTX_PDP(ppgtt, reg_state, 2);
 	ASSIGN_CTX_PDP(ppgtt, reg_state, 1);
 	ASSIGN_CTX_PDP(ppgtt, reg_state, 0);
+	ctx->ppgtt->pd_dirty_rings &= ~intel_ring_flag(ring);
+
 	if (ring->id == RCS) {
 		reg_state[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
 		reg_state[CTX_R_PWR_CLK_STATE] = GEN8_R_PWR_CLK_STATE;
@@ -2003,7 +1973,7 @@  error_unpin_ctx:
 }
 
 void intel_lr_context_reset(struct drm_device *dev,
-			struct intel_context *ctx)
+			    struct intel_context *ctx)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_engine_cs *ring;
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 49828f1f5708..9c91bf915a64 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -70,8 +70,6 @@  static inline void intel_logical_ring_emit(struct intel_ringbuffer *ringbuf,
 void intel_lr_context_free(struct intel_context *ctx);
 int intel_lr_context_deferred_create(struct intel_context *ctx,
 				     struct intel_engine_cs *ring);
-void intel_lr_context_unpin(struct intel_engine_cs *ring,
-		struct intel_context *ctx);
 void intel_lr_context_reset(struct drm_device *dev,
 			struct intel_context *ctx);
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index ce2ae5f7dc00..398afe256081 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -96,6 +96,7 @@  struct intel_ring_hangcheck {
 struct intel_ringbuffer {
 	struct drm_i915_gem_object *obj;
 	void __iomem *virtual_start;
+	uint32_t *regs;
 
 	struct intel_engine_cs *ring;
 	struct i915_vma *vma;