diff mbox

[29/29] WIP: Defer seqno allocation until actual hardware submission time

Message ID 1414694481-15724-30-git-send-email-John.C.Harrison@Intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

John Harrison Oct. 30, 2014, 6:41 p.m. UTC
From: John Harrison <John.C.Harrison@Intel.com>

*** Work in progress. Do not submit - currently broken! ***
This patch is being included in the series simply to show the intention.

The seqno value is now only used for the final test for completion of a request.
It is no longer used to track the request through the software stack. Thus it is
no longer necessary to allocate the seqno immediately with the request. Instead,
it can be done lazily and left until the request is actually sent to the
hardware. This is particularly advantageous with a GPU scheduler as the requests
can then be re-ordered between their creation and their hardware submission.

The problem with lazy seqno allocation is that wrapping the seqno around zero
currently involves idling the hardware for reasons that are not entirely clear.
This is something that is not safe to do halfway through ring submission. Thus
the wrapping must be done in advance even if the actual seqno assignment is not
done until i915_add_request().

Unfortunately, there is no clearly defined point at which to do the
semi-lazy allocation or advance wrapping. The problem is that requests can be
submitted asynchronously halfway through do_execbuffer() processing! The
solution is to ensure that all commands are actually submitted as requests at
the appropriate time by adding extra add_request() calls. That is future work...

For: VIZ-4377
Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h         |    2 +-
 drivers/gpu/drm/i915/i915_gem.c         |   31 ++++++++++++++++++++++++++++++-
 drivers/gpu/drm/i915/intel_lrc.c        |   10 ++++------
 drivers/gpu/drm/i915/intel_ringbuffer.c |   10 ++++------
 4 files changed, 39 insertions(+), 14 deletions(-)

Comments

Shuang He Nov. 4, 2014, 11:12 a.m. UTC | #1
Tested-By: PRC QA PRTS (Patch Regression Test System Contact: shuang.he@intel.com)
-------------------------------------Summary-------------------------------------
Platform: baseline_drm_intel_nightly_pass_rate->patch_applied_pass_rate
BYT: pass/total=348/348->347/348
PNV: pass/total=328/328->324/328
ILK: pass/total=329/330->329/330
IVB: pass/total=546/546->545/546
SNB: pass/total=540/541->538/541
HSW: pass/total=564/568->562/568
BDW: pass/total=435/435->435/435
-------------------------------------Detailed-------------------------------------
test_platform: test_suite, test_case, result_with_drm_intel_nightly(count, machine_id...)...->result_with_patch_applied(count, machine_id)...
BYT: Intel_gpu_tools, igt_gem_seqno_wrap, FAIL(2, M36)PASS(1, M31) -> FAIL(1, M36)PASS(2, M36)
PNV: Intel_gpu_tools, igt_drv_hangman_error-state-basic, PASS(3, M24M25) -> DMESG_WARN(1, M25)PASS(2, M25)
PNV: Intel_gpu_tools, igt_gem_concurrent_blit_cpu-bcs-gpu-read-after-write-interruptible, PASS(3, M24M25) -> DMESG_WARN(1, M25)PASS(2, M25)
PNV: Intel_gpu_tools, igt_kms_setmode_invalid-clone-exclusive-crtc, DMESG_WARN(2, M25)PASS(1, M24) -> DMESG_WARN(1, M25)PASS(2, M25)
PNV: Intel_gpu_tools, igt_kms_setmode_invalid-clone-single-crtc, DMESG_WARN(2, M25)PASS(1, M24) -> DMESG_WARN(1, M25)PASS(2, M25)
ILK: Intel_gpu_tools, igt_drv_missed_irq_hang, DMESG_FAIL(2, M37)PASS(1, M37) -> DMESG_FAIL(1, M37)PASS(2, M37)
ILK: Intel_gpu_tools, igt_gem_close_race_gem-close-race, DMESG_WARN(1, M37)PASS(2, M37) -> PASS(3, M37)
IVB: Intel_gpu_tools, igt_gem_seqno_wrap, FAIL(2, M4)PASS(1, M4) -> FAIL(1, M4)PASS(2, M4)
SNB: Intel_gpu_tools, igt_drv_missed_irq_hang, FAIL(2, M35)PASS(1, M35) -> FAIL(1, M35)PASS(2, M35)
SNB: Intel_gpu_tools, igt_pm_rpm_gem-idle, PASS(1, M35) -> NO_RESULT(1, M35)PASS(2, M35)
HSW: Intel_gpu_tools, igt_drv_missed_irq_hang, DMESG_FAIL(2, M39)PASS(1, M39) -> FAIL(1, M39)PASS(2, M39)
HSW: Intel_gpu_tools, igt_gem_bad_reloc_negative-reloc, DMESG_FAIL(2, M39)PASS(1, M39) -> NSPT(1, M39)PASS(2, M39)
HSW: Intel_gpu_tools, igt_gem_seqno_wrap, DMESG_FAIL(2, M39)PASS(1, M39) -> FAIL(1, M39)PASS(2, M39)
HSW: Intel_gpu_tools, igt_kms_cursor_crc_cursor-128x128-sliding, DMESG_FAIL(2, M39)DMESG_WARN(1, M39) -> DMESG_WARN(2, M39)PASS(1, M39)
HSW: Intel_gpu_tools, igt_kms_cursor_crc_cursor-256x256-random, DMESG_FAIL(2, M39)DMESG_WARN(1, M39) -> DMESG_WARN(2, M39)PASS(1, M39)
HSW: Intel_gpu_tools, igt_pm_rpm_gem-idle, DMESG_FAIL(2, M39)PASS(1, M39) -> DMESG_WARN(1, M39)PASS(2, M39)
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 2284ecef..fcc9a4b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2526,7 +2526,7 @@  i915_seqno_passed(uint32_t seq1, uint32_t seq2)
 	return (int32_t)(seq1 - seq2) >= 0;
 }
 
-int __must_check i915_gem_get_seqno(struct drm_device *dev, u32 *seqno);
+int __must_check i915_gem_prepare_next_seqno(struct drm_device *dev);
 int __must_check i915_gem_set_seqno(struct drm_device *dev, u32 seqno);
 int __must_check i915_gem_object_get_fence(struct drm_i915_gem_object *obj);
 int __must_check i915_gem_object_put_fence(struct drm_i915_gem_object *obj);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index ad0458b..da2371a 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2314,12 +2314,15 @@  int i915_gem_set_seqno(struct drm_device *dev, u32 seqno)
 }
 
 int
-i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
+i915_gem_prepare_next_seqno(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
 	/* reserve 0 for non-seqno */
 	if (dev_priv->next_seqno == 0) {
+		/* Why is the full re-initialisation required? Is it only for
+		 * hardware semaphores? If so, could skip it in the case where
+		 * semaphores are disabled? */
 		int ret = i915_gem_init_seqno(dev, 0);
 		if (ret)
 			return ret;
@@ -2327,6 +2330,24 @@  i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
 		dev_priv->next_seqno = 1;
 	}
 
+	return 0;
+}
+
+static int
+i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	/* reserve 0 for non-seqno */
+	if (dev_priv->next_seqno == 0) {
+		/* Should never get here! Must always call 'prepare_next' in
+		 * advance. This code is called during request submission.
+		 * Trying to wrap the seqno and the implicit idle() calls that
+		 * the wrap code makes are a bad idea at this point! */
+		DRM_ERROR("Need to wrap seqno at inopportune moment!\n");
+		return -EBUSY;
+	}
+
 	*seqno = dev_priv->last_seqno = dev_priv->next_seqno++;
 	return 0;
 }
@@ -2369,6 +2390,11 @@  int __i915_add_request(struct intel_engine_cs *ring,
 			return ret;
 	}
 
+	/* Assign an identifier to track this request through the hardware: */
+	ret = i915_gem_get_seqno(ring->dev, &request->seqno);
+	if (ret)
+		return ret;
+
 	/* Record the position of the start of the request so that
 	 * should we detect the updated seqno part-way through the
 	 * GPU processing the request, we never over-estimate the
@@ -2673,6 +2699,9 @@  void i915_gem_complete_requests_ring(struct intel_engine_cs *ring,
 		if (req->complete)
 			continue;
 
+		if (req->seqno == 0)
+			continue;
+
 		if (i915_seqno_passed(seqno, req->seqno))
 			req->complete = true;
 	}
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index edf1033..4d62978 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -803,6 +803,10 @@  static int logical_ring_alloc_request(struct intel_engine_cs *ring,
 	if (ring->outstanding_lazy_request)
 		return 0;
 
+	ret = i915_gem_prepare_next_seqno(ring->dev);
+	if (ret)
+		return ret;
+
 	request = kzalloc(sizeof(*request), GFP_KERNEL);
 	if (request == NULL)
 		return -ENOMEM;
@@ -811,12 +815,6 @@  static int logical_ring_alloc_request(struct intel_engine_cs *ring,
 	request->ring = ring;
 	request->uniq = dev_private->request_uniq++;
 
-	ret = i915_gem_get_seqno(ring->dev, &request->seqno);
-	if (ret) {
-		kfree(request);
-		return ret;
-	}
-
 	/* Hold a reference to the context this request belongs to
 	 * (we will need it when the time comes to emit/retire the
 	 * request).
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 7b5c8b8..9ff4ea6 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2034,6 +2034,10 @@  intel_ring_alloc_request(struct intel_engine_cs *ring)
 	if (ring->outstanding_lazy_request)
 		return 0;
 
+	ret = i915_gem_prepare_next_seqno(ring->dev);
+	if (ret)
+		return ret;
+
 	request = kzalloc(sizeof(*request), GFP_KERNEL);
 	if (request == NULL)
 		return -ENOMEM;
@@ -2042,12 +2046,6 @@  intel_ring_alloc_request(struct intel_engine_cs *ring)
 	request->ring = ring;
 	request->uniq = dev_private->request_uniq++;
 
-	ret = i915_gem_get_seqno(ring->dev, &request->seqno);
-	if (ret) {
-		kfree(request);
-		return ret;
-	}
-
 	ring->outstanding_lazy_request = request;
 	return 0;
 }