diff mbox

[2/2] drm/i915/selftests: Exercise resetting in the middle of a wait-on-fence

Message ID 20180719194746.19111-2-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show

Commit Message

Chris Wilson July 19, 2018, 7:47 p.m. UTC
On older HW (gen2/3), fence registers are used to detile the accesses
made by GPU commands, and as such changing those registers requires
serialisation with the requests on the GPU. Anything running on the GPU
is subject to a hang, and so we must be able to recover cleanly in the
middle of a stuck wait on a fence register.

We can simulate using the fence on the GPU simply by marking the fence
as active on the request for this vma. That interface is common to all
gens, so the test can run on every gen rather than only on gen2/3.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
---
 .../gpu/drm/i915/selftests/intel_hangcheck.c  | 85 +++++++++++++++++--
 1 file changed, 77 insertions(+), 8 deletions(-)

Comments

Matthew Auld July 26, 2018, 12:26 p.m. UTC | #1
On 19 July 2018 at 20:47, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> On older HW, gen2/3, fence registers are used for detiling GPU commands
> and as such changing those registers requires serialisation with the
> requests on the GPU. Anything running on the GPU is subject to a hang,
> and so we must be able to recover cleanly in the middle of a stuck wait
> on a fence register.
>
> We can simulate using the fence on the GPU simply by marking the fence
> as active on the request for this vma, the interface being common to all
> gen, thus broadening the test.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Chris Wilson July 26, 2018, 12:58 p.m. UTC | #2
Quoting Matthew Auld (2018-07-26 13:26:08)
> On 19 July 2018 at 20:47, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > On older HW, gen2/3, fence registers are used for detiling GPU commands
> > and as such changing those registers requires serialisation with the
> > requests on the GPU. Anything running on the GPU is subject to a hang,
> > and so we must be able to recover cleanly in the middle of a stuck wait
> > on a fence register.
> >
> > We can simulate using the fence on the GPU simply by marking the fence
> > as active on the request for this vma, the interface being common to all
> > gen, thus broadening the test.
> >
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> Reviewed-by: Matthew Auld <matthew.auld@intel.com>

Ta, this test has proven itself quite useful at picking out some of the
nasty deadlocks in trying to make reset BKL-less.
-Chris
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
index b2d6d15f025a..db378226ac10 100644
--- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
@@ -1018,8 +1018,41 @@  static int evict_vma(void *data)
 	return err;
 }
 
+static int evict_fence(void *data)
+{
+	struct evict_vma *arg = data;
+	struct drm_i915_private *i915 = arg->vma->vm->i915;
+	int err;
+
+	complete(&arg->completion);
+
+	mutex_lock(&i915->drm.struct_mutex);
+
+	/* Mark the fence register as dirty to force the mmio update. */
+	err = i915_gem_object_set_tiling(arg->vma->obj, I915_TILING_Y, 512);
+	if (err) {
+		pr_err("Invalid Y-tiling settings; err:%d\n", err);
+		goto out_unlock;
+	}
+
+	err = i915_vma_pin_fence(arg->vma);
+	if (err) {
+		pr_err("Unable to pin Y-tiled fence; err:%d\n", err);
+		goto out_unlock;
+	}
+
+	i915_vma_unpin_fence(arg->vma);
+
+out_unlock:
+	mutex_unlock(&i915->drm.struct_mutex);
+
+	return err;
+}
+
 static int __igt_reset_evict_vma(struct drm_i915_private *i915,
-				 struct i915_address_space *vm)
+				 struct i915_address_space *vm,
+				 int (*fn)(void *),
+				 unsigned int flags)
 {
 	struct drm_i915_gem_object *obj;
 	struct task_struct *tsk = NULL;
@@ -1040,12 +1073,20 @@  static int __igt_reset_evict_vma(struct drm_i915_private *i915,
 	if (err)
 		goto unlock;
 
-	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
+	obj = i915_gem_object_create_internal(i915, SZ_1M);
 	if (IS_ERR(obj)) {
 		err = PTR_ERR(obj);
 		goto fini;
 	}
 
+	if (flags & EXEC_OBJECT_NEEDS_FENCE) {
+		err = i915_gem_object_set_tiling(obj, I915_TILING_X, 512);
+		if (err) {
+			pr_err("Invalid X-tiling settings; err:%d\n", err);
+			goto out_obj;
+		}
+	}
+
 	arg.vma = i915_vma_instance(obj, vm, NULL);
 	if (IS_ERR(arg.vma)) {
 		err = PTR_ERR(arg.vma);
@@ -1059,11 +1100,28 @@  static int __igt_reset_evict_vma(struct drm_i915_private *i915,
 	}
 
 	err = i915_vma_pin(arg.vma, 0, 0,
-			   i915_vma_is_ggtt(arg.vma) ? PIN_GLOBAL : PIN_USER);
-	if (err)
+			   i915_vma_is_ggtt(arg.vma) ?
+			   PIN_GLOBAL | PIN_MAPPABLE :
+			   PIN_USER);
+	if (err) {
+		i915_request_add(rq);
 		goto out_obj;
+	}
+
+	if (flags & EXEC_OBJECT_NEEDS_FENCE) {
+		err = i915_vma_pin_fence(arg.vma);
+		if (err) {
+			pr_err("Unable to pin X-tiled fence; err:%d\n", err);
+			i915_vma_unpin(arg.vma);
+			i915_request_add(rq);
+			goto out_obj;
+		}
+	}
 
-	err = i915_vma_move_to_active(arg.vma, rq, EXEC_OBJECT_WRITE);
+	err = i915_vma_move_to_active(arg.vma, rq, flags);
+
+	if (flags & EXEC_OBJECT_NEEDS_FENCE)
+		i915_vma_unpin_fence(arg.vma);
 	i915_vma_unpin(arg.vma);
 
 	i915_request_get(rq);
@@ -1086,7 +1144,7 @@  static int __igt_reset_evict_vma(struct drm_i915_private *i915,
 
 	init_completion(&arg.completion);
 
-	tsk = kthread_run(evict_vma, &arg, "igt/evict_vma");
+	tsk = kthread_run(fn, &arg, "igt/evict_vma");
 	if (IS_ERR(tsk)) {
 		err = PTR_ERR(tsk);
 		tsk = NULL;
@@ -1137,7 +1195,8 @@  static int igt_reset_evict_ggtt(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
 
-	return __igt_reset_evict_vma(i915, &i915->ggtt.vm);
+	return __igt_reset_evict_vma(i915, &i915->ggtt.vm,
+				     evict_vma, EXEC_OBJECT_WRITE);
 }
 
 static int igt_reset_evict_ppgtt(void *arg)
@@ -1161,13 +1220,22 @@  static int igt_reset_evict_ppgtt(void *arg)
 
 	err = 0;
 	if (ctx->ppgtt) /* aliasing == global gtt locking, covered above */
-		err = __igt_reset_evict_vma(i915, &ctx->ppgtt->vm);
+		err = __igt_reset_evict_vma(i915, &ctx->ppgtt->vm,
+					    evict_vma, EXEC_OBJECT_WRITE);
 
 out:
 	mock_file_free(i915, file);
 	return err;
 }
 
+static int igt_reset_evict_fence(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+
+	return __igt_reset_evict_vma(i915, &i915->ggtt.vm,
+				     evict_fence, EXEC_OBJECT_NEEDS_FENCE);
+}
+
 static int wait_for_others(struct drm_i915_private *i915,
 			   struct intel_engine_cs *exclude)
 {
@@ -1417,6 +1485,7 @@  int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
 		SUBTEST(igt_reset_wait),
 		SUBTEST(igt_reset_evict_ggtt),
 		SUBTEST(igt_reset_evict_ppgtt),
+		SUBTEST(igt_reset_evict_fence),
 		SUBTEST(igt_handle_error),
 	};
 	bool saved_hangcheck;