diff mbox series

[4/7] drm/i915/selftests: Add tests for GT and engine workaround verification

Message ID 20181130174412.15767-5-tvrtko.ursulin@linux.intel.com (mailing list archive)
State New, archived
Headers show
Series Restore workarounds after engine reset and unify their handling | expand

Commit Message

Tvrtko Ursulin Nov. 30, 2018, 5:44 p.m. UTC
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Two simple selftests which test that both GT and engine workarounds are
not lost after either a full GPU reset, or after the per-engine ones.

(Including checks that one engine reset is not affecting workarounds not
belonging to itself.)

v2:
 * Rebase for series refactoring.
 * Add spinner for actual engine reset!
 * Add idle reset test as well. (Chris Wilson)
 * Share existing global_reset_lock. (Chris Wilson)

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/Makefile                 |   1 +
 drivers/gpu/drm/i915/intel_workarounds.c      |   6 +
 drivers/gpu/drm/i915/selftests/igt_common.c   |  44 ++++++
 drivers/gpu/drm/i915/selftests/igt_common.h   |  15 ++
 .../gpu/drm/i915/selftests/intel_hangcheck.c  |  51 ++----
 .../drm/i915/selftests/intel_workarounds.c    | 147 +++++++++++++++++-
 6 files changed, 217 insertions(+), 47 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/selftests/igt_common.c
 create mode 100644 drivers/gpu/drm/i915/selftests/igt_common.h

Comments

Chris Wilson Nov. 30, 2018, 9:04 p.m. UTC | #1
Quoting Tvrtko Ursulin (2018-11-30 17:44:09)
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> Two simple selftests which test that both GT and engine workarounds are
> not lost after either a full GPU reset, or after the per-engine ones.
> 
> (Including checks that one engine reset is not affecting workarounds not
> belonging to itself.)
> 
> v2:
>  * Rebase for series refactoring.
>  * Add spinner for actual engine reset!
>  * Add idle reset test as well. (Chris Wilson)
>  * Share existing global_reset_lock. (Chris Wilson)
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>  drivers/gpu/drm/i915/Makefile                 |   1 +
>  drivers/gpu/drm/i915/intel_workarounds.c      |   6 +
>  drivers/gpu/drm/i915/selftests/igt_common.c   |  44 ++++++
>  drivers/gpu/drm/i915/selftests/igt_common.h   |  15 ++
>  .../gpu/drm/i915/selftests/intel_hangcheck.c  |  51 ++----
>  .../drm/i915/selftests/intel_workarounds.c    | 147 +++++++++++++++++-
>  6 files changed, 217 insertions(+), 47 deletions(-)
>  create mode 100644 drivers/gpu/drm/i915/selftests/igt_common.c
>  create mode 100644 drivers/gpu/drm/i915/selftests/igt_common.h
> 
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index 50a8fa8fce64..ceeb21f8aa0c 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -166,6 +166,7 @@ i915-$(CONFIG_DRM_I915_SELFTEST) += \
>         selftests/i915_random.o \
>         selftests/i915_selftest.o \
>         selftests/igt_flush_test.o \
> +       selftests/igt_common.o \

I was anticipating igt_reset.c.

> diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
> index 80396b3592f5..194a3c30554d 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c
> +++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
> @@ -6,6 +6,8 @@
>  
>  #include "../i915_selftest.h"
>  
> +#include "igt_common.h"
> +#include "igt_flush_test.h"
>  #include "igt_spinner.h"
>  #include "igt_wedge_me.h"
>  #include "mock_context.h"
> @@ -290,7 +292,6 @@ static int live_reset_whitelist(void *arg)
>  {
>         struct drm_i915_private *i915 = arg;
>         struct intel_engine_cs *engine = i915->engine[RCS];
> -       struct i915_gpu_error *error = &i915->gpu_error;
>         struct whitelist w;
>         int err = 0;
>  
> @@ -302,8 +303,7 @@ static int live_reset_whitelist(void *arg)
>         if (!whitelist_build(engine, &w))
>                 return 0;
>  
> -       set_bit(I915_RESET_BACKOFF, &error->flags);
> -       set_bit(I915_RESET_ENGINE + engine->id, &error->flags);
> +       igt_global_reset_lock(i915);
>  
>         if (intel_has_reset_engine(i915)) {
>                 err = check_whitelist_across_reset(engine,
> @@ -322,15 +322,152 @@ static int live_reset_whitelist(void *arg)
>         }
>  
>  out:
> -       clear_bit(I915_RESET_ENGINE + engine->id, &error->flags);
> -       clear_bit(I915_RESET_BACKOFF, &error->flags);
> +       igt_global_reset_unlock(i915);

Yup, makes sense.

>         return err;
>  }
>  
> +bool intel_engine_workarounds_verify(struct intel_engine_cs *engine,
> +                                    const char *from);
> +
> +static bool verify_gt_engine_wa(struct drm_i915_private *i915, const char *str)
> +{
> +       struct intel_engine_cs *engine;
> +       enum intel_engine_id id;
> +       bool ok = true;
> +
> +       ok &= intel_gt_workarounds_verify(i915, str);
> +
> +       for_each_engine(engine, i915, id)
> +               ok &= intel_engine_workarounds_verify(engine, str);
> +
> +       return ok;
> +}
> +
> +static int
> +live_gpu_reset_gt_engine_workarounds(void *arg)
> +{
> +       struct drm_i915_private *i915 = arg;
> +       struct i915_gpu_error *error = &i915->gpu_error;
> +       bool ok;
> +
> +       if (!intel_has_gpu_reset(i915))
> +               return 0;
> +
> +       pr_info("Verifying after GPU reset...\n");

Such noise! ;)

> +
> +       igt_global_reset_lock(i915);
> +
> +       ok = verify_gt_engine_wa(i915, "before reset");
> +       if (!ok)
> +               goto out;
> +
> +       intel_runtime_pm_get(i915);
> +       set_bit(I915_RESET_HANDOFF, &error->flags);
> +       i915_reset(i915, ALL_ENGINES, "live_workarounds");
> +       intel_runtime_pm_put(i915);
> +
> +       ok = verify_gt_engine_wa(i915, "after reset");
> +
> +out:
> +       igt_global_reset_unlock(i915);
> +
> +       return ok ? 0 : -ESRCH;
> +}
> +
> +static int
> +live_engine_reset_gt_engine_workarounds(void *arg)
> +{
> +       struct drm_i915_private *i915 = arg;
> +       struct intel_engine_cs *engine;
> +       struct i915_gem_context *ctx;
> +       struct igt_spinner spin;
> +       enum intel_engine_id id;
> +       struct i915_request *rq;
> +       int ret = 0;
> +
> +       if (!intel_has_reset_engine(i915))
> +               return 0;
> +
> +       ctx = kernel_context(i915);
> +       if (IS_ERR(ctx))
> +               return PTR_ERR(ctx);
> +
> +       for_each_engine(engine, i915, id) {
> +               bool ok;
> +
> +               pr_info("Verifying after %s reset...\n", engine->name);
> +
> +               igt_global_reset_lock(i915);

The key thing with it being global, is that you can take it globally :)
-Chris
Chris Wilson Nov. 30, 2018, 10:01 p.m. UTC | #2
Quoting Chris Wilson (2018-11-30 21:04:46)
> Quoting Tvrtko Ursulin (2018-11-30 17:44:09)
> > From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > 
> > Two simple selftests which test that both GT and engine workarounds are
> > not lost after either a full GPU reset, or after the per-engine ones.
> > 
> > (Including checks that one engine reset is not affecting workarounds not
> > belonging to itself.)
> > 
> > v2:
> >  * Rebase for series refactoring.
> >  * Add spinner for actual engine reset!
> >  * Add idle reset test as well. (Chris Wilson)
> >  * Share existing global_reset_lock. (Chris Wilson)
> > 
> > Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > ---
> > +       for_each_engine(engine, i915, id) {
> > +               bool ok;
> > +
> > +               pr_info("Verifying after %s reset...\n", engine->name);
> > +
> > +               igt_global_reset_lock(i915);
> 
> The key thing with it being global, is that you can take it globally :)

I think that tidies up the loop innards to make it worthwhile, so with
that
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
-Chris
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 50a8fa8fce64..ceeb21f8aa0c 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -166,6 +166,7 @@  i915-$(CONFIG_DRM_I915_SELFTEST) += \
 	selftests/i915_random.o \
 	selftests/i915_selftest.o \
 	selftests/igt_flush_test.o \
+	selftests/igt_common.o \
 	selftests/igt_spinner.o
 
 # virtual gpu code
diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c
index d80ea8172222..5a4d70e02b63 100644
--- a/drivers/gpu/drm/i915/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/intel_workarounds.c
@@ -1303,4 +1303,10 @@  void intel_engine_workarounds_apply(struct intel_engine_cs *engine)
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftests/intel_workarounds.c"
+
+bool intel_engine_workarounds_verify(struct intel_engine_cs *engine,
+				     const char *from)
+{
+	return wa_list_verify(engine->i915, &engine->wa_list, from);
+}
 #endif
diff --git a/drivers/gpu/drm/i915/selftests/igt_common.c b/drivers/gpu/drm/i915/selftests/igt_common.c
new file mode 100644
index 000000000000..ad00fdf895be
--- /dev/null
+++ b/drivers/gpu/drm/i915/selftests/igt_common.c
@@ -0,0 +1,44 @@ 
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2018 Intel Corporation
+ */
+
+#include "igt_common.h"
+
+#include "../i915_drv.h"
+#include "../intel_ringbuffer.h"
+
+void igt_global_reset_lock(struct drm_i915_private *i915)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	pr_debug("%s: current gpu_error=%08lx\n",
+		 __func__, i915->gpu_error.flags);
+
+	while (test_and_set_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags))
+		wait_event(i915->gpu_error.reset_queue,
+			   !test_bit(I915_RESET_BACKOFF,
+				     &i915->gpu_error.flags));
+
+	for_each_engine(engine, i915, id) {
+		while (test_and_set_bit(I915_RESET_ENGINE + id,
+					&i915->gpu_error.flags))
+			wait_on_bit(&i915->gpu_error.flags,
+				    I915_RESET_ENGINE + id,
+				    TASK_UNINTERRUPTIBLE);
+	}
+}
+
+void igt_global_reset_unlock(struct drm_i915_private *i915)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	for_each_engine(engine, i915, id)
+		clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
+
+	clear_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags);
+	wake_up_all(&i915->gpu_error.reset_queue);
+}
diff --git a/drivers/gpu/drm/i915/selftests/igt_common.h b/drivers/gpu/drm/i915/selftests/igt_common.h
new file mode 100644
index 000000000000..3df12ed10200
--- /dev/null
+++ b/drivers/gpu/drm/i915/selftests/igt_common.h
@@ -0,0 +1,15 @@ 
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2018 Intel Corporation
+ */
+
+#ifndef __I915_SELFTESTS_IGT_COMMON_H__
+#define __I915_SELFTESTS_IGT_COMMON_H__
+
+#include "../i915_drv.h"
+
+void igt_global_reset_lock(struct drm_i915_private *i915);
+void igt_global_reset_unlock(struct drm_i915_private *i915);
+
+#endif
diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
index defe671130ab..af31e4a4979d 100644
--- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
@@ -25,6 +25,7 @@ 
 #include <linux/kthread.h>
 
 #include "../i915_selftest.h"
+#include "igt_common.h"
 #include "i915_random.h"
 #include "igt_flush_test.h"
 #include "igt_wedge_me.h"
@@ -348,40 +349,6 @@  static int igt_hang_sanitycheck(void *arg)
 	return err;
 }
 
-static void global_reset_lock(struct drm_i915_private *i915)
-{
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-
-	pr_debug("%s: current gpu_error=%08lx\n",
-		 __func__, i915->gpu_error.flags);
-
-	while (test_and_set_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags))
-		wait_event(i915->gpu_error.reset_queue,
-			   !test_bit(I915_RESET_BACKOFF,
-				     &i915->gpu_error.flags));
-
-	for_each_engine(engine, i915, id) {
-		while (test_and_set_bit(I915_RESET_ENGINE + id,
-					&i915->gpu_error.flags))
-			wait_on_bit(&i915->gpu_error.flags,
-				    I915_RESET_ENGINE + id,
-				    TASK_UNINTERRUPTIBLE);
-	}
-}
-
-static void global_reset_unlock(struct drm_i915_private *i915)
-{
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-
-	for_each_engine(engine, i915, id)
-		clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
-
-	clear_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags);
-	wake_up_all(&i915->gpu_error.reset_queue);
-}
-
 static int igt_global_reset(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
@@ -390,7 +357,7 @@  static int igt_global_reset(void *arg)
 
 	/* Check that we can issue a global GPU reset */
 
-	global_reset_lock(i915);
+	igt_global_reset_lock(i915);
 	set_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags);
 
 	mutex_lock(&i915->drm.struct_mutex);
@@ -405,7 +372,7 @@  static int igt_global_reset(void *arg)
 	mutex_unlock(&i915->drm.struct_mutex);
 
 	GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags));
-	global_reset_unlock(i915);
+	igt_global_reset_unlock(i915);
 
 	if (i915_terminally_wedged(&i915->gpu_error))
 		err = -EIO;
@@ -936,7 +903,7 @@  static int igt_reset_wait(void *arg)
 
 	/* Check that we detect a stuck waiter and issue a reset */
 
-	global_reset_lock(i915);
+	igt_global_reset_lock(i915);
 
 	mutex_lock(&i915->drm.struct_mutex);
 	err = hang_init(&h, i915);
@@ -988,7 +955,7 @@  static int igt_reset_wait(void *arg)
 	hang_fini(&h);
 unlock:
 	mutex_unlock(&i915->drm.struct_mutex);
-	global_reset_unlock(i915);
+	igt_global_reset_unlock(i915);
 
 	if (i915_terminally_wedged(&i915->gpu_error))
 		return -EIO;
@@ -1066,7 +1033,7 @@  static int __igt_reset_evict_vma(struct drm_i915_private *i915,
 
 	/* Check that we can recover an unbind stuck on a hanging request */
 
-	global_reset_lock(i915);
+	igt_global_reset_lock(i915);
 
 	mutex_lock(&i915->drm.struct_mutex);
 	err = hang_init(&h, i915);
@@ -1186,7 +1153,7 @@  static int __igt_reset_evict_vma(struct drm_i915_private *i915,
 	hang_fini(&h);
 unlock:
 	mutex_unlock(&i915->drm.struct_mutex);
-	global_reset_unlock(i915);
+	igt_global_reset_unlock(i915);
 
 	if (i915_terminally_wedged(&i915->gpu_error))
 		return -EIO;
@@ -1266,7 +1233,7 @@  static int igt_reset_queue(void *arg)
 
 	/* Check that we replay pending requests following a hang */
 
-	global_reset_lock(i915);
+	igt_global_reset_lock(i915);
 
 	mutex_lock(&i915->drm.struct_mutex);
 	err = hang_init(&h, i915);
@@ -1397,7 +1364,7 @@  static int igt_reset_queue(void *arg)
 	hang_fini(&h);
 unlock:
 	mutex_unlock(&i915->drm.struct_mutex);
-	global_reset_unlock(i915);
+	igt_global_reset_unlock(i915);
 
 	if (i915_terminally_wedged(&i915->gpu_error))
 		return -EIO;
diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
index 80396b3592f5..194a3c30554d 100644
--- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
@@ -6,6 +6,8 @@ 
 
 #include "../i915_selftest.h"
 
+#include "igt_common.h"
+#include "igt_flush_test.h"
 #include "igt_spinner.h"
 #include "igt_wedge_me.h"
 #include "mock_context.h"
@@ -290,7 +292,6 @@  static int live_reset_whitelist(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
 	struct intel_engine_cs *engine = i915->engine[RCS];
-	struct i915_gpu_error *error = &i915->gpu_error;
 	struct whitelist w;
 	int err = 0;
 
@@ -302,8 +303,7 @@  static int live_reset_whitelist(void *arg)
 	if (!whitelist_build(engine, &w))
 		return 0;
 
-	set_bit(I915_RESET_BACKOFF, &error->flags);
-	set_bit(I915_RESET_ENGINE + engine->id, &error->flags);
+	igt_global_reset_lock(i915);
 
 	if (intel_has_reset_engine(i915)) {
 		err = check_whitelist_across_reset(engine,
@@ -322,15 +322,152 @@  static int live_reset_whitelist(void *arg)
 	}
 
 out:
-	clear_bit(I915_RESET_ENGINE + engine->id, &error->flags);
-	clear_bit(I915_RESET_BACKOFF, &error->flags);
+	igt_global_reset_unlock(i915);
 	return err;
 }
 
+bool intel_engine_workarounds_verify(struct intel_engine_cs *engine,
+				     const char *from);
+
+static bool verify_gt_engine_wa(struct drm_i915_private *i915, const char *str)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	bool ok = true;
+
+	ok &= intel_gt_workarounds_verify(i915, str);
+
+	for_each_engine(engine, i915, id)
+		ok &= intel_engine_workarounds_verify(engine, str);
+
+	return ok;
+}
+
+static int
+live_gpu_reset_gt_engine_workarounds(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct i915_gpu_error *error = &i915->gpu_error;
+	bool ok;
+
+	if (!intel_has_gpu_reset(i915))
+		return 0;
+
+	pr_info("Verifying after GPU reset...\n");
+
+	igt_global_reset_lock(i915);
+
+	ok = verify_gt_engine_wa(i915, "before reset");
+	if (!ok)
+		goto out;
+
+	intel_runtime_pm_get(i915);
+	set_bit(I915_RESET_HANDOFF, &error->flags);
+	i915_reset(i915, ALL_ENGINES, "live_workarounds");
+	intel_runtime_pm_put(i915);
+
+	ok = verify_gt_engine_wa(i915, "after reset");
+
+out:
+	igt_global_reset_unlock(i915);
+
+	return ok ? 0 : -ESRCH;
+}
+
+static int
+live_engine_reset_gt_engine_workarounds(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct intel_engine_cs *engine;
+	struct i915_gem_context *ctx;
+	struct igt_spinner spin;
+	enum intel_engine_id id;
+	struct i915_request *rq;
+	int ret = 0;
+
+	if (!intel_has_reset_engine(i915))
+		return 0;
+
+	ctx = kernel_context(i915);
+	if (IS_ERR(ctx))
+		return PTR_ERR(ctx);
+
+	for_each_engine(engine, i915, id) {
+		bool ok;
+
+		pr_info("Verifying after %s reset...\n", engine->name);
+
+		igt_global_reset_lock(i915);
+
+		ok = verify_gt_engine_wa(i915, "before reset");
+		if (!ok) {
+			ret = -ESRCH;
+			goto err;
+		}
+
+		intel_runtime_pm_get(i915);
+		i915_reset_engine(engine, "live_workarounds");
+		intel_runtime_pm_put(i915);
+
+		ok = verify_gt_engine_wa(i915, "after idle reset");
+		if (!ok) {
+			ret = -ESRCH;
+			goto err;
+		}
+
+		ret = igt_spinner_init(&spin, i915);
+		if (ret)
+			goto err;
+
+		intel_runtime_pm_get(i915);
+
+		rq = igt_spinner_create_request(&spin, ctx, engine, MI_NOOP);
+		if (IS_ERR(rq)) {
+			ret = PTR_ERR(rq);
+			igt_spinner_fini(&spin);
+			goto err;
+		}
+
+		i915_request_add(rq);
+
+		if (!igt_wait_for_spinner(&spin, rq)) {
+			pr_err("Spinner failed to start\n");
+			igt_spinner_fini(&spin);
+			ret = -ETIMEDOUT;
+			goto err;
+		}
+
+		i915_reset_engine(engine, "live_workarounds");
+
+		intel_runtime_pm_put(i915);
+
+		igt_spinner_end(&spin);
+		igt_spinner_fini(&spin);
+
+		ok = verify_gt_engine_wa(i915, "after busy reset");
+		if (!ok) {
+			ret = -ESRCH;
+			goto err;
+		}
+
+		igt_global_reset_unlock(i915);
+	}
+
+err:
+	igt_global_reset_unlock(i915);
+	kernel_context_close(ctx);
+
+	igt_flush_test(i915, I915_WAIT_LOCKED);
+
+	return ret;
+}
+
 int intel_workarounds_live_selftests(struct drm_i915_private *i915)
 {
 	static const struct i915_subtest tests[] = {
 		SUBTEST(live_reset_whitelist),
+		SUBTEST(live_gpu_reset_gt_engine_workarounds),
+		SUBTEST(live_engine_reset_gt_engine_workarounds),
 	};
 	int err;