diff mbox series

[i-g-t,7/7] test/i915: Add i915_rc6_ctx_corruption

Message ID 20191113154913.8787-7-mika.kuoppala@linux.intel.com (mailing list archive)
State New, archived
Headers show
Series [i-g-t,1/7] lib/igt_dummyload: Send batch as first | expand

Commit Message

Mika Kuoppala Nov. 13, 2019, 3:49 p.m. UTC
From: Imre Deak <imre.deak@intel.com>

Add a test to exercise the kernel's mechanism to detect RC6
context corruptions, take the necessary action in response (disable
RC6 and runtime PM) and recover when possible (after system
suspend/resume).

v2:
- Skip test on non-existing engines.
- Fix for old kernels where the command parser returned EINVAL
  instead of EACCES for a banned privileged command.

Signed-off-by: Imre Deak <imre.deak@intel.com>
---
 tests/Makefile.sources               |   3 +
 tests/i915/i915_rc6_ctx_corruption.c | 196 +++++++++++++++++++++++++++
 tests/meson.build                    |   1 +
 3 files changed, 200 insertions(+)
 create mode 100644 tests/i915/i915_rc6_ctx_corruption.c

Comments

Chris Wilson Nov. 13, 2019, 4:18 p.m. UTC | #1
Quoting Mika Kuoppala (2019-11-13 15:49:13)
> From: Imre Deak <imre.deak@intel.com>
> 
> Add a test to exercise the kernel's mechanism to detect RC6
> context corruptions, take the necessary action in response (disable
> RC6 and runtime PM) and recover when possible (after system
> suspend/resume).
> 
> v2:
> - Skip test on non-existing engines.
> - Fix for old kernels where the command parser returned EINVAL
>   instead of EACCES for a banned privileged command.
> 
> Signed-off-by: Imre Deak <imre.deak@intel.com>
> ---
>  tests/Makefile.sources               |   3 +
>  tests/i915/i915_rc6_ctx_corruption.c | 196 +++++++++++++++++++++++++++
>  tests/meson.build                    |   1 +
>  3 files changed, 200 insertions(+)
>  create mode 100644 tests/i915/i915_rc6_ctx_corruption.c
> 
> diff --git a/tests/Makefile.sources b/tests/Makefile.sources
> index 75c79edb..c0f401c7 100644
> --- a/tests/Makefile.sources
> +++ b/tests/Makefile.sources
> @@ -529,6 +529,9 @@ i915_pm_sseu_SOURCES = i915/i915_pm_sseu.c
>  TESTS_progs += i915_query
>  i915_query_SOURCES = i915/i915_query.c
>  
> +TESTS_progs += i915_rc6_ctx_corruption
> +i915_rc6_ctx_corruption_SOURCES = i915/i915_rc6_ctx_corruption.c
> +
>  TESTS_progs += i915_selftest
>  i915_selftest_SOURCES = i915/i915_selftest.c
>  
> diff --git a/tests/i915/i915_rc6_ctx_corruption.c b/tests/i915/i915_rc6_ctx_corruption.c
> new file mode 100644
> index 00000000..a3326307
> --- /dev/null
> +++ b/tests/i915/i915_rc6_ctx_corruption.c
> @@ -0,0 +1,196 @@
> +/*
> + * Copyright © 2019 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + */
> +
> +#include <stdint.h>
> +
> +#include "igt.h"
> +#include "igt_gt.h"
> +#include "igt_device.h"
> +
> +#define CTX_INFO_REG                   0xA300
> +
> +static int drm_fd;
> +static struct pci_device *pci_dev;
> +static uint32_t devid;
> +
> +static int read_reg_on_engine(const struct intel_execution_engine2 *engine,
> +                             int reg_address, uint32_t *reg_val)
> +{
> +       const bool r64b = intel_gen(devid) >= 8;
> +       struct drm_i915_gem_exec_object2 obj[2];
> +       struct drm_i915_gem_relocation_entry reloc[1];
> +       struct drm_i915_gem_execbuffer2 execbuf;
> +       uint32_t *batch;
> +       uint32_t *dst_buf;
> +       int ret;
> +       int i;
> +
> +       memset(obj, 0, sizeof(obj));
> +       obj[0].handle = gem_create(drm_fd, 4096);
> +       obj[1].handle = gem_create(drm_fd, 4096);
> +       obj[1].relocs_ptr = to_user_pointer(reloc);
> +       obj[1].relocation_count = 1;
> +
> +       batch = gem_mmap__cpu(drm_fd, obj[1].handle, 0, 4096, PROT_WRITE);
> +       gem_set_domain(drm_fd, obj[1].handle,
> +                      I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
> +
> +       i = 0;
> +       batch[i++] = 0x24 << 23 | (1 + r64b); /* SRM */
> +       batch[i++] = reg_address;
> +       reloc[0].target_handle = obj[0].handle;
> +       reloc[0].presumed_offset = obj[0].offset;
> +       reloc[0].offset = i * sizeof(uint32_t);
> +       reloc[0].delta = 0;
> +       reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
> +       reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;
> +       batch[i++] = reloc[0].delta;
> +       if (r64b)
> +               batch[i++] = 0;
> +
> +       batch[i++] = MI_BATCH_BUFFER_END;
> +       munmap(batch, 4096);
> +
> +       memset(&execbuf, 0, sizeof(execbuf));
> +       execbuf.buffers_ptr = to_user_pointer(obj);
> +       execbuf.buffer_count = 2;
> +       execbuf.flags = engine->flags;
> +
> +       ret = __gem_execbuf(drm_fd, &execbuf);
> +       gem_close(drm_fd, obj[1].handle);
> +
> +       if (ret)
> +               goto err;
> +
> +       if (reg_val) {
> +               dst_buf = gem_mmap__cpu(drm_fd, obj[0].handle, 0, 4096,
> +                                       PROT_READ);
> +               gem_set_domain(drm_fd, obj[0].handle, I915_GEM_DOMAIN_CPU, 0);
> +
> +               *reg_val = dst_buf[0];
> +               munmap(dst_buf, 4096);
> +       }
> +
> +err:
> +       gem_close(drm_fd, obj[0].handle);
> +
> +       return ret;
> +}
> +
> +static bool need_ctx_corruption_wa(void)
> +{
> +       return IS_BROADWELL(devid) || intel_gen(devid) == 9;

That's a little self-serving and doesn't help find bugs where you
weren't expecting them. The test would seem to be useless at predicting new
failures.

This should just be a selftest imo.
-Chris
diff mbox series

Patch

diff --git a/tests/Makefile.sources b/tests/Makefile.sources
index 75c79edb..c0f401c7 100644
--- a/tests/Makefile.sources
+++ b/tests/Makefile.sources
@@ -529,6 +529,9 @@  i915_pm_sseu_SOURCES = i915/i915_pm_sseu.c
 TESTS_progs += i915_query
 i915_query_SOURCES = i915/i915_query.c
 
+TESTS_progs += i915_rc6_ctx_corruption
+i915_rc6_ctx_corruption_SOURCES = i915/i915_rc6_ctx_corruption.c
+
 TESTS_progs += i915_selftest
 i915_selftest_SOURCES = i915/i915_selftest.c
 
diff --git a/tests/i915/i915_rc6_ctx_corruption.c b/tests/i915/i915_rc6_ctx_corruption.c
new file mode 100644
index 00000000..a3326307
--- /dev/null
+++ b/tests/i915/i915_rc6_ctx_corruption.c
@@ -0,0 +1,196 @@ 
+/*
+ * Copyright © 2019 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <stdint.h>
+
+#include "igt.h"
+#include "igt_gt.h"
+#include "igt_device.h"
+
+#define CTX_INFO_REG			0xA300
+
+static int drm_fd;
+static struct pci_device *pci_dev;
+static uint32_t devid;
+
+static int read_reg_on_engine(const struct intel_execution_engine2 *engine,
+			      int reg_address, uint32_t *reg_val)
+{
+	const bool r64b = intel_gen(devid) >= 8; /* gen8+: command is one dword longer (extra zero dword below) */
+	struct drm_i915_gem_exec_object2 obj[2];
+	struct drm_i915_gem_relocation_entry reloc[1];
+	struct drm_i915_gem_execbuffer2 execbuf;
+	uint32_t *batch;
+	uint32_t *dst_buf;
+	int ret;
+	int i;
+
+	memset(obj, 0, sizeof(obj));
+	obj[0].handle = gem_create(drm_fd, 4096); /* destination buffer, read back into *reg_val */
+	obj[1].handle = gem_create(drm_fd, 4096); /* batch buffer; carries the single relocation */
+	obj[1].relocs_ptr = to_user_pointer(reloc);
+	obj[1].relocation_count = 1;
+
+	batch = gem_mmap__cpu(drm_fd, obj[1].handle, 0, 4096, PROT_WRITE);
+	gem_set_domain(drm_fd, obj[1].handle,
+		       I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
+
+	i = 0;
+	batch[i++] = 0x24 << 23 | (1 + r64b); /* SRM; low bits hold the length field */
+	batch[i++] = reg_address;
+	reloc[0].target_handle = obj[0].handle;
+	reloc[0].presumed_offset = obj[0].offset;
+	reloc[0].offset = i * sizeof(uint32_t); /* byte offset of the address dword written just below */
+	reloc[0].delta = 0;
+	reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
+	reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;
+	batch[i++] = reloc[0].delta;
+	if (r64b)
+		batch[i++] = 0;
+
+	batch[i++] = MI_BATCH_BUFFER_END;
+	munmap(batch, 4096);
+
+	memset(&execbuf, 0, sizeof(execbuf));
+	execbuf.buffers_ptr = to_user_pointer(obj);
+	execbuf.buffer_count = 2;
+	execbuf.flags = engine->flags;
+
+	ret = __gem_execbuf(drm_fd, &execbuf);
+	gem_close(drm_fd, obj[1].handle); /* batch handle is dropped whether or not the submission succeeded */
+
+	if (ret)
+		goto err;
+
+	if (reg_val) { /* NULL means the caller only wants the submission result */
+		dst_buf = gem_mmap__cpu(drm_fd, obj[0].handle, 0, 4096,
+					PROT_READ);
+		gem_set_domain(drm_fd, obj[0].handle, I915_GEM_DOMAIN_CPU, 0);
+
+		*reg_val = dst_buf[0];
+		munmap(dst_buf, 4096);
+	}
+
+err: /* shared exit path: also reached on success */
+	gem_close(drm_fd, obj[0].handle);
+
+	return ret; /* unmodified return value of __gem_execbuf() */
+}
+
+static bool need_ctx_corruption_wa(void)
+{
+	return IS_BROADWELL(devid) || intel_gen(devid) == 9; /* the platforms this test treats as affected by the corruption issue */
+}
+
+static void test_read_on_engine(const struct intel_execution_engine2 *engine)
+{
+	int ret;
+
+	gem_require_ring(drm_fd, engine->flags);
+
+	/*
+	 * Recovery is only possible via an S3 suspend/resume or reboot, so
+	 * skip the test if there's no means to recover. Currently this is
+	 * only the case when the selected suspend-to-mem mode is 'deep' (S3).
+	 */
+	igt_require(!need_ctx_corruption_wa() ||
+		    igt_get_suspend_to_mem_mode() == SUSPEND_TO_MEM_DEEP);
+
+	ret = read_reg_on_engine(engine, CTX_INFO_REG, NULL); /* register value is not needed, only the result code */
+
+	/*
+	 * Do we have the blitter engine parser in place, catching the bad
+	 * SRM?
+	 */
+	if ((ret == -EACCES || ret == -EINVAL) &&
+	    engine->class == I915_ENGINE_CLASS_COPY)
+		return;
+
+	igt_assert_eq(ret, 0);
+
+	/*
+	 * On a platform not affected by the corruption issue we are done here
+	 * if not yet hung.
+	 */
+	if (!need_ctx_corruption_wa())
+		return;
+
+	/* Let it suspend if it can. */
+	sleep(1);
+
+	/*
+	 * Due to detection of the corruption, runtime suspend should be
+	 * disabled now.
+	 */
+	igt_assert(igt_wait_for_pm_status(IGT_RUNTIME_PM_STATUS_ACTIVE));
+
+	/*
+	 * Let's check now if system suspend/resume recovers from the problem
+	 * as it should.
+	 */
+	igt_system_suspend_autoresume(SUSPEND_STATE_MEM, SUSPEND_TEST_NONE);
+	igt_assert(igt_wait_for_pm_status(IGT_RUNTIME_PM_STATUS_SUSPENDED)); /* runtime suspend must work again after resume */
+}
+
+static void setup_environment(void)
+{
+	drmModeRes *res;
+
+	drm_fd = drm_open_driver(DRIVER_INTEL);
+	igt_require_gem(drm_fd);
+	pci_dev = intel_get_pci_device();
+	devid = pci_dev->device_id; /* consumed by the intel_gen()/IS_BROADWELL() checks in this file */
+
+	igt_require(igt_setup_runtime_pm());
+
+	kmstest_set_vt_graphics_mode();
+
+	res = drmModeGetResources(drm_fd);
+	igt_assert(res);
+
+	kmstest_unset_all_crtcs(drm_fd, res); /* NOTE(review): res is never released (no drmModeFreeResources() call) */
+
+	igt_require(igt_wait_for_pm_status(IGT_RUNTIME_PM_STATUS_SUSPENDED)); /* skip unless the device reaches runtime suspend first */
+}
+
+static void cleanup_environment(void)
+{
+	close(drm_fd); /* fd opened by setup_environment() */
+}
+
+igt_main {
+	const struct intel_execution_engine2 *e;
+
+	igt_fixture {
+		setup_environment();
+	}
+
+	for (e = intel_execution_engines2; e->name; e++) { /* walk the table until the entry with a NULL name */
+		igt_subtest_f("basic-%s", e->name) /* one "basic-<engine name>" subtest per entry */
+			test_read_on_engine(e);
+	}
+
+	igt_fixture {
+		cleanup_environment();
+	}
+}
diff --git a/tests/meson.build b/tests/meson.build
index 43899b95..56107695 100644
--- a/tests/meson.build
+++ b/tests/meson.build
@@ -237,6 +237,7 @@  i915_progs = [
 	'i915_pm_dc',
 	'i915_pm_rps',
 	'i915_pm_sseu',
+	'i915_rc6_ctx_corruption',
 	'i915_query',
 	'i915_selftest',
 	'i915_suspend',