diff mbox series

drm/i915/selftest/gsc: Ensure GSC Proxy init completes before selftests

Message ID 20230512235824.1399566-1-alan.previn.teres.alexis@intel.com (mailing list archive)
State New, archived
Headers show
Series drm/i915/selftest/gsc: Ensure GSC Proxy init completes before selftests | expand

Commit Message

Teres Alexis, Alan Previn May 12, 2023, 11:58 p.m. UTC
On MTL, if the GSC Proxy init flows haven't completed, submissions to the
GSC engine will fail. Those init flows are dependent on the mei's
gsc_proxy component that is loaded in parallel with i915 and a
worker that could potentially start after i915 driver init is done.

That said, all subsystems that access the GSC engine today do check
for such init flow completion before using the GSC engine. However,
selftests currently don't wait on anything before starting.

To fix this, add a waiter function at the start of __run_selftests
that waits for gsc-proxy init flows to complete. While implementing this,
use a table of function pointers so it's scalable to add additional
waiter functions for future such "wait on dependency" cases.

Signed-off-by: Alan Previn <alan.previn.teres.alexis@intel.com>
---
 .../gpu/drm/i915/selftests/i915_selftest.c    | 53 +++++++++++++++++++
 1 file changed, 53 insertions(+)


base-commit: 222ff19f23b0bd6aca0b52001d69699f78f5a206

Comments

Teres Alexis, Alan Previn May 24, 2023, 5:15 p.m. UTC | #1
The error below seems unrelated to the change in this patch. In fact, the test below fails on APL, which won't exercise the code changed by this patch.
However, from internal testing we did see cases where CI's selftest timeout is shorter than the time the GSC Proxy requires to complete
(i.e. the selftest would bail with a timeout because it was waiting for the GSC proxy to complete, but the completion came in after the timeout).

So we might need to re-rev this anyway with a longer timeout (GSC Proxy init typically takes longer on first-time boot than on driver reload or suspend-resume).

...alan

On Sat, 2023-05-13 at 04:55 +0000, Patchwork wrote:
> Patch Details
> Series: drm/i915/selftest/gsc: Ensure GSC Proxy init completes before selftests
> URL:    https://patchwork.freedesktop.org/series/117713/
> State:  failure
> Details:        https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_117713v1/index.html
> CI Bug Log - changes from CI_DRM_13143_full -> Patchwork_117713v1_full
> Summary
> 
> FAILURE
> 
> Serious unknown changes coming with Patchwork_117713v1_full absolutely need to be
> verified manually.
> 
> If you think the reported changes have nothing to do with the changes
> introduced in Patchwork_117713v1_full, please notify your bug team to allow them
> to document this new failure mode, which will reduce false positives in CI.
> 
> Participating hosts (7 -> 7)
> 
> No changes in participating hosts
> 
> Possible new issues
> 
> Here are the unknown changes that may have been introduced in Patchwork_117713v1_full:
> 
> IGT changes
> Possible regressions
> 
>   *   igt@kms_plane@plane-panning-bottom-right-suspend@pipe-b-planes:
>      *   shard-apl: PASS<https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_13143/shard-apl7/igt@kms_plane@plane-panning-bottom-right-suspend@pipe-b-planes.html> -> ABORT<https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_117713v1/shard-apl2/igt@kms_plane@plane-panning-bottom-right-suspend@pipe-b-planes.html>
> 
alan:snip
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/selftests/i915_selftest.c b/drivers/gpu/drm/i915/selftests/i915_selftest.c
index 39da0fb0d6d2..a05effdbce94 100644
--- a/drivers/gpu/drm/i915/selftests/i915_selftest.c
+++ b/drivers/gpu/drm/i915/selftests/i915_selftest.c
@@ -24,6 +24,8 @@ 
 #include <linux/random.h>
 
 #include "gt/intel_gt_pm.h"
+#include "gt/uc/intel_gsc_fw.h"
+
 #include "i915_driver.h"
 #include "i915_drv.h"
 #include "i915_selftest.h"
@@ -127,6 +129,55 @@  static void set_default_test_all(struct selftest *st, unsigned int count)
 		st[i].enabled = true;
 }
 
+/* Wait up to @timeout_ms for GSC proxy init; 0 if done or not needed, else -ETIME. */
+static int
+__wait_gsc_proxy_completed(struct drm_i915_private *i915,
+			   unsigned long timeout_ms)
+{
+	struct intel_gt *gt;
+
+	if (!IS_ENABLED(CONFIG_INTEL_MEI_GSC_PROXY))
+		return 0;
+
+	gt = i915->media_gt;
+	if (!gt || !HAS_ENGINE(gt, GSC0) || !intel_uc_fw_is_loadable(&gt->uc.gsc.fw))
+		return 0;
+
+	return wait_for(intel_gsc_uc_fw_proxy_init_done(&gt->uc.gsc), timeout_ms) ? -ETIME : 0;
+}
+
+/* A pre-selftest dependency: @name labels it in failure logs, the callback waits for it. */
+struct __startup_waiter {
+	const char *name;
+	int (*wait_to_completed)(struct drm_i915_private *i915, unsigned long timeout_ms);
+};
+
+/* Waiters are invoked in table order; add an entry here for each new dependency. */
+static const struct __startup_waiter all_startup_waiters[] = {
+	{ .name = "gsc_proxy", .wait_to_completed = __wait_gsc_proxy_completed },
+};
+
+/* Run the waiters in order; returns the first error, or 0 (also when @i915 is NULL). */
+static int __wait_on_all_system_dependencies(struct drm_i915_private *i915)
+{
+	const struct __startup_waiter *waiter = all_startup_waiters;
+	const struct __startup_waiter *end = waiter + ARRAY_SIZE(all_startup_waiters);
+	int ret;
+
+	for (; i915 && waiter < end; waiter++) {
+		if (!waiter->wait_to_completed)
+			continue;
+		ret = waiter->wait_to_completed(i915, i915_selftest.timeout_ms);
+		if (ret) {
+			pr_info(DRIVER_NAME ": Pre-selftest waiter %s failed with %d\n",
+				waiter->name, ret);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
 static int __run_selftests(const char *name,
 			   struct selftest *st,
 			   unsigned int count,
@@ -134,6 +185,8 @@  static int __run_selftests(const char *name,
 {
 	int err = 0;
 
+	__wait_on_all_system_dependencies(data);
+
 	while (!i915_selftest.random_seed)
 		i915_selftest.random_seed = get_random_u32();