Message ID | 20231106235929.454983-3-John.C.Harrison@Intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Selftest for FAST_REQUEST feature | expand |
On 11/6/2023 3:59 PM, John.C.Harrison@Intel.com wrote: > From: John Harrison <John.C.Harrison@Intel.com> > > There is a mechanism for reporting errors from fire and forget H2G > messages. This is the only way to find out about almost any error in > the GuC backend submission path. So it would be useful to know that it > is working. > > Signed-off-by: John Harrison <John.C.Harrison@Intel.com> > --- > drivers/gpu/drm/i915/gt/uc/intel_guc.h | 4 + > drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 9 ++ > drivers/gpu/drm/i915/gt/uc/selftest_guc.c | 122 ++++++++++++++++++++++ > 3 files changed, 135 insertions(+) > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h > index 2b6dfe62c8f2a..e22c12ce245ad 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h > @@ -297,6 +297,10 @@ struct intel_guc { > * @number_guc_id_stolen: The number of guc_ids that have been stolen > */ > int number_guc_id_stolen; > + /** > + * @fast_response_selftest: Backdoor to CT handler for fast response selftest > + */ > + u32 fast_response_selftest; > #endif > }; > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > index 89e314b3756bb..9d958afb78b7f 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > @@ -1076,6 +1076,15 @@ static int ct_handle_response(struct intel_guc_ct *ct, struct ct_incoming_msg *r > found = true; > break; > } > + > +#ifdef CONFIG_DRM_I915_SELFTEST > + if (!found && ct_to_guc(ct)->fast_response_selftest) { > + CT_DEBUG(ct, "Assuming unsolicited response due to FAST_REQUEST selftest\n"); > + ct_to_guc(ct)->fast_response_selftest++; > + found = 1; found = true ? 
it's the same thing, but it's cleaner to assign boolean values to bool variables > + } > +#endif > + > if (!found) { > CT_ERROR(ct, "Unsolicited response message: len %u, data %#x (fence %u, last %u)\n", > len, hxg[0], fence, ct->requests.last_fence); > diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c > index bfb72143566f6..97fbbb396336c 100644 > --- a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c > +++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c > @@ -286,11 +286,133 @@ static int intel_guc_steal_guc_ids(void *arg) > return ret; > } > > +/* > + * Send a context schedule H2G message with an invalid context id. > + * This should generate a GUC_RESULT_INVALID_CONTEXT response. > + */ > +static int bad_h2g(struct intel_guc *guc) > +{ > + u32 action[3], len = 0; AFAICS This is a 2 DW command, so you can use action[2]. > + > + action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT; > + action[len++] = 0x12345678; > + > + return intel_guc_send_nb(guc, action, len, 0); > +} > + > +/* > + * Set a spinner running to make sure the system is alive and active, > + * then send a bad but asynchronous H2G command and wait to see if an > + * error response is returned. If no response is received or if the > + * spinner dies then the test will fail. 
> + */ > +#define FAST_RESPONSE_TIMEOUT_MS 1000 > +static int intel_guc_fast_request(void *arg) > +{ > + struct intel_gt *gt = arg; > + struct intel_context *ce; > + struct igt_spinner spin; > + struct i915_request *rq; > + intel_wakeref_t wakeref; > + struct intel_engine_cs *engine = intel_selftest_find_any_engine(gt); > + ktime_t before, now, delta; > + bool spinning = false; > + u64 delta_ms; > + int ret = 0; > + > + if (!engine) > + return 0; > + > + wakeref = intel_runtime_pm_get(gt->uncore->rpm); > + > + ce = intel_context_create(engine); > + if (IS_ERR(ce)) { > + ret = PTR_ERR(ce); > + gt_err(gt, "Failed to create spinner request: %pe\n", ce); > + goto err_pm; > + } > + > + ret = igt_spinner_init(&spin, engine->gt); > + if (ret) { > + gt_err(gt, "Failed to create spinner: %pe\n", ERR_PTR(ret)); > + goto err_pm; > + } > + spinning = true; > + > + rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); > + intel_context_put(ce); > + if (IS_ERR(rq)) { > + ret = PTR_ERR(rq); > + gt_err(gt, "Failed to create spinner request: %pe\n", rq); > + goto err_spin; > + } > + > + ret = request_add_spin(rq, &spin); > + if (ret) { > + gt_err(gt, "Failed to add Spinner request: %pe\n", ERR_PTR(ret)); > + goto err_rq; > + } > + > + gt->uc.guc.fast_response_selftest = 1; > + > + ret = bad_h2g(&gt->uc.guc); > + if (ret) { > + gt_err(gt, "Failed to send H2G: %pe\n", ERR_PTR(ret)); > + goto err_rq; > + } > + > + before = ktime_get(); > + while (gt->uc.guc.fast_response_selftest == 1) { > + ret = i915_request_wait(rq, 0, 1); > + if (ret != -ETIME) { > + gt_err(gt, "Request wait failed: %pe\n", ERR_PTR(ret)); > + goto err_rq; > + } > + now = ktime_get(); > + delta = ktime_sub(now, before); > + delta_ms = ktime_to_ms(delta); > + > + if (delta_ms > FAST_RESPONSE_TIMEOUT_MS) { > + gt_err(gt, "Timed out waiting for fast request error!\n"); > + ret = -ETIME; > + goto err_rq; > + } > + } This seems a bit convoluted. 
Why not just wait for either the request completion or the fast_response_selftest value to change? something like: ret = wait_for(fast_response_selftest != 1 || i915_request_completed(rq), FAST_RESPONSE_TIMEOUT_MS); if (ret || i915_request_completed(rq)) // error Daniele > + > + if (gt->uc.guc.fast_response_selftest != 2) { > + gt_err(gt, "Unexpected fast response count: %d\n", > + gt->uc.guc.fast_response_selftest); > + goto err_rq; > + } > + > + igt_spinner_end(&spin); > + spinning = false; > + > + ret = intel_selftest_wait_for_rq(rq); > + if (ret) { > + gt_err(gt, "Request failed to complete: %pe\n", ERR_PTR(ret)); > + goto err_rq; > + } > + > +err_rq: > + i915_request_put(rq); > + > +err_spin: > + if (spinning) > + igt_spinner_end(&spin); > + igt_spinner_fini(&spin); > + > +err_pm: > + intel_runtime_pm_put(gt->uncore->rpm, wakeref); > + return ret; > +} > + > int intel_guc_live_selftests(struct drm_i915_private *i915) > { > static const struct i915_subtest tests[] = { > SUBTEST(intel_guc_scrub_ctbs), > SUBTEST(intel_guc_steal_guc_ids), > + SUBTEST(intel_guc_fast_request), > }; > struct intel_gt *gt = to_gt(i915); >
On 11/9/2023 12:33, Daniele Ceraolo Spurio wrote: > On 11/6/2023 3:59 PM, John.C.Harrison@Intel.com wrote: >> From: John Harrison <John.C.Harrison@Intel.com> >> >> There is a mechanism for reporting errors from fire and forget H2G >> messages. This is the only way to find out about almost any error in >> the GuC backend submission path. So it would be useful to know that it >> is working. >> >> Signed-off-by: John Harrison <John.C.Harrison@Intel.com> >> --- >> drivers/gpu/drm/i915/gt/uc/intel_guc.h | 4 + >> drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 9 ++ >> drivers/gpu/drm/i915/gt/uc/selftest_guc.c | 122 ++++++++++++++++++++++ >> 3 files changed, 135 insertions(+) >> >> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h >> b/drivers/gpu/drm/i915/gt/uc/intel_guc.h >> index 2b6dfe62c8f2a..e22c12ce245ad 100644 >> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h >> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h >> @@ -297,6 +297,10 @@ struct intel_guc { >> * @number_guc_id_stolen: The number of guc_ids that have been >> stolen >> */ >> int number_guc_id_stolen; >> + /** >> + * @fast_response_selftest: Backdoor to CT handler for fast >> response selftest >> + */ >> + u32 fast_response_selftest; >> #endif >> }; >> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c >> b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c >> index 89e314b3756bb..9d958afb78b7f 100644 >> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c >> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c >> @@ -1076,6 +1076,15 @@ static int ct_handle_response(struct >> intel_guc_ct *ct, struct ct_incoming_msg *r >> found = true; >> break; >> } >> + >> +#ifdef CONFIG_DRM_I915_SELFTEST >> + if (!found && ct_to_guc(ct)->fast_response_selftest) { >> + CT_DEBUG(ct, "Assuming unsolicited response due to >> FAST_REQUEST selftest\n"); >> + ct_to_guc(ct)->fast_response_selftest++; >> + found = 1; > > found = true ? it's the same thing, but it's cleaner to assign boolean > values to bool variables Doh. 
> >> + } >> +#endif >> + >> if (!found) { >> CT_ERROR(ct, "Unsolicited response message: len %u, data >> %#x (fence %u, last %u)\n", >> len, hxg[0], fence, ct->requests.last_fence); >> diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c >> b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c >> index bfb72143566f6..97fbbb396336c 100644 >> --- a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c >> +++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c >> @@ -286,11 +286,133 @@ static int intel_guc_steal_guc_ids(void *arg) >> return ret; >> } >> +/* >> + * Send a context schedule H2G message with an invalid context id. >> + * This should generate a GUC_RESULT_INVALID_CONTEXT response. >> + */ >> +static int bad_h2g(struct intel_guc *guc) >> +{ >> + u32 action[3], len = 0; > > AFAICS This is a 2 DW command, so you can use action[2]. Yup. Copy and paste bug. > >> + >> + action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT; >> + action[len++] = 0x12345678; >> + >> + return intel_guc_send_nb(guc, action, len, 0); >> +} >> + >> +/* >> + * Set a spinner running to make sure the system is alive and active, >> + * then send a bad but asynchronous H2G command and wait to see if an >> + * error response is returned. If no response is received or if the >> + * spinner dies then the test will fail. 
>> + */ >> +#define FAST_RESPONSE_TIMEOUT_MS 1000 >> +static int intel_guc_fast_request(void *arg) >> +{ >> + struct intel_gt *gt = arg; >> + struct intel_context *ce; >> + struct igt_spinner spin; >> + struct i915_request *rq; >> + intel_wakeref_t wakeref; >> + struct intel_engine_cs *engine = >> intel_selftest_find_any_engine(gt); >> + ktime_t before, now, delta; >> + bool spinning = false; >> + u64 delta_ms; >> + int ret = 0; >> + >> + if (!engine) >> + return 0; >> + >> + wakeref = intel_runtime_pm_get(gt->uncore->rpm); >> + >> + ce = intel_context_create(engine); >> + if (IS_ERR(ce)) { >> + ret = PTR_ERR(ce); >> + gt_err(gt, "Failed to create spinner request: %pe\n", ce); >> + goto err_pm; >> + } >> + >> + ret = igt_spinner_init(&spin, engine->gt); >> + if (ret) { >> + gt_err(gt, "Failed to create spinner: %pe\n", ERR_PTR(ret)); >> + goto err_pm; >> + } >> + spinning = true; >> + >> + rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); >> + intel_context_put(ce); >> + if (IS_ERR(rq)) { >> + ret = PTR_ERR(rq); >> + gt_err(gt, "Failed to create spinner request: %pe\n", rq); >> + goto err_spin; >> + } >> + >> + ret = request_add_spin(rq, &spin); >> + if (ret) { >> + gt_err(gt, "Failed to add Spinner request: %pe\n", >> ERR_PTR(ret)); >> + goto err_rq; >> + } >> + >> + gt->uc.guc.fast_response_selftest = 1; >> + >> + ret = bad_h2g(&gt->uc.guc); >> + if (ret) { >> + gt_err(gt, "Failed to send H2G: %pe\n", ERR_PTR(ret)); >> + goto err_rq; >> + } >> + >> + before = ktime_get(); >> + while (gt->uc.guc.fast_response_selftest == 1) { >> + ret = i915_request_wait(rq, 0, 1); >> + if (ret != -ETIME) { >> + gt_err(gt, "Request wait failed: %pe\n", ERR_PTR(ret)); >> + goto err_rq; >> + } >> + now = ktime_get(); >> + delta = ktime_sub(now, before); >> + delta_ms = ktime_to_ms(delta); >> + >> + if (delta_ms > FAST_RESPONSE_TIMEOUT_MS) { >> + gt_err(gt, "Timed out waiting for fast request error!\n"); >> + ret = -ETIME; >> + goto err_rq; >> + } >> + } > > > This seems a bit 
convoluted. Why not just wait for either the request > completion or the fast_response_selftest value to change? > something like: > > ret = wait_for(fast_response_selftest != 1 || i915_request_completed(rq), > FAST_RESPONSE_TIMEOUT_MS); > > if (ret || i915_request_completed(rq)) > // error Um. Because my brain wasn't working when I knocked this up? John. > > > Daniele > >> + >> + if (gt->uc.guc.fast_response_selftest != 2) { >> + gt_err(gt, "Unexpected fast response count: %d\n", >> + gt->uc.guc.fast_response_selftest); >> + goto err_rq; >> + } >> + >> + igt_spinner_end(&spin); >> + spinning = false; >> + >> + ret = intel_selftest_wait_for_rq(rq); >> + if (ret) { >> + gt_err(gt, "Request failed to complete: %pe\n", ERR_PTR(ret)); >> + goto err_rq; >> + } >> + >> +err_rq: >> + i915_request_put(rq); >> + >> +err_spin: >> + if (spinning) >> + igt_spinner_end(&spin); >> + igt_spinner_fini(&spin); >> + >> +err_pm: >> + intel_runtime_pm_put(gt->uncore->rpm, wakeref); >> + return ret; >> +} >> + >> int intel_guc_live_selftests(struct drm_i915_private *i915) >> { >> static const struct i915_subtest tests[] = { >> SUBTEST(intel_guc_scrub_ctbs), >> SUBTEST(intel_guc_steal_guc_ids), >> + SUBTEST(intel_guc_fast_request), >> }; >> struct intel_gt *gt = to_gt(i915); >
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index 2b6dfe62c8f2a..e22c12ce245ad 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -297,6 +297,10 @@ struct intel_guc { * @number_guc_id_stolen: The number of guc_ids that have been stolen */ int number_guc_id_stolen; + /** + * @fast_response_selftest: Backdoor to CT handler for fast response selftest + */ + u32 fast_response_selftest; #endif }; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c index 89e314b3756bb..9d958afb78b7f 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c @@ -1076,6 +1076,15 @@ static int ct_handle_response(struct intel_guc_ct *ct, struct ct_incoming_msg *r found = true; break; } + +#ifdef CONFIG_DRM_I915_SELFTEST + if (!found && ct_to_guc(ct)->fast_response_selftest) { + CT_DEBUG(ct, "Assuming unsolicited response due to FAST_REQUEST selftest\n"); + ct_to_guc(ct)->fast_response_selftest++; + found = 1; + } +#endif + if (!found) { CT_ERROR(ct, "Unsolicited response message: len %u, data %#x (fence %u, last %u)\n", len, hxg[0], fence, ct->requests.last_fence); diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c index bfb72143566f6..97fbbb396336c 100644 --- a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c @@ -286,11 +286,133 @@ static int intel_guc_steal_guc_ids(void *arg) return ret; } +/* + * Send a context schedule H2G message with an invalid context id. + * This should generate a GUC_RESULT_INVALID_CONTEXT response. 
+ */ +static int bad_h2g(struct intel_guc *guc) +{ + u32 action[3], len = 0; + + action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT; + action[len++] = 0x12345678; + + return intel_guc_send_nb(guc, action, len, 0); +} + +/* + * Set a spinner running to make sure the system is alive and active, + * then send a bad but asynchronous H2G command and wait to see if an + * error response is returned. If no response is received or if the + * spinner dies then the test will fail. + */ +#define FAST_RESPONSE_TIMEOUT_MS 1000 +static int intel_guc_fast_request(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_context *ce; + struct igt_spinner spin; + struct i915_request *rq; + intel_wakeref_t wakeref; + struct intel_engine_cs *engine = intel_selftest_find_any_engine(gt); + ktime_t before, now, delta; + bool spinning = false; + u64 delta_ms; + int ret = 0; + + if (!engine) + return 0; + + wakeref = intel_runtime_pm_get(gt->uncore->rpm); + + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + ret = PTR_ERR(ce); + gt_err(gt, "Failed to create spinner request: %pe\n", ce); + goto err_pm; + } + + ret = igt_spinner_init(&spin, engine->gt); + if (ret) { + gt_err(gt, "Failed to create spinner: %pe\n", ERR_PTR(ret)); + goto err_pm; + } + spinning = true; + + rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); + intel_context_put(ce); + if (IS_ERR(rq)) { + ret = PTR_ERR(rq); + gt_err(gt, "Failed to create spinner request: %pe\n", rq); + goto err_spin; + } + + ret = request_add_spin(rq, &spin); + if (ret) { + gt_err(gt, "Failed to add Spinner request: %pe\n", ERR_PTR(ret)); + goto err_rq; + } + + gt->uc.guc.fast_response_selftest = 1; + + ret = bad_h2g(&gt->uc.guc); + if (ret) { + gt_err(gt, "Failed to send H2G: %pe\n", ERR_PTR(ret)); + goto err_rq; + } + + before = ktime_get(); + while (gt->uc.guc.fast_response_selftest == 1) { + ret = i915_request_wait(rq, 0, 1); + if (ret != -ETIME) { + gt_err(gt, "Request wait failed: %pe\n", ERR_PTR(ret)); + goto err_rq; + } + now = 
ktime_get(); + delta = ktime_sub(now, before); + delta_ms = ktime_to_ms(delta); + + if (delta_ms > FAST_RESPONSE_TIMEOUT_MS) { + gt_err(gt, "Timed out waiting for fast request error!\n"); + ret = -ETIME; + goto err_rq; + } + } + + if (gt->uc.guc.fast_response_selftest != 2) { + gt_err(gt, "Unexpected fast response count: %d\n", + gt->uc.guc.fast_response_selftest); + goto err_rq; + } + + igt_spinner_end(&spin); + spinning = false; + + ret = intel_selftest_wait_for_rq(rq); + if (ret) { + gt_err(gt, "Request failed to complete: %pe\n", ERR_PTR(ret)); + goto err_rq; + } + +err_rq: + i915_request_put(rq); + +err_spin: + if (spinning) + igt_spinner_end(&spin); + igt_spinner_fini(&spin); + +err_pm: + intel_runtime_pm_put(gt->uncore->rpm, wakeref); + return ret; +} + int intel_guc_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(intel_guc_scrub_ctbs), SUBTEST(intel_guc_steal_guc_ids), + SUBTEST(intel_guc_fast_request), }; struct intel_gt *gt = to_gt(i915);