Message ID | fe2vu5h7v7ooxbhwpbfsypxg5mjrnt56gc3cgrqpnhgrgce334@qfrv2skxrp47 (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | drm/i915/gt: Delete the live_hearbeat_fast selftest | expand |
Hi Krzysztof, On Mon, Jun 03, 2024 at 06:20:22PM +0200, Niemiec, Krzysztof wrote: > The test is trying to push the heartbeat frequency to the limit, which > might sometimes fail. Such a failure does not provide valuable > information, because it does not indicate that there is something > necessarily wrong with either the driver or the hardware. > > Remove the test to prevent random, unnecessary failures from appearing > in CI. > > Suggested-by: Chris Wilson <chris.p.wilson@intel.com> > Signed-off-by: Niemiec, Krzysztof <krzysztof.niemiec@intel.com> Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com> Thanks, Andi
Hi Krzysztof, On Mon, Jun 03, 2024 at 06:20:22PM +0200, Niemiec, Krzysztof wrote: > The test is trying to push the heartbeat frequency to the limit, which > might sometimes fail. Such a failure does not provide valuable > information, because it does not indicate that there is something > necessarily wrong with either the driver or the hardware. > > Remove the test to prevent random, unnecessary failures from appearing > in CI. > > Suggested-by: Chris Wilson <chris.p.wilson@intel.com> > Signed-off-by: Niemiec, Krzysztof <krzysztof.niemiec@intel.com> merged in drm-intel-gt-next. Thank you, Andi
On 03/06/2024 17:20, Niemiec, Krzysztof wrote: > The test is trying to push the heartbeat frequency to the limit, which > might sometimes fail. Such a failure does not provide valuable > information, because it does not indicate that there is something > necessarily wrong with either the driver or the hardware. > > Remove the test to prevent random, unnecessary failures from appearing > in CI. > > Suggested-by: Chris Wilson <chris.p.wilson@intel.com> > Signed-off-by: Niemiec, Krzysztof <krzysztof.niemiec@intel.com> Just a note in passing that comma in the email display name is I believe not RFC 5322 compliant and there might be tools which barf on it(*). If you can put it in double quotes, it would be advisable. Regards, Tvrtko *) Such as my internal pull request generator which uses CPAN's Email::Address::XS. :) > --- > .../drm/i915/gt/selftest_engine_heartbeat.c | 110 ------------------ > 1 file changed, 110 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c > index ef014df4c4fc..9e4f0e417b3b 100644 > --- a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c > +++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c > @@ -193,115 +193,6 @@ static int live_idle_pulse(void *arg) > return err; > } > > -static int cmp_u32(const void *_a, const void *_b) > -{ > - const u32 *a = _a, *b = _b; > - > - return *a - *b; > -} > - > -static int __live_heartbeat_fast(struct intel_engine_cs *engine) > -{ > - const unsigned int error_threshold = max(20000u, jiffies_to_usecs(6)); > - struct intel_context *ce; > - struct i915_request *rq; > - ktime_t t0, t1; > - u32 times[5]; > - int err; > - int i; > - > - ce = intel_context_create(engine); > - if (IS_ERR(ce)) > - return PTR_ERR(ce); > - > - intel_engine_pm_get(engine); > - > - err = intel_engine_set_heartbeat(engine, 1); > - if (err) > - goto err_pm; > - > - for (i = 0; i < ARRAY_SIZE(times); i++) { > - do { > - /* Manufacture a tick */ > - intel_engine_park_heartbeat(engine); > - GEM_BUG_ON(engine->heartbeat.systole); > - engine->serial++; /* pretend we are not idle! */ > - intel_engine_unpark_heartbeat(engine); > - > - flush_delayed_work(&engine->heartbeat.work); > - if (!delayed_work_pending(&engine->heartbeat.work)) { > - pr_err("%s: heartbeat %d did not start\n", > - engine->name, i); > - err = -EINVAL; > - goto err_pm; > - } > - > - rcu_read_lock(); > - rq = READ_ONCE(engine->heartbeat.systole); > - if (rq) > - rq = i915_request_get_rcu(rq); > - rcu_read_unlock(); > - } while (!rq); > - > - t0 = ktime_get(); > - while (rq == READ_ONCE(engine->heartbeat.systole)) > - yield(); /* work is on the local cpu! */ > - t1 = ktime_get(); > - > - i915_request_put(rq); > - times[i] = ktime_us_delta(t1, t0); > - } > - > - sort(times, ARRAY_SIZE(times), sizeof(times[0]), cmp_u32, NULL); > - > - pr_info("%s: Heartbeat delay: %uus [%u, %u]\n", > - engine->name, > - times[ARRAY_SIZE(times) / 2], > - times[0], > - times[ARRAY_SIZE(times) - 1]); > - > - /* > - * Ideally, the upper bound on min work delay would be something like > - * 2 * 2 (worst), +1 for scheduling, +1 for slack. In practice, we > - * are, even with system_wq_highpri, at the mercy of the CPU scheduler > - * and may be stuck behind some slow work for many millisecond. Such > - * as our very own display workers. > - */ > - if (times[ARRAY_SIZE(times) / 2] > error_threshold) { > - pr_err("%s: Heartbeat delay was %uus, expected less than %dus\n", > - engine->name, > - times[ARRAY_SIZE(times) / 2], > - error_threshold); > - err = -EINVAL; > - } > - > - reset_heartbeat(engine); > -err_pm: > - intel_engine_pm_put(engine); > - intel_context_put(ce); > - return err; > -} > - > -static int live_heartbeat_fast(void *arg) > -{ > - struct intel_gt *gt = arg; > - struct intel_engine_cs *engine; > - enum intel_engine_id id; > - int err = 0; > - > - /* Check that the heartbeat ticks at the desired rate. */ > - if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL) > - return 0; > - > - for_each_engine(engine, gt, id) { > - err = __live_heartbeat_fast(engine); > - if (err) > - break; > - } > - > - return err; > -} > - > static int __live_heartbeat_off(struct intel_engine_cs *engine) > { > int err; > @@ -372,7 +263,6 @@ int intel_heartbeat_live_selftests(struct drm_i915_private *i915) > static const struct i915_subtest tests[] = { > SUBTEST(live_idle_flush), > SUBTEST(live_idle_pulse), > - SUBTEST(live_heartbeat_fast), > SUBTEST(live_heartbeat_off), > }; > int saved_hangcheck;
Hi Tvrtko, On Mon, Jun 10, 2024 at 12:42:31PM +0100, Tvrtko Ursulin wrote: > On 03/06/2024 17:20, Niemiec, Krzysztof wrote: > > The test is trying to push the heartbeat frequency to the limit, which > > might sometimes fail. Such a failure does not provide valuable > > information, because it does not indicate that there is something > > necessarily wrong with either the driver or the hardware. > > > > Remove the test to prevent random, unnecessary failures from appearing > > in CI. > > > > Suggested-by: Chris Wilson <chris.p.wilson@intel.com> > > Signed-off-by: Niemiec, Krzysztof <krzysztof.niemiec@intel.com> > > Just a note in passing that comma in the email display name is I believe not > RFC 5322 compliant and there might be tools which barf on it(*). If you can > put it in double quotes, it would be advisable. yes, we discussed it with Krzysztof, I noticed it right after I submitted the code. > Regards, > > Tvrtko > > *) Such as my internal pull request generator which uses CPAN's > Email::Address::XS. :) If we are in time, we can fix it as Krzysztof Niemiec <krzysztof.niemiec@intel.com> Sorry about this oversight, Andi
Hi Andi, On 10/06/2024 13:10, Andi Shyti wrote: > Hi Tvrtko, > > On Mon, Jun 10, 2024 at 12:42:31PM +0100, Tvrtko Ursulin wrote: >> On 03/06/2024 17:20, Niemiec, Krzysztof wrote: >>> The test is trying to push the heartbeat frequency to the limit, which >>> might sometimes fail. Such a failure does not provide valuable >>> information, because it does not indicate that there is something >>> necessarily wrong with either the driver or the hardware. >>> >>> Remove the test to prevent random, unnecessary failures from appearing >>> in CI. >>> >>> Suggested-by: Chris Wilson <chris.p.wilson@intel.com> >>> Signed-off-by: Niemiec, Krzysztof <krzysztof.niemiec@intel.com> >> >> Just a note in passing that comma in the email display name is I believe not >> RFC 5322 compliant and there might be tools which barf on it(*). If you can >> put it in double quotes, it would be advisable. > > yes, we discussed it with Krzysztof, I noticed it right after I > submitted the code. > >> Regards, >> >> Tvrtko >> >> *) Such as my internal pull request generator which uses CPAN's >> Email::Address::XS. :) > > If we are in time, we can fix it as Krzysztof Niemiec <krzysztof.niemiec@intel.com> > > Sorry about this oversight, It's not a big deal (it isn't the first and only occurence) and no need to do anything more than correct the display name going forward. Regards, Tvrtko
diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c index ef014df4c4fc..9e4f0e417b3b 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c @@ -193,115 +193,6 @@ static int live_idle_pulse(void *arg) return err; } -static int cmp_u32(const void *_a, const void *_b) -{ - const u32 *a = _a, *b = _b; - - return *a - *b; -} - -static int __live_heartbeat_fast(struct intel_engine_cs *engine) -{ - const unsigned int error_threshold = max(20000u, jiffies_to_usecs(6)); - struct intel_context *ce; - struct i915_request *rq; - ktime_t t0, t1; - u32 times[5]; - int err; - int i; - - ce = intel_context_create(engine); - if (IS_ERR(ce)) - return PTR_ERR(ce); - - intel_engine_pm_get(engine); - - err = intel_engine_set_heartbeat(engine, 1); - if (err) - goto err_pm; - - for (i = 0; i < ARRAY_SIZE(times); i++) { - do { - /* Manufacture a tick */ - intel_engine_park_heartbeat(engine); - GEM_BUG_ON(engine->heartbeat.systole); - engine->serial++; /* pretend we are not idle! */ - intel_engine_unpark_heartbeat(engine); - - flush_delayed_work(&engine->heartbeat.work); - if (!delayed_work_pending(&engine->heartbeat.work)) { - pr_err("%s: heartbeat %d did not start\n", - engine->name, i); - err = -EINVAL; - goto err_pm; - } - - rcu_read_lock(); - rq = READ_ONCE(engine->heartbeat.systole); - if (rq) - rq = i915_request_get_rcu(rq); - rcu_read_unlock(); - } while (!rq); - - t0 = ktime_get(); - while (rq == READ_ONCE(engine->heartbeat.systole)) - yield(); /* work is on the local cpu! */ - t1 = ktime_get(); - - i915_request_put(rq); - times[i] = ktime_us_delta(t1, t0); - } - - sort(times, ARRAY_SIZE(times), sizeof(times[0]), cmp_u32, NULL); - - pr_info("%s: Heartbeat delay: %uus [%u, %u]\n", - engine->name, - times[ARRAY_SIZE(times) / 2], - times[0], - times[ARRAY_SIZE(times) - 1]); - - /* - * Ideally, the upper bound on min work delay would be something like - * 2 * 2 (worst), +1 for scheduling, +1 for slack. In practice, we - * are, even with system_wq_highpri, at the mercy of the CPU scheduler - * and may be stuck behind some slow work for many millisecond. Such - * as our very own display workers. - */ - if (times[ARRAY_SIZE(times) / 2] > error_threshold) { - pr_err("%s: Heartbeat delay was %uus, expected less than %dus\n", - engine->name, - times[ARRAY_SIZE(times) / 2], - error_threshold); - err = -EINVAL; - } - - reset_heartbeat(engine); -err_pm: - intel_engine_pm_put(engine); - intel_context_put(ce); - return err; -} - -static int live_heartbeat_fast(void *arg) -{ - struct intel_gt *gt = arg; - struct intel_engine_cs *engine; - enum intel_engine_id id; - int err = 0; - - /* Check that the heartbeat ticks at the desired rate. */ - if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL) - return 0; - - for_each_engine(engine, gt, id) { - err = __live_heartbeat_fast(engine); - if (err) - break; - } - - return err; -} - static int __live_heartbeat_off(struct intel_engine_cs *engine) { int err; @@ -372,7 +263,6 @@ int intel_heartbeat_live_selftests(struct drm_i915_private *i915) static const struct i915_subtest tests[] = { SUBTEST(live_idle_flush), SUBTEST(live_idle_pulse), - SUBTEST(live_heartbeat_fast), SUBTEST(live_heartbeat_off), }; int saved_hangcheck;
The test is trying to push the heartbeat frequency to the limit, which might sometimes fail. Such a failure does not provide valuable information, because it does not indicate that there is something necessarily wrong with either the driver or the hardware. Remove the test to prevent random, unnecessary failures from appearing in CI. Suggested-by: Chris Wilson <chris.p.wilson@intel.com> Signed-off-by: Niemiec, Krzysztof <krzysztof.niemiec@intel.com> --- .../drm/i915/gt/selftest_engine_heartbeat.c | 110 ------------------ 1 file changed, 110 deletions(-)