diff mbox series

drm/i915/gt: Delete the live_hearbeat_fast selftest

Message ID fe2vu5h7v7ooxbhwpbfsypxg5mjrnt56gc3cgrqpnhgrgce334@qfrv2skxrp47 (mailing list archive)
State New, archived
Headers show
Series drm/i915/gt: Delete the live_hearbeat_fast selftest | expand

Commit Message

Krzysztof Niemiec June 3, 2024, 4:20 p.m. UTC
The test is trying to push the heartbeat frequency to the limit, which
might sometimes fail. Such a failure does not provide valuable
information, because it does not indicate that there is something
necessarily wrong with either the driver or the hardware.

Remove the test to prevent random, unnecessary failures from appearing
in CI.

Suggested-by: Chris Wilson <chris.p.wilson@intel.com>
Signed-off-by: Niemiec, Krzysztof <krzysztof.niemiec@intel.com>
---
 .../drm/i915/gt/selftest_engine_heartbeat.c   | 110 ------------------
 1 file changed, 110 deletions(-)

Comments

Andi Shyti June 4, 2024, 12:40 a.m. UTC | #1
Hi Krzysztof,

On Mon, Jun 03, 2024 at 06:20:22PM +0200, Niemiec, Krzysztof wrote:
> The test is trying to push the heartbeat frequency to the limit, which
> might sometimes fail. Such a failure does not provide valuable
> information, because it does not indicate that there is something
> necessarily wrong with either the driver or the hardware.
> 
> Remove the test to prevent random, unnecessary failures from appearing
> in CI.
> 
> Suggested-by: Chris Wilson <chris.p.wilson@intel.com>
> Signed-off-by: Niemiec, Krzysztof <krzysztof.niemiec@intel.com>

Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>

Thanks,
Andi
Andi Shyti June 6, 2024, 2:29 a.m. UTC | #2
Hi Krzysztof,

On Mon, Jun 03, 2024 at 06:20:22PM +0200, Niemiec, Krzysztof wrote:
> The test is trying to push the heartbeat frequency to the limit, which
> might sometimes fail. Such a failure does not provide valuable
> information, because it does not indicate that there is something
> necessarily wrong with either the driver or the hardware.
> 
> Remove the test to prevent random, unnecessary failures from appearing
> in CI.
> 
> Suggested-by: Chris Wilson <chris.p.wilson@intel.com>
> Signed-off-by: Niemiec, Krzysztof <krzysztof.niemiec@intel.com>

merged in drm-intel-gt-next.

Thank you,
Andi
Tvrtko Ursulin June 10, 2024, 11:42 a.m. UTC | #3
On 03/06/2024 17:20, Niemiec, Krzysztof wrote:
> The test is trying to push the heartbeat frequency to the limit, which
> might sometimes fail. Such a failure does not provide valuable
> information, because it does not indicate that there is something
> necessarily wrong with either the driver or the hardware.
> 
> Remove the test to prevent random, unnecessary failures from appearing
> in CI.
> 
> Suggested-by: Chris Wilson <chris.p.wilson@intel.com>
> Signed-off-by: Niemiec, Krzysztof <krzysztof.niemiec@intel.com>

Just a note in passing that comma in the email display name is I believe 
not RFC 5322 compliant and there might be tools which barf on it(*). If 
you can put it in double quotes, it would be advisable.

Regards,

Tvrtko

*) Such as my internal pull request generator which uses CPAN's 
Email::Address::XS. :)

> ---
>   .../drm/i915/gt/selftest_engine_heartbeat.c   | 110 ------------------
>   1 file changed, 110 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
> index ef014df4c4fc..9e4f0e417b3b 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
> @@ -193,115 +193,6 @@ static int live_idle_pulse(void *arg)
>   	return err;
>   }
>   
> -static int cmp_u32(const void *_a, const void *_b)
> -{
> -	const u32 *a = _a, *b = _b;
> -
> -	return *a - *b;
> -}
> -
> -static int __live_heartbeat_fast(struct intel_engine_cs *engine)
> -{
> -	const unsigned int error_threshold = max(20000u, jiffies_to_usecs(6));
> -	struct intel_context *ce;
> -	struct i915_request *rq;
> -	ktime_t t0, t1;
> -	u32 times[5];
> -	int err;
> -	int i;
> -
> -	ce = intel_context_create(engine);
> -	if (IS_ERR(ce))
> -		return PTR_ERR(ce);
> -
> -	intel_engine_pm_get(engine);
> -
> -	err = intel_engine_set_heartbeat(engine, 1);
> -	if (err)
> -		goto err_pm;
> -
> -	for (i = 0; i < ARRAY_SIZE(times); i++) {
> -		do {
> -			/* Manufacture a tick */
> -			intel_engine_park_heartbeat(engine);
> -			GEM_BUG_ON(engine->heartbeat.systole);
> -			engine->serial++; /*  pretend we are not idle! */
> -			intel_engine_unpark_heartbeat(engine);
> -
> -			flush_delayed_work(&engine->heartbeat.work);
> -			if (!delayed_work_pending(&engine->heartbeat.work)) {
> -				pr_err("%s: heartbeat %d did not start\n",
> -				       engine->name, i);
> -				err = -EINVAL;
> -				goto err_pm;
> -			}
> -
> -			rcu_read_lock();
> -			rq = READ_ONCE(engine->heartbeat.systole);
> -			if (rq)
> -				rq = i915_request_get_rcu(rq);
> -			rcu_read_unlock();
> -		} while (!rq);
> -
> -		t0 = ktime_get();
> -		while (rq == READ_ONCE(engine->heartbeat.systole))
> -			yield(); /* work is on the local cpu! */
> -		t1 = ktime_get();
> -
> -		i915_request_put(rq);
> -		times[i] = ktime_us_delta(t1, t0);
> -	}
> -
> -	sort(times, ARRAY_SIZE(times), sizeof(times[0]), cmp_u32, NULL);
> -
> -	pr_info("%s: Heartbeat delay: %uus [%u, %u]\n",
> -		engine->name,
> -		times[ARRAY_SIZE(times) / 2],
> -		times[0],
> -		times[ARRAY_SIZE(times) - 1]);
> -
> -	/*
> -	 * Ideally, the upper bound on min work delay would be something like
> -	 * 2 * 2 (worst), +1 for scheduling, +1 for slack. In practice, we
> -	 * are, even with system_wq_highpri, at the mercy of the CPU scheduler
> -	 * and may be stuck behind some slow work for many millisecond. Such
> -	 * as our very own display workers.
> -	 */
> -	if (times[ARRAY_SIZE(times) / 2] > error_threshold) {
> -		pr_err("%s: Heartbeat delay was %uus, expected less than %dus\n",
> -		       engine->name,
> -		       times[ARRAY_SIZE(times) / 2],
> -		       error_threshold);
> -		err = -EINVAL;
> -	}
> -
> -	reset_heartbeat(engine);
> -err_pm:
> -	intel_engine_pm_put(engine);
> -	intel_context_put(ce);
> -	return err;
> -}
> -
> -static int live_heartbeat_fast(void *arg)
> -{
> -	struct intel_gt *gt = arg;
> -	struct intel_engine_cs *engine;
> -	enum intel_engine_id id;
> -	int err = 0;
> -
> -	/* Check that the heartbeat ticks at the desired rate. */
> -	if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL)
> -		return 0;
> -
> -	for_each_engine(engine, gt, id) {
> -		err = __live_heartbeat_fast(engine);
> -		if (err)
> -			break;
> -	}
> -
> -	return err;
> -}
> -
>   static int __live_heartbeat_off(struct intel_engine_cs *engine)
>   {
>   	int err;
> @@ -372,7 +263,6 @@ int intel_heartbeat_live_selftests(struct drm_i915_private *i915)
>   	static const struct i915_subtest tests[] = {
>   		SUBTEST(live_idle_flush),
>   		SUBTEST(live_idle_pulse),
> -		SUBTEST(live_heartbeat_fast),
>   		SUBTEST(live_heartbeat_off),
>   	};
>   	int saved_hangcheck;
Andi Shyti June 10, 2024, 12:10 p.m. UTC | #4
Hi Tvrtko,

On Mon, Jun 10, 2024 at 12:42:31PM +0100, Tvrtko Ursulin wrote:
> On 03/06/2024 17:20, Niemiec, Krzysztof wrote:
> > The test is trying to push the heartbeat frequency to the limit, which
> > might sometimes fail. Such a failure does not provide valuable
> > information, because it does not indicate that there is something
> > necessarily wrong with either the driver or the hardware.
> > 
> > Remove the test to prevent random, unnecessary failures from appearing
> > in CI.
> > 
> > Suggested-by: Chris Wilson <chris.p.wilson@intel.com>
> > Signed-off-by: Niemiec, Krzysztof <krzysztof.niemiec@intel.com>
> 
> Just a note in passing that comma in the email display name is I believe not
> RFC 5322 compliant and there might be tools which barf on it(*). If you can
> put it in double quotes, it would be advisable.

yes, we discussed it with Krzysztof, I noticed it right after I
submitted the code.

> Regards,
> 
> Tvrtko
> 
> *) Such as my internal pull request generator which uses CPAN's
> Email::Address::XS. :)

If we are in time, we can fix it as Krzysztof Niemiec <krzysztof.niemiec@intel.com>

Sorry about this oversight,
Andi
Tvrtko Ursulin June 10, 2024, 1 p.m. UTC | #5
Hi Andi,

On 10/06/2024 13:10, Andi Shyti wrote:
> Hi Tvrtko,
> 
> On Mon, Jun 10, 2024 at 12:42:31PM +0100, Tvrtko Ursulin wrote:
>> On 03/06/2024 17:20, Niemiec, Krzysztof wrote:
>>> The test is trying to push the heartbeat frequency to the limit, which
>>> might sometimes fail. Such a failure does not provide valuable
>>> information, because it does not indicate that there is something
>>> necessarily wrong with either the driver or the hardware.
>>>
>>> Remove the test to prevent random, unnecessary failures from appearing
>>> in CI.
>>>
>>> Suggested-by: Chris Wilson <chris.p.wilson@intel.com>
>>> Signed-off-by: Niemiec, Krzysztof <krzysztof.niemiec@intel.com>
>>
>> Just a note in passing that comma in the email display name is I believe not
>> RFC 5322 compliant and there might be tools which barf on it(*). If you can
>> put it in double quotes, it would be advisable.
> 
> yes, we discussed it with Krzysztof, I noticed it right after I
> submitted the code.
> 
>> Regards,
>>
>> Tvrtko
>>
>> *) Such as my internal pull request generator which uses CPAN's
>> Email::Address::XS. :)
> 
> If we are in time, we can fix it as Krzysztof Niemiec <krzysztof.niemiec@intel.com>
> 
> Sorry about this oversight,

It's not a big deal (it isn't the first and only occurence) and no need 
to do anything more than correct the display name going forward.

Regards,

Tvrtko
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
index ef014df4c4fc..9e4f0e417b3b 100644
--- a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
@@ -193,115 +193,6 @@  static int live_idle_pulse(void *arg)
 	return err;
 }
 
-static int cmp_u32(const void *_a, const void *_b)
-{
-	const u32 *a = _a, *b = _b;
-
-	return *a - *b;
-}
-
-static int __live_heartbeat_fast(struct intel_engine_cs *engine)
-{
-	const unsigned int error_threshold = max(20000u, jiffies_to_usecs(6));
-	struct intel_context *ce;
-	struct i915_request *rq;
-	ktime_t t0, t1;
-	u32 times[5];
-	int err;
-	int i;
-
-	ce = intel_context_create(engine);
-	if (IS_ERR(ce))
-		return PTR_ERR(ce);
-
-	intel_engine_pm_get(engine);
-
-	err = intel_engine_set_heartbeat(engine, 1);
-	if (err)
-		goto err_pm;
-
-	for (i = 0; i < ARRAY_SIZE(times); i++) {
-		do {
-			/* Manufacture a tick */
-			intel_engine_park_heartbeat(engine);
-			GEM_BUG_ON(engine->heartbeat.systole);
-			engine->serial++; /*  pretend we are not idle! */
-			intel_engine_unpark_heartbeat(engine);
-
-			flush_delayed_work(&engine->heartbeat.work);
-			if (!delayed_work_pending(&engine->heartbeat.work)) {
-				pr_err("%s: heartbeat %d did not start\n",
-				       engine->name, i);
-				err = -EINVAL;
-				goto err_pm;
-			}
-
-			rcu_read_lock();
-			rq = READ_ONCE(engine->heartbeat.systole);
-			if (rq)
-				rq = i915_request_get_rcu(rq);
-			rcu_read_unlock();
-		} while (!rq);
-
-		t0 = ktime_get();
-		while (rq == READ_ONCE(engine->heartbeat.systole))
-			yield(); /* work is on the local cpu! */
-		t1 = ktime_get();
-
-		i915_request_put(rq);
-		times[i] = ktime_us_delta(t1, t0);
-	}
-
-	sort(times, ARRAY_SIZE(times), sizeof(times[0]), cmp_u32, NULL);
-
-	pr_info("%s: Heartbeat delay: %uus [%u, %u]\n",
-		engine->name,
-		times[ARRAY_SIZE(times) / 2],
-		times[0],
-		times[ARRAY_SIZE(times) - 1]);
-
-	/*
-	 * Ideally, the upper bound on min work delay would be something like
-	 * 2 * 2 (worst), +1 for scheduling, +1 for slack. In practice, we
-	 * are, even with system_wq_highpri, at the mercy of the CPU scheduler
-	 * and may be stuck behind some slow work for many millisecond. Such
-	 * as our very own display workers.
-	 */
-	if (times[ARRAY_SIZE(times) / 2] > error_threshold) {
-		pr_err("%s: Heartbeat delay was %uus, expected less than %dus\n",
-		       engine->name,
-		       times[ARRAY_SIZE(times) / 2],
-		       error_threshold);
-		err = -EINVAL;
-	}
-
-	reset_heartbeat(engine);
-err_pm:
-	intel_engine_pm_put(engine);
-	intel_context_put(ce);
-	return err;
-}
-
-static int live_heartbeat_fast(void *arg)
-{
-	struct intel_gt *gt = arg;
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-	int err = 0;
-
-	/* Check that the heartbeat ticks at the desired rate. */
-	if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL)
-		return 0;
-
-	for_each_engine(engine, gt, id) {
-		err = __live_heartbeat_fast(engine);
-		if (err)
-			break;
-	}
-
-	return err;
-}
-
 static int __live_heartbeat_off(struct intel_engine_cs *engine)
 {
 	int err;
@@ -372,7 +263,6 @@  int intel_heartbeat_live_selftests(struct drm_i915_private *i915)
 	static const struct i915_subtest tests[] = {
 		SUBTEST(live_idle_flush),
 		SUBTEST(live_idle_pulse),
-		SUBTEST(live_heartbeat_fast),
 		SUBTEST(live_heartbeat_off),
 	};
 	int saved_hangcheck;