diff mbox series

[kvm-unit-tests] x86/apic: wait for wrap around in lapic timer periodic test

Message ID 1551773019-19121-1-git-send-email-wrfsh@yandex-team.ru (mailing list archive)
State New, archived
Headers show
Series [kvm-unit-tests] x86/apic: wait for wrap around in lapic timer periodic test | expand

Commit Message

Evgeny Yakovlev March 5, 2019, 8:03 a.m. UTC
We are seeing very rare test failures in apic/split-apic with the
following: "FAIL: TMCCT should be reset to the initial-count". We are
running on 4.14 kernels and this problem occurs sporadically although we
didn't try to reproduce it on any other stable kernel branch.

It looks like under KVM lapic timer in periodic mode may return several
consecutive 0 values in apic_get_tmccr when hrtimer async callback is still
restarting the hrtimer and recalculating expiration_time value but
apic_get_tmccr is seeing ktime_now() already ahead of (still old)
expiration_time value. After a couple of 0-es from TMCCR everything
goes back to normal.

This change forces test_apic_change_mode test to wait specifically for
lapic timer wrap around skipping zero TMCCR values in one case when we are
testing for timer in periodic mode to correctly reload TMCCR from TMICR.
If timer mode change is indeed broken and TMCCR is not reset then we
will see apic test timeout.

Signed-off-by: Evgeny Yakovlev <wrfsh@yandex-team.ru>
---
 x86/apic.c | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

Comments

Paolo Bonzini March 8, 2019, 1:04 p.m. UTC | #1
On 05/03/19 09:03, Evgeny Yakovlev wrote:
> We are seeing very rare test failures in apic/split-apic with the
> following: "FAIL: TMCCT should be reset to the initial-count". We are
> running on 4.14 kernels and this problem occurs sporadically although we
> didn't try to reproduce it on any other stable kernel branch.
> 
> It looks like under KVM lapic timer in periodic mode may return several
> consecutive 0 values in apic_get_tmccr when hrtimer async callback is still
> restarting the hrtimer and recalculating expiration_time value but
> apic_get_tmccr is seeing ktime_now() already ahead of (still old)
> expiration_time value. After a couple of 0-es from TMCCR everything
> goes back to normal.
> 
> This change forces test_apic_change_mode test to wait specifically for
> lapic timer wrap around skipping zero TMCCR values in one case when we are
> testing for timer in periodic mode to correctly reload TMCCR from TMICR.
> If timer mode change is indeed broken and TMCCR is not reset then we
> will see apic test timeout.
> 
> Signed-off-by: Evgeny Yakovlev <wrfsh@yandex-team.ru>
> ---
>  x86/apic.c | 23 +++++++++++++++++++++--
>  1 file changed, 21 insertions(+), 2 deletions(-)
> 
> diff --git a/x86/apic.c b/x86/apic.c
> index cfdbef2..51744cf 100644
> --- a/x86/apic.c
> +++ b/x86/apic.c
> @@ -489,7 +489,7 @@ static void test_physical_broadcast(void)
>  	report("APIC physical broadcast shorthand", broadcast_received(ncpus));
>  }
>  
> -static void wait_until_tmcct_is_zero(uint32_t initial_count, bool stop_when_half)
> +static void wait_until_tmcct_common(uint32_t initial_count, bool stop_when_half, bool should_wrap_around)
>  {
>  	uint32_t tmcct = apic_read(APIC_TMCCT);
>  
> @@ -503,9 +503,23 @@ static void wait_until_tmcct_is_zero(uint32_t initial_count, bool stop_when_half
>  		/* Wait until the counter reach 0 or wrap-around */
>  		while ( tmcct <= (initial_count / 2) && tmcct > 0 )
>  			tmcct = apic_read(APIC_TMCCT);
> +
> +		/* Wait specifically for wrap around to skip 0 TMCCR if we were asked to */
> +		while (should_wrap_around && !tmcct)
> +			tmcct = apic_read(APIC_TMCCT);
>  	}
>  }
>  
> +static void wait_until_tmcct_is_zero(uint32_t initial_count, bool stop_when_half)
> +{
> +	return wait_until_tmcct_common(initial_count, stop_when_half, false);
> +}
> +
> +static void wait_until_tmcct_wrap_around(uint32_t initial_count, bool stop_when_half)
> +{
> +	return wait_until_tmcct_common(initial_count, stop_when_half, true);
> +}
> +
>  static inline void apic_change_mode(unsigned long new_mode)
>  {
>  	uint32_t lvtt;
> @@ -548,7 +562,12 @@ static void test_apic_change_mode(void)
>  	 * counting down from where it was
>  	 */
>  	report("TMCCT should not be reset to TMICT value", apic_read(APIC_TMCCT) < (tmict / 2));
> -	wait_until_tmcct_is_zero(tmict, false);
> +	/*
> +	 * Specifically wait for timer wrap around and skip 0.
> +	 * Under KVM lapic there is a possibility that a small amount of consecutive
> +	 * TMCCR reads return 0 while hrtimer is reset in an async callback
> +	 */
> +	wait_until_tmcct_wrap_around(tmict, false);
>  	report("TMCCT should be reset to the initial-count", apic_read(APIC_TMCCT) > (tmict / 2));
>  
>  	wait_until_tmcct_is_zero(tmict, true);
> 

Applied, thanks.

Paolo
diff mbox series

Patch

diff --git a/x86/apic.c b/x86/apic.c
index cfdbef2..51744cf 100644
--- a/x86/apic.c
+++ b/x86/apic.c
@@ -489,7 +489,7 @@  static void test_physical_broadcast(void)
 	report("APIC physical broadcast shorthand", broadcast_received(ncpus));
 }
 
-static void wait_until_tmcct_is_zero(uint32_t initial_count, bool stop_when_half)
+static void wait_until_tmcct_common(uint32_t initial_count, bool stop_when_half, bool should_wrap_around)
 {
 	uint32_t tmcct = apic_read(APIC_TMCCT);
 
@@ -503,9 +503,23 @@  static void wait_until_tmcct_is_zero(uint32_t initial_count, bool stop_when_half
 		/* Wait until the counter reach 0 or wrap-around */
 		while ( tmcct <= (initial_count / 2) && tmcct > 0 )
 			tmcct = apic_read(APIC_TMCCT);
+
+		/* Wait specifically for wrap around to skip 0 TMCCR if we were asked to */
+		while (should_wrap_around && !tmcct)
+			tmcct = apic_read(APIC_TMCCT);
 	}
 }
 
+static void wait_until_tmcct_is_zero(uint32_t initial_count, bool stop_when_half)
+{
+	return wait_until_tmcct_common(initial_count, stop_when_half, false);
+}
+
+static void wait_until_tmcct_wrap_around(uint32_t initial_count, bool stop_when_half)
+{
+	return wait_until_tmcct_common(initial_count, stop_when_half, true);
+}
+
 static inline void apic_change_mode(unsigned long new_mode)
 {
 	uint32_t lvtt;
@@ -548,7 +562,12 @@  static void test_apic_change_mode(void)
 	 * counting down from where it was
 	 */
 	report("TMCCT should not be reset to TMICT value", apic_read(APIC_TMCCT) < (tmict / 2));
-	wait_until_tmcct_is_zero(tmict, false);
+	/*
+	 * Specifically wait for timer wrap around and skip 0.
+	 * Under KVM lapic there is a possibility that a small amount of consecutive
+	 * TMCCR reads return 0 while hrtimer is reset in an async callback
+	 */
+	wait_until_tmcct_wrap_around(tmict, false);
 	report("TMCCT should be reset to the initial-count", apic_read(APIC_TMCCT) > (tmict / 2));
 
 	wait_until_tmcct_is_zero(tmict, true);