diff mbox series

[kvm-unit-tests] x86: Remove test_multiple_nmi()

Message ID 20190424045059.11627-1-nadav.amit@gmail.com (mailing list archive)
State New, archived
Headers show
Series [kvm-unit-tests] x86: Remove test_multiple_nmi() | expand

Commit Message

Nadav Amit April 24, 2019, 4:50 a.m. UTC
From: Nadav Amit <nadav.amit@gmail.com>

According to the discussion in [1], expecting nested NMIs never to be
collapsed is wrong.

[1] https://marc.info/?l=kvm&m=145876994031502&w=2

Signed-off-by: Nadav Amit <nadav.amit@gmail.com>
---
 x86/apic.c | 70 ------------------------------------------------------
 1 file changed, 70 deletions(-)

Comments

Sean Christopherson April 24, 2019, 8:55 p.m. UTC | #1
On Tue, Apr 23, 2019 at 09:50:59PM -0700, nadav.amit@gmail.com wrote:
> From: Nadav Amit <nadav.amit@gmail.com>
> 
> According to the discussion in [1], expecting nested NMIs never to be

s/nested/multiple pending

> collapsed is wrong.

It'd also be helpful to quote the SDM or APM, although it's admittedly
difficult to find a relevant blurb in the SDM.  The only explicit statement
regarding the number of latched/pending NMIs I could find was for SMM:

34.8 NMI HANDLING WHILE IN SMM
  NMI interrupts are blocked upon entry to the SMI handler. If an NMI request
  occurs during the SMI handler, it is latched and serviced after the processor
  exits SMM. Only one NMI request will be latched during the SMI handler.  If an
  NMI request is pending when the processor executes the RSM instruction, the NMI
  is serviced before the next instruction of the interrupted code sequence. This
  assumes that NMIs were not blocked before the SMI occurred. If NMIs were
  blocked before the SMI occurred, they are blocked after execution of RSM

All that being said, removing the test is correct as it's blatantly
subject to a race condition between vCPUs.

It probably makes sense to add a single threaded test that pends an NMI from
inside the NMI handler to ensure that KVM pends NMIs correctly.  I'll send
a patch.

> 
> [1] https://marc.info/?l=kvm&m=145876994031502&w=2
> 
> Signed-off-by: Nadav Amit <nadav.amit@gmail.com>
> ---
>  x86/apic.c | 70 ------------------------------------------------------
>  1 file changed, 70 deletions(-)
> 
> diff --git a/x86/apic.c b/x86/apic.c
> index 51744cf..d1ed5ea 100644
> --- a/x86/apic.c
> +++ b/x86/apic.c
> @@ -334,75 +334,6 @@ static volatile int nmi_received;
>  static volatile int cpu0_nmi_ctr1, cpu1_nmi_ctr1;
>  static volatile int cpu0_nmi_ctr2, cpu1_nmi_ctr2;
>  
> -static void multiple_nmi_handler(isr_regs_t *regs)
> -{
> -    ++nmi_received;
> -}
> -
> -static void kick_me_nmi(void *blah)
> -{
> -    while (!nmi_done) {
> -	++cpu1_nmi_ctr1;
> -	while (cpu1_nmi_ctr1 != cpu0_nmi_ctr1 && !nmi_done) {
> -	    pause();
> -	}
> -	if (nmi_done) {
> -	    return;
> -	}
> -	apic_icr_write(APIC_DEST_PHYSICAL | APIC_DM_NMI | APIC_INT_ASSERT, 0);
> -	/* make sure the NMI has arrived by sending an IPI after it */
> -	apic_icr_write(APIC_DEST_PHYSICAL | APIC_DM_FIXED | APIC_INT_ASSERT
> -		       | 0x44, 0);
> -	++cpu1_nmi_ctr2;
> -	while (cpu1_nmi_ctr2 != cpu0_nmi_ctr2 && !nmi_done) {
> -	    pause();
> -	}
> -    }
> -}
> -
> -static void flush_nmi(isr_regs_t *regs)
> -{
> -    nmi_flushed = true;
> -    apic_write(APIC_EOI, 0);
> -}
> -
> -static void test_multiple_nmi(void)
> -{
> -    int i;
> -    bool ok = true;
> -
> -    if (cpu_count() < 2) {
> -	return;
> -    }
> -
> -    sti();
> -    handle_irq(2, multiple_nmi_handler);
> -    handle_irq(0x44, flush_nmi);
> -    on_cpu_async(1, kick_me_nmi, 0);
> -    for (i = 0; i < 1000000; ++i) {
> -	nmi_flushed = false;
> -	nmi_received = 0;
> -	++cpu0_nmi_ctr1;
> -	while (cpu1_nmi_ctr1 != cpu0_nmi_ctr1) {
> -	    pause();
> -	}
> -	apic_icr_write(APIC_DEST_PHYSICAL | APIC_DM_NMI | APIC_INT_ASSERT, 0);
> -	while (!nmi_flushed) {
> -	    pause();
> -	}
> -	if (nmi_received != 2) {
> -	    ok = false;
> -	    break;
> -	}
> -	++cpu0_nmi_ctr2;
> -	while (cpu1_nmi_ctr2 != cpu0_nmi_ctr2) {
> -	    pause();
> -	}
> -    }
> -    nmi_done = true;
> -    report("multiple nmi", ok);
> -}
> -
>  static volatile int lvtt_counter = 0;
>  
>  static void lvtt_handler(isr_regs_t *regs)
> @@ -614,7 +545,6 @@ int main(void)
>      test_pv_ipi();
>  
>      test_sti_nmi();
> -    test_multiple_nmi();
>  
>      test_apic_timer_one_shot();
>      test_apic_change_mode();
> -- 
> 2.19.1
>
Nadav Amit April 24, 2019, 8:59 p.m. UTC | #2
> On Apr 24, 2019, at 1:55 PM, Sean Christopherson <sean.j.christopherson@intel.com> wrote:
> 
> On Tue, Apr 23, 2019 at 09:50:59PM -0700, nadav.amit@gmail.com wrote:
>> From: Nadav Amit <nadav.amit@gmail.com>
>> 
>> According to the discussion in [1], expecting nested NMIs never to be
> 
> s/nested/multiple pending
> 
>> collapsed is wrong.
> 
> It'd also be helpful to quote the SDM or APM, although it's admittedly
> difficult to find a relevant blurb in the SDM.  The only explict statement
> regarding the number of latched/pending NMIs I could find was for SMM:
> 
> 34.8 NMI HANDLING WHILE IN SMM
>  NMI interrupts are blocked upon entry to the SMI handler. If an NMI request
>  occurs during the SMI handler, it is latched and serviced after the processor
>  exits SMM. Only one NMI request will be latched during the SMI handler.  If an
>  NMI request is pending when the processor executes the RSM instruction, the NMI
>  is serviced before the next instruction of the interrupted code sequence. This
>  assumes that NMIs were not blocked before the SMI occurred. If NMIs were
>  blocked before the SMI occurred, they are blocked after execution of RSM
> 
> All that being said, removing the test is correct as it's blatantly
> subject to a race condition between vCPUs.
> 
> It probably makes sense add a single threaded test that pends an NMI from
> inside the NMI handler to ensure that KVM pends NMIs correctly.  I'll send
> a patch.

Thanks, Sean. I thought that quoting you should be enough. ;-)

Paolo, please let me know if you have any further feedback, so I will know
what to include in v2.
Sean Christopherson April 24, 2019, 9:49 p.m. UTC | #3
On Wed, Apr 24, 2019 at 01:59:21PM -0700, Nadav Amit wrote:
> > On Apr 24, 2019, at 1:55 PM, Sean Christopherson <sean.j.christopherson@intel.com> wrote:
> > 
> > On Tue, Apr 23, 2019 at 09:50:59PM -0700, nadav.amit@gmail.com wrote:
> >> From: Nadav Amit <nadav.amit@gmail.com>
> >> 
> >> According to the discussion in [1], expecting nested NMIs never to be
> > 
> > s/nested/multiple pending
> > 
> >> collapsed is wrong.
> > 
> > It'd also be helpful to quote the SDM or APM, although it's admittedly
> > difficult to find a relevant blurb in the SDM.  The only explict statement
> > regarding the number of latched/pending NMIs I could find was for SMM:
> > 
> > 34.8 NMI HANDLING WHILE IN SMM
> >  NMI interrupts are blocked upon entry to the SMI handler. If an NMI request
> >  occurs during the SMI handler, it is latched and serviced after the processor
> >  exits SMM. Only one NMI request will be latched during the SMI handler.  If an
> >  NMI request is pending when the processor executes the RSM instruction, the NMI
> >  is serviced before the next instruction of the interrupted code sequence. This
> >  assumes that NMIs were not blocked before the SMI occurred. If NMIs were
> >  blocked before the SMI occurred, they are blocked after execution of RSM
> > 
> > All that being said, removing the test is correct as it's blatantly
> > subject to a race condition between vCPUs.
> > 
> > It probably makes sense add a single threaded test that pends an NMI from
> > inside the NMI handler to ensure that KVM pends NMIs correctly.  I'll send
> > a patch.
> 
> Thanks, Sean. I thought that quoting you should be enough. ;-)
> 
> Paolo, please let me know if you have any further feedback, so I will know
> what to include in v2.

Thinking about the test a bit more and rereading the KVM code, I actually
think we should keep the test.

Architecturally I don't think there are any guarantees regarding
simultaneous NMIs, but practically speaking the probability of NMIs
being collapsed (on hardware) when NMIs aren't blocked is nil.  So while
it may be architecturally legal for a VMM to drop an NMI in this case,
it's reasonable for software to expect two NMIs to be received.

And that's why KVM explicitly allows two NMIs to be pending when NMIs
aren't blocked.

Did you observe a test failure or was this found by inspection?
Nadav Amit April 24, 2019, 10:10 p.m. UTC | #4
> On Apr 24, 2019, at 2:49 PM, Sean Christopherson <sean.j.christopherson@intel.com> wrote:
> 
> On Wed, Apr 24, 2019 at 01:59:21PM -0700, Nadav Amit wrote:
>>> On Apr 24, 2019, at 1:55 PM, Sean Christopherson <sean.j.christopherson@intel.com> wrote:
>>> 
>>> On Tue, Apr 23, 2019 at 09:50:59PM -0700, nadav.amit@gmail.com wrote:
>>>> From: Nadav Amit <nadav.amit@gmail.com>
>>>> 
>>>> According to the discussion in [1], expecting nested NMIs never to be
>>> 
>>> s/nested/multiple pending
>>> 
>>>> collapsed is wrong.
>>> 
>>> It'd also be helpful to quote the SDM or APM, although it's admittedly
>>> difficult to find a relevant blurb in the SDM.  The only explict statement
>>> regarding the number of latched/pending NMIs I could find was for SMM:
>>> 
>>> 34.8 NMI HANDLING WHILE IN SMM
>>> NMI interrupts are blocked upon entry to the SMI handler. If an NMI request
>>> occurs during the SMI handler, it is latched and serviced after the processor
>>> exits SMM. Only one NMI request will be latched during the SMI handler.  If an
>>> NMI request is pending when the processor executes the RSM instruction, the NMI
>>> is serviced before the next instruction of the interrupted code sequence. This
>>> assumes that NMIs were not blocked before the SMI occurred. If NMIs were
>>> blocked before the SMI occurred, they are blocked after execution of RSM
>>> 
>>> All that being said, removing the test is correct as it's blatantly
>>> subject to a race condition between vCPUs.
>>> 
>>> It probably makes sense add a single threaded test that pends an NMI from
>>> inside the NMI handler to ensure that KVM pends NMIs correctly.  I'll send
>>> a patch.
>> 
>> Thanks, Sean. I thought that quoting you should be enough. ;-)
>> 
>> Paolo, please let me know if you have any further feedback, so I will know
>> what to include in v2.
> 
> Thinking about the test a bit more and rereading the KVM code, I actually
> think we should keep the test.
> 
> Architecturally I don't think there are any guarantees regarding
> simultaneous NMIs, but practically speaking the probability of NMIs
> being collapsed (on hardware) when NMIs aren't blocked is nil.  So while
> it may be architecturally legal for a VMM to drop an NMI in this case,
> it's reasonable for software to expect two NMIs to be received.
> 
> And that's why KVM explicitly allows two NMIs to be pending when NMIs
> aren't blocked.
> 
> Did you observe a test failure or was this found by inspection?

I observed a failure (not on KVM), but I now see that bare-metal indeed
allows this test to pass. So I can understand if you do not want to remove
the test.
diff mbox series

Patch

diff --git a/x86/apic.c b/x86/apic.c
index 51744cf..d1ed5ea 100644
--- a/x86/apic.c
+++ b/x86/apic.c
@@ -334,75 +334,6 @@  static volatile int nmi_received;
 static volatile int cpu0_nmi_ctr1, cpu1_nmi_ctr1;
 static volatile int cpu0_nmi_ctr2, cpu1_nmi_ctr2;
 
-static void multiple_nmi_handler(isr_regs_t *regs)
-{
-    ++nmi_received;
-}
-
-static void kick_me_nmi(void *blah)
-{
-    while (!nmi_done) {
-	++cpu1_nmi_ctr1;
-	while (cpu1_nmi_ctr1 != cpu0_nmi_ctr1 && !nmi_done) {
-	    pause();
-	}
-	if (nmi_done) {
-	    return;
-	}
-	apic_icr_write(APIC_DEST_PHYSICAL | APIC_DM_NMI | APIC_INT_ASSERT, 0);
-	/* make sure the NMI has arrived by sending an IPI after it */
-	apic_icr_write(APIC_DEST_PHYSICAL | APIC_DM_FIXED | APIC_INT_ASSERT
-		       | 0x44, 0);
-	++cpu1_nmi_ctr2;
-	while (cpu1_nmi_ctr2 != cpu0_nmi_ctr2 && !nmi_done) {
-	    pause();
-	}
-    }
-}
-
-static void flush_nmi(isr_regs_t *regs)
-{
-    nmi_flushed = true;
-    apic_write(APIC_EOI, 0);
-}
-
-static void test_multiple_nmi(void)
-{
-    int i;
-    bool ok = true;
-
-    if (cpu_count() < 2) {
-	return;
-    }
-
-    sti();
-    handle_irq(2, multiple_nmi_handler);
-    handle_irq(0x44, flush_nmi);
-    on_cpu_async(1, kick_me_nmi, 0);
-    for (i = 0; i < 1000000; ++i) {
-	nmi_flushed = false;
-	nmi_received = 0;
-	++cpu0_nmi_ctr1;
-	while (cpu1_nmi_ctr1 != cpu0_nmi_ctr1) {
-	    pause();
-	}
-	apic_icr_write(APIC_DEST_PHYSICAL | APIC_DM_NMI | APIC_INT_ASSERT, 0);
-	while (!nmi_flushed) {
-	    pause();
-	}
-	if (nmi_received != 2) {
-	    ok = false;
-	    break;
-	}
-	++cpu0_nmi_ctr2;
-	while (cpu1_nmi_ctr2 != cpu0_nmi_ctr2) {
-	    pause();
-	}
-    }
-    nmi_done = true;
-    report("multiple nmi", ok);
-}
-
 static volatile int lvtt_counter = 0;
 
 static void lvtt_handler(isr_regs_t *regs)
@@ -614,7 +545,6 @@  int main(void)
     test_pv_ipi();
 
     test_sti_nmi();
-    test_multiple_nmi();
 
     test_apic_timer_one_shot();
     test_apic_change_mode();