diff mbox series

[v3,6/6] KVM: selftests: test KVM_GUESTDBG_BLOCKIRQ

Message ID 20210811122927.900604-7-mlevitsk@redhat.com (mailing list archive)
State New, archived
Headers show
Series KVM: my debug patch queue | expand

Commit Message

Maxim Levitsky Aug. 11, 2021, 12:29 p.m. UTC
Modify debug_regs test to create a pending interrupt
and see that it is blocked when single stepping is done
with KVM_GUESTDBG_BLOCKIRQ

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
---
 .../testing/selftests/kvm/x86_64/debug_regs.c | 24 ++++++++++++++++---
 1 file changed, 21 insertions(+), 3 deletions(-)

Comments

Paolo Bonzini Sept. 6, 2021, 11:20 a.m. UTC | #1
On 11/08/21 14:29, Maxim Levitsky wrote:
> Modify debug_regs test to create a pending interrupt
> and see that it is blocked when single stepping is done
> with KVM_GUESTDBG_BLOCKIRQ
> 
> Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
> ---
>   .../testing/selftests/kvm/x86_64/debug_regs.c | 24 ++++++++++++++++---
>   1 file changed, 21 insertions(+), 3 deletions(-)

I haven't looked very much at this, but the test fails.

Paolo

> diff --git a/tools/testing/selftests/kvm/x86_64/debug_regs.c b/tools/testing/selftests/kvm/x86_64/debug_regs.c
> index 6097a8283377..5f078db1bcba 100644
> --- a/tools/testing/selftests/kvm/x86_64/debug_regs.c
> +++ b/tools/testing/selftests/kvm/x86_64/debug_regs.c
> @@ -8,12 +8,15 @@
>   #include <string.h>
>   #include "kvm_util.h"
>   #include "processor.h"
> +#include "apic.h"
>   
>   #define VCPU_ID 0
>   
>   #define DR6_BD		(1 << 13)
>   #define DR7_GD		(1 << 13)
>   
> +#define IRQ_VECTOR 0xAA
> +
>   /* For testing data access debug BP */
>   uint32_t guest_value;
>   
> @@ -21,6 +24,11 @@ extern unsigned char sw_bp, hw_bp, write_data, ss_start, bd_start;
>   
>   static void guest_code(void)
>   {
> +	/* Create a pending interrupt on current vCPU */
> +	x2apic_enable();
> +	x2apic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_INT_ASSERT |
> +			 APIC_DM_FIXED | IRQ_VECTOR);
> +
>   	/*
>   	 * Software BP tests.
>   	 *
> @@ -38,12 +46,19 @@ static void guest_code(void)
>   		     "mov %%rax,%0;\n\t write_data:"
>   		     : "=m" (guest_value) : : "rax");
>   
> -	/* Single step test, covers 2 basic instructions and 2 emulated */
> +	/*
> +	 * Single step test, covers 2 basic instructions and 2 emulated
> +	 *
> +	 * Enable interrupts during the single stepping to see that
> +	 * pending interrupt we raised is not handled due to KVM_GUESTDBG_BLOCKIRQ
> +	 */
>   	asm volatile("ss_start: "
> +		     "sti\n\t"
>   		     "xor %%eax,%%eax\n\t"
>   		     "cpuid\n\t"
>   		     "movl $0x1a0,%%ecx\n\t"
>   		     "rdmsr\n\t"
> +		     "cli\n\t"
>   		     : : : "eax", "ebx", "ecx", "edx");
>   
>   	/* DR6.BD test */
> @@ -72,11 +87,13 @@ int main(void)
>   	uint64_t cmd;
>   	int i;
>   	/* Instruction lengths starting at ss_start */
> -	int ss_size[4] = {
> +	int ss_size[6] = {
> +		1,		/* sti*/
>   		2,		/* xor */
>   		2,		/* cpuid */
>   		5,		/* mov */
>   		2,		/* rdmsr */
> +		1,		/* cli */
>   	};
>   
>   	if (!kvm_check_cap(KVM_CAP_SET_GUEST_DEBUG)) {
> @@ -154,7 +171,8 @@ int main(void)
>   	for (i = 0; i < (sizeof(ss_size) / sizeof(ss_size[0])); i++) {
>   		target_rip += ss_size[i];
>   		CLEAR_DEBUG();
> -		debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
> +		debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP |
> +				KVM_GUESTDBG_BLOCKIRQ;
>   		debug.arch.debugreg[7] = 0x00000400;
>   		APPLY_DEBUG();
>   		vcpu_run(vm, VCPU_ID);
>
Maxim Levitsky Sept. 6, 2021, 9:03 p.m. UTC | #2
On Mon, 2021-09-06 at 13:20 +0200, Paolo Bonzini wrote:
> On 11/08/21 14:29, Maxim Levitsky wrote:
> > Modify debug_regs test to create a pending interrupt
> > and see that it is blocked when single stepping is done
> > with KVM_GUESTDBG_BLOCKIRQ
> > 
> > Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
> > ---
> >   .../testing/selftests/kvm/x86_64/debug_regs.c | 24 ++++++++++++++++---
> >   1 file changed, 21 insertions(+), 3 deletions(-)
> 
> I haven't looked very much at this, but the test fails.

Works for me :-(

[mlevitsk@starship ~/Kernel/master/src/tools/testing/selftests/kvm]$./x86_64/debug_regs 
[mlevitsk@starship ~/Kernel/master/src/tools/testing/selftests/kvm]$echo $?
0


Maybe you run the test on kernel that doesn't support KVM_GUESTDBG_BLOCKIRQ?

Best regards,
	Maxim Levitsky

> 
> Paolo
> 
> > diff --git a/tools/testing/selftests/kvm/x86_64/debug_regs.c b/tools/testing/selftests/kvm/x86_64/debug_regs.c
> > index 6097a8283377..5f078db1bcba 100644
> > --- a/tools/testing/selftests/kvm/x86_64/debug_regs.c
> > +++ b/tools/testing/selftests/kvm/x86_64/debug_regs.c
> > @@ -8,12 +8,15 @@
> >   #include <string.h>
> >   #include "kvm_util.h"
> >   #include "processor.h"
> > +#include "apic.h"
> >   
> >   #define VCPU_ID 0
> >   
> >   #define DR6_BD		(1 << 13)
> >   #define DR7_GD		(1 << 13)
> >   
> > +#define IRQ_VECTOR 0xAA
> > +
> >   /* For testing data access debug BP */
> >   uint32_t guest_value;
> >   
> > @@ -21,6 +24,11 @@ extern unsigned char sw_bp, hw_bp, write_data, ss_start, bd_start;
> >   
> >   static void guest_code(void)
> >   {
> > +	/* Create a pending interrupt on current vCPU */
> > +	x2apic_enable();
> > +	x2apic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_INT_ASSERT |
> > +			 APIC_DM_FIXED | IRQ_VECTOR);
> > +
> >   	/*
> >   	 * Software BP tests.
> >   	 *
> > @@ -38,12 +46,19 @@ static void guest_code(void)
> >   		     "mov %%rax,%0;\n\t write_data:"
> >   		     : "=m" (guest_value) : : "rax");
> >   
> > -	/* Single step test, covers 2 basic instructions and 2 emulated */
> > +	/*
> > +	 * Single step test, covers 2 basic instructions and 2 emulated
> > +	 *
> > +	 * Enable interrupts during the single stepping to see that
> > +	 * pending interrupt we raised is not handled due to KVM_GUESTDBG_BLOCKIRQ
> > +	 */
> >   	asm volatile("ss_start: "
> > +		     "sti\n\t"
> >   		     "xor %%eax,%%eax\n\t"
> >   		     "cpuid\n\t"
> >   		     "movl $0x1a0,%%ecx\n\t"
> >   		     "rdmsr\n\t"
> > +		     "cli\n\t"
> >   		     : : : "eax", "ebx", "ecx", "edx");
> >   
> >   	/* DR6.BD test */
> > @@ -72,11 +87,13 @@ int main(void)
> >   	uint64_t cmd;
> >   	int i;
> >   	/* Instruction lengths starting at ss_start */
> > -	int ss_size[4] = {
> > +	int ss_size[6] = {
> > +		1,		/* sti*/
> >   		2,		/* xor */
> >   		2,		/* cpuid */
> >   		5,		/* mov */
> >   		2,		/* rdmsr */
> > +		1,		/* cli */
> >   	};
> >   
> >   	if (!kvm_check_cap(KVM_CAP_SET_GUEST_DEBUG)) {
> > @@ -154,7 +171,8 @@ int main(void)
> >   	for (i = 0; i < (sizeof(ss_size) / sizeof(ss_size[0])); i++) {
> >   		target_rip += ss_size[i];
> >   		CLEAR_DEBUG();
> > -		debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
> > +		debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP |
> > +				KVM_GUESTDBG_BLOCKIRQ;
> >   		debug.arch.debugreg[7] = 0x00000400;
> >   		APPLY_DEBUG();
> >   		vcpu_run(vm, VCPU_ID);
> >
Vitaly Kuznetsov Nov. 1, 2021, 3:43 p.m. UTC | #3
Paolo Bonzini <pbonzini@redhat.com> writes:

> On 11/08/21 14:29, Maxim Levitsky wrote:
>> Modify debug_regs test to create a pending interrupt
>> and see that it is blocked when single stepping is done
>> with KVM_GUESTDBG_BLOCKIRQ
>> 
>> Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
>> ---
>>   .../testing/selftests/kvm/x86_64/debug_regs.c | 24 ++++++++++++++++---
>>   1 file changed, 21 insertions(+), 3 deletions(-)
>
> I haven't looked very much at this, but the test fails.
>

Same here,

the test passes on AMD but fails consistently on Intel:

# ./x86_64/debug_regs 
==== Test Assertion Failure ====
  x86_64/debug_regs.c:179: run->exit_reason == KVM_EXIT_DEBUG && run->debug.arch.exception == DB_VECTOR && run->debug.arch.pc == target_rip && run->debug.arch.dr6 == target_dr6
  pid=13434 tid=13434 errno=0 - Success
     1	0x00000000004027c6: main at debug_regs.c:179
     2	0x00007f65344cf554: ?? ??:0
     3	0x000000000040294a: _start at ??:?
  SINGLE_STEP[1]: exit 8 exception 1 rip 0x402a25 (should be 0x402a27) dr6 0xffff4ff0 (should be 0xffff4ff0)

(I know I'm late to the party).

> Paolo
>
>> diff --git a/tools/testing/selftests/kvm/x86_64/debug_regs.c b/tools/testing/selftests/kvm/x86_64/debug_regs.c
>> index 6097a8283377..5f078db1bcba 100644
>> --- a/tools/testing/selftests/kvm/x86_64/debug_regs.c
>> +++ b/tools/testing/selftests/kvm/x86_64/debug_regs.c
>> @@ -8,12 +8,15 @@
>>   #include <string.h>
>>   #include "kvm_util.h"
>>   #include "processor.h"
>> +#include "apic.h"
>>   
>>   #define VCPU_ID 0
>>   
>>   #define DR6_BD		(1 << 13)
>>   #define DR7_GD		(1 << 13)
>>   
>> +#define IRQ_VECTOR 0xAA
>> +
>>   /* For testing data access debug BP */
>>   uint32_t guest_value;
>>   
>> @@ -21,6 +24,11 @@ extern unsigned char sw_bp, hw_bp, write_data, ss_start, bd_start;
>>   
>>   static void guest_code(void)
>>   {
>> +	/* Create a pending interrupt on current vCPU */
>> +	x2apic_enable();
>> +	x2apic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_INT_ASSERT |
>> +			 APIC_DM_FIXED | IRQ_VECTOR);
>> +
>>   	/*
>>   	 * Software BP tests.
>>   	 *
>> @@ -38,12 +46,19 @@ static void guest_code(void)
>>   		     "mov %%rax,%0;\n\t write_data:"
>>   		     : "=m" (guest_value) : : "rax");
>>   
>> -	/* Single step test, covers 2 basic instructions and 2 emulated */
>> +	/*
>> +	 * Single step test, covers 2 basic instructions and 2 emulated
>> +	 *
>> +	 * Enable interrupts during the single stepping to see that
>> +	 * pending interrupt we raised is not handled due to KVM_GUESTDBG_BLOCKIRQ
>> +	 */
>>   	asm volatile("ss_start: "
>> +		     "sti\n\t"
>>   		     "xor %%eax,%%eax\n\t"
>>   		     "cpuid\n\t"
>>   		     "movl $0x1a0,%%ecx\n\t"
>>   		     "rdmsr\n\t"
>> +		     "cli\n\t"
>>   		     : : : "eax", "ebx", "ecx", "edx");
>>   
>>   	/* DR6.BD test */
>> @@ -72,11 +87,13 @@ int main(void)
>>   	uint64_t cmd;
>>   	int i;
>>   	/* Instruction lengths starting at ss_start */
>> -	int ss_size[4] = {
>> +	int ss_size[6] = {
>> +		1,		/* sti*/
>>   		2,		/* xor */
>>   		2,		/* cpuid */
>>   		5,		/* mov */
>>   		2,		/* rdmsr */
>> +		1,		/* cli */
>>   	};
>>   
>>   	if (!kvm_check_cap(KVM_CAP_SET_GUEST_DEBUG)) {
>> @@ -154,7 +171,8 @@ int main(void)
>>   	for (i = 0; i < (sizeof(ss_size) / sizeof(ss_size[0])); i++) {
>>   		target_rip += ss_size[i];
>>   		CLEAR_DEBUG();
>> -		debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
>> +		debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP |
>> +				KVM_GUESTDBG_BLOCKIRQ;
>>   		debug.arch.debugreg[7] = 0x00000400;
>>   		APPLY_DEBUG();
>>   		vcpu_run(vm, VCPU_ID);
>> 
>
Maxim Levitsky Nov. 1, 2021, 4:19 p.m. UTC | #4
On Mon, 2021-11-01 at 16:43 +0100, Vitaly Kuznetsov wrote:
> Paolo Bonzini <pbonzini@redhat.com> writes:
> 
> > On 11/08/21 14:29, Maxim Levitsky wrote:
> > > Modify debug_regs test to create a pending interrupt
> > > and see that it is blocked when single stepping is done
> > > with KVM_GUESTDBG_BLOCKIRQ
> > > 
> > > Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
> > > ---
> > >   .../testing/selftests/kvm/x86_64/debug_regs.c | 24 ++++++++++++++++---
> > >   1 file changed, 21 insertions(+), 3 deletions(-)
> > 
> > I haven't looked very much at this, but the test fails.
> > 
> 
> Same here,
> 
> the test passes on AMD but fails consistently on Intel:
> 
> # ./x86_64/debug_regs 
> ==== Test Assertion Failure ====
>   x86_64/debug_regs.c:179: run->exit_reason == KVM_EXIT_DEBUG && run->debug.arch.exception == DB_VECTOR && run->debug.arch.pc == target_rip && run->debug.arch.dr6 == target_dr6
>   pid=13434 tid=13434 errno=0 - Success
>      1	0x00000000004027c6: main at debug_regs.c:179
>      2	0x00007f65344cf554: ?? ??:0
>      3	0x000000000040294a: _start at ??:?
>   SINGLE_STEP[1]: exit 8 exception 1 rip 0x402a25 (should be 0x402a27) dr6 0xffff4ff0 (should be 0xffff4ff0)
> 
> (I know I'm late to the party).

Well that is strange. It passes on my intel laptop. Just tested 
(kvm/queue + qemu master, compiled today) :-(

It fails on iteration 1 (and there is iteration 0) which I think means that we
start with RIP on sti, and get #DB on start of xor instruction first (correctly), 
and then we get #DB again on start of xor instruction again?

Something very strange. My laptop has i7-7600U.

Best regards,
	Maxim Levitsky




> 
> > Paolo
> > 
> > > diff --git a/tools/testing/selftests/kvm/x86_64/debug_regs.c b/tools/testing/selftests/kvm/x86_64/debug_regs.c
> > > index 6097a8283377..5f078db1bcba 100644
> > > --- a/tools/testing/selftests/kvm/x86_64/debug_regs.c
> > > +++ b/tools/testing/selftests/kvm/x86_64/debug_regs.c
> > > @@ -8,12 +8,15 @@
> > >   #include <string.h>
> > >   #include "kvm_util.h"
> > >   #include "processor.h"
> > > +#include "apic.h"
> > >   
> > >   #define VCPU_ID 0
> > >   
> > >   #define DR6_BD		(1 << 13)
> > >   #define DR7_GD		(1 << 13)
> > >   
> > > +#define IRQ_VECTOR 0xAA
> > > +
> > >   /* For testing data access debug BP */
> > >   uint32_t guest_value;
> > >   
> > > @@ -21,6 +24,11 @@ extern unsigned char sw_bp, hw_bp, write_data, ss_start, bd_start;
> > >   
> > >   static void guest_code(void)
> > >   {
> > > +	/* Create a pending interrupt on current vCPU */
> > > +	x2apic_enable();
> > > +	x2apic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_INT_ASSERT |
> > > +			 APIC_DM_FIXED | IRQ_VECTOR);
> > > +
> > >   	/*
> > >   	 * Software BP tests.
> > >   	 *
> > > @@ -38,12 +46,19 @@ static void guest_code(void)
> > >   		     "mov %%rax,%0;\n\t write_data:"
> > >   		     : "=m" (guest_value) : : "rax");
> > >   
> > > -	/* Single step test, covers 2 basic instructions and 2 emulated */
> > > +	/*
> > > +	 * Single step test, covers 2 basic instructions and 2 emulated
> > > +	 *
> > > +	 * Enable interrupts during the single stepping to see that
> > > +	 * pending interrupt we raised is not handled due to KVM_GUESTDBG_BLOCKIRQ
> > > +	 */
> > >   	asm volatile("ss_start: "
> > > +		     "sti\n\t"
> > >   		     "xor %%eax,%%eax\n\t"
> > >   		     "cpuid\n\t"
> > >   		     "movl $0x1a0,%%ecx\n\t"
> > >   		     "rdmsr\n\t"
> > > +		     "cli\n\t"
> > >   		     : : : "eax", "ebx", "ecx", "edx");
> > >   
> > >   	/* DR6.BD test */
> > > @@ -72,11 +87,13 @@ int main(void)
> > >   	uint64_t cmd;
> > >   	int i;
> > >   	/* Instruction lengths starting at ss_start */
> > > -	int ss_size[4] = {
> > > +	int ss_size[6] = {
> > > +		1,		/* sti*/
> > >   		2,		/* xor */
> > >   		2,		/* cpuid */
> > >   		5,		/* mov */
> > >   		2,		/* rdmsr */
> > > +		1,		/* cli */
> > >   	};
> > >   
> > >   	if (!kvm_check_cap(KVM_CAP_SET_GUEST_DEBUG)) {
> > > @@ -154,7 +171,8 @@ int main(void)
> > >   	for (i = 0; i < (sizeof(ss_size) / sizeof(ss_size[0])); i++) {
> > >   		target_rip += ss_size[i];
> > >   		CLEAR_DEBUG();
> > > -		debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
> > > +		debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP |
> > > +				KVM_GUESTDBG_BLOCKIRQ;
> > >   		debug.arch.debugreg[7] = 0x00000400;
> > >   		APPLY_DEBUG();
> > >   		vcpu_run(vm, VCPU_ID);
> > >
Sean Christopherson Nov. 1, 2021, 11:21 p.m. UTC | #5
On Mon, Nov 01, 2021, Maxim Levitsky wrote:
> On Mon, 2021-11-01 at 16:43 +0100, Vitaly Kuznetsov wrote:
> > Paolo Bonzini <pbonzini@redhat.com> writes:
> > 
> > > On 11/08/21 14:29, Maxim Levitsky wrote:
> > > > Modify debug_regs test to create a pending interrupt
> > > > and see that it is blocked when single stepping is done
> > > > with KVM_GUESTDBG_BLOCKIRQ
> > > > 
> > > > Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
> > > > ---
> > > >   .../testing/selftests/kvm/x86_64/debug_regs.c | 24 ++++++++++++++++---
> > > >   1 file changed, 21 insertions(+), 3 deletions(-)
> > > 
> > > I haven't looked very much at this, but the test fails.
> > > 
> > 
> > Same here,
> > 
> > the test passes on AMD but fails consistently on Intel:
> > 
> > # ./x86_64/debug_regs 
> > ==== Test Assertion Failure ====
> >   x86_64/debug_regs.c:179: run->exit_reason == KVM_EXIT_DEBUG && run->debug.arch.exception == DB_VECTOR && run->debug.arch.pc == target_rip && run->debug.arch.dr6 == target_dr6
> >   pid=13434 tid=13434 errno=0 - Success
> >      1	0x00000000004027c6: main at debug_regs.c:179
> >      2	0x00007f65344cf554: ?? ??:0
> >      3	0x000000000040294a: _start at ??:?
> >   SINGLE_STEP[1]: exit 8 exception 1 rip 0x402a25 (should be 0x402a27) dr6 0xffff4ff0 (should be 0xffff4ff0)
> > 
> > (I know I'm late to the party).
> 
> Well that is strange. It passes on my intel laptop. Just tested 
> (kvm/queue + qemu master, compiled today) :-(
> 
> It fails on iteration 1 (and there is iteration 0) which I think means that we
> start with RIP on sti, and get #DB on start of xor instruction first (correctly), 
> and then we get #DB again on start of xor instruction again?
> 
> Something very strange. My laptop has i7-7600U.

I haven't verified on hardware, but my guess is that this code in vmx_vcpu_run()

	/* When single-stepping over STI and MOV SS, we must clear the
	 * corresponding interruptibility bits in the guest state. Otherwise
	 * vmentry fails as it then expects bit 14 (BS) in pending debug
	 * exceptions being set, but that's not correct for the guest debugging
	 * case. */
	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
		vmx_set_interrupt_shadow(vcpu, 0);

interacts badly with APICv=1.  It will kill the STI shadow and cause the IRQ in
vmcs.GUEST_RVI to be recognized when it (micro-)architecturally should not.  My
head is going in circles trying to sort out what would actually happen.  Maybe
comment out that and/or disable APICv to see if either one makes the test pass?
Vitaly Kuznetsov Nov. 2, 2021, 10:46 a.m. UTC | #6
Sean Christopherson <seanjc@google.com> writes:

> On Mon, Nov 01, 2021, Maxim Levitsky wrote:
>> On Mon, 2021-11-01 at 16:43 +0100, Vitaly Kuznetsov wrote:
>> > Paolo Bonzini <pbonzini@redhat.com> writes:
>> > 
>> > > On 11/08/21 14:29, Maxim Levitsky wrote:
>> > > > Modify debug_regs test to create a pending interrupt
>> > > > and see that it is blocked when single stepping is done
>> > > > with KVM_GUESTDBG_BLOCKIRQ
>> > > > 
>> > > > Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
>> > > > ---
>> > > >   .../testing/selftests/kvm/x86_64/debug_regs.c | 24 ++++++++++++++++---
>> > > >   1 file changed, 21 insertions(+), 3 deletions(-)
>> > > 
>> > > I haven't looked very much at this, but the test fails.
>> > > 
>> > 
>> > Same here,
>> > 
>> > the test passes on AMD but fails consistently on Intel:
>> > 
>> > # ./x86_64/debug_regs 
>> > ==== Test Assertion Failure ====
>> >   x86_64/debug_regs.c:179: run->exit_reason == KVM_EXIT_DEBUG && run->debug.arch.exception == DB_VECTOR && run->debug.arch.pc == target_rip && run->debug.arch.dr6 == target_dr6
>> >   pid=13434 tid=13434 errno=0 - Success
>> >      1	0x00000000004027c6: main at debug_regs.c:179
>> >      2	0x00007f65344cf554: ?? ??:0
>> >      3	0x000000000040294a: _start at ??:?
>> >   SINGLE_STEP[1]: exit 8 exception 1 rip 0x402a25 (should be 0x402a27) dr6 0xffff4ff0 (should be 0xffff4ff0)
>> > 
>> > (I know I'm late to the party).
>> 
>> Well that is strange. It passes on my intel laptop. Just tested 
>> (kvm/queue + qemu master, compiled today) :-(
>> 
>> It fails on iteration 1 (and there is iteration 0) which I think means that we
>> start with RIP on sti, and get #DB on start of xor instruction first (correctly), 
>> and then we get #DB again on start of xor instruction again?
>> 
>> Something very strange. My laptop has i7-7600U.
>
> I haven't verified on hardware, but my guess is that this code in vmx_vcpu_run()
>
> 	/* When single-stepping over STI and MOV SS, we must clear the
> 	 * corresponding interruptibility bits in the guest state. Otherwise
> 	 * vmentry fails as it then expects bit 14 (BS) in pending debug
> 	 * exceptions being set, but that's not correct for the guest debugging
> 	 * case. */
> 	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
> 		vmx_set_interrupt_shadow(vcpu, 0);
>
> interacts badly with APICv=1.  It will kill the STI shadow and cause the IRQ in
> vmcs.GUEST_RVI to be recognized when it (micro-)architecturally should not.  My
> head is going in circles trying to sort out what would actually happen.  Maybe
> comment out that and/or disable APICv to see if either one makes the test pass?
>

Interestingly,

loading 'kvm-intel' with 'enable_apicv=0' makes the test pass, however,
commenting out "vmx_set_interrupt_shadow()" as suggested gives a
different result (with enable_apicv=1):

# ./x86_64/debug_regs 
==== Test Assertion Failure ====
  x86_64/debug_regs.c:179: run->exit_reason == KVM_EXIT_DEBUG && run->debug.arch.exception == DB_VECTOR && run->debug.arch.pc == target_rip && run->debug.arch.dr6 == target_dr6
  pid=16352 tid=16352 errno=0 - Success
     1	0x0000000000402b33: main at debug_regs.c:179 (discriminator 10)
     2	0x00007f36401bd554: ?? ??:0
     3	0x00000000004023a9: _start at ??:?
  SINGLE_STEP[1]: exit 9 exception -2147483615 rip 0x1 (should be 0x4024d9) dr6 0xffff4ff0 (should be 0xffff4ff0)

this is a fairly old "Intel(R) Xeon(R) CPU E5-2603 v3".
Sean Christopherson Nov. 2, 2021, 3:53 p.m. UTC | #7
On Tue, Nov 02, 2021, Vitaly Kuznetsov wrote:
> Sean Christopherson <seanjc@google.com> writes:
> > I haven't verified on hardware, but my guess is that this code in vmx_vcpu_run()
> >
> > 	/* When single-stepping over STI and MOV SS, we must clear the
> > 	 * corresponding interruptibility bits in the guest state. Otherwise
> > 	 * vmentry fails as it then expects bit 14 (BS) in pending debug
> > 	 * exceptions being set, but that's not correct for the guest debugging
> > 	 * case. */
> > 	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
> > 		vmx_set_interrupt_shadow(vcpu, 0);
> >
> > interacts badly with APICv=1.  It will kill the STI shadow and cause the IRQ in
> > vmcs.GUEST_RVI to be recognized when it (micro-)architecturally should not.  My
> > head is going in circles trying to sort out what would actually happen.  Maybe
> > comment out that and/or disable APICv to see if either one makes the test pass?
> >
> 
> Interestingly,
> 
> loading 'kvm-intel' with 'enable_apicv=0' makes the test pass, however,
> commenting out "vmx_set_interrupt_shadow()" as suggested gives a
> different result (with enable_apicv=1):
> 
> # ./x86_64/debug_regs 
> ==== Test Assertion Failure ====
>   x86_64/debug_regs.c:179: run->exit_reason == KVM_EXIT_DEBUG && run->debug.arch.exception == DB_VECTOR && run->debug.arch.pc == target_rip && run->debug.arch.dr6 == target_dr6
>   pid=16352 tid=16352 errno=0 - Success
>      1	0x0000000000402b33: main at debug_regs.c:179 (discriminator 10)
>      2	0x00007f36401bd554: ?? ??:0
>      3	0x00000000004023a9: _start at ??:?
>   SINGLE_STEP[1]: exit 9 exception -2147483615 rip 0x1 (should be 0x4024d9) dr6 0xffff4ff0 (should be 0xffff4ff0)

Exit 9 is KVM_EXIT_FAIL_ENTRY, which in this case VM-Entry likely failed due to
invalid guest state because there was STI blocking with single-step enabled but
no pending BS #DB:

  Bit 14 (BS) must be 1 if the TF flag (bit 8) in the RFLAGS field is 1 and the
  BTF flag (bit 1) in the IA32_DEBUGCTL field is 0.

Which is precisely what that hack-a-fix avoids.  There isn't really a clean
solution for legacy single-step, AFAIK the only way to avoid this would be to
switch KVM_GUESTDBG_SINGLESTEP to use MTF.

But that mess is a red herring, the test fails with the same signature with APICv=1
if the STI is replaced by PUSHF+BTS+POPFD (to avoid the STI shadow).  We all missed
this key detail from Vitaly's report:

SINGLE_STEP[1]: exit 8 exception 1 rip 0x402a25 (should be 0x402a27) dr6 0xffff4ff0 (should be 0xffff4ff0)
                ^^^^^^

Exit '8' is KVM_EXIT_SHUTDOWN, i.e. the arrival of the IRQ hosed the guest because
the test doesn't invoke vm_init_descriptor_tables() to install event handlers.
The "exception 1" shows up because the run page isn't sanitized by the test, i.e.
it's stale data that happens to match.

So I would fully expect this test to fail with AVIC=1.  The problem is that
KVM_GUESTDBG_BLOCKIRQ does absolutely nothing to handle APICv interrupts.  And
even if KVM does something to fudge that behavior in the emulated local APIC, the
test will then fail miserably virtual IPIs (currently AVIC only).

I stand by my original comment that "Deviating this far from architectural behavior
will end in tears at some point."  Rather than try to "fix" APICv, I vote to instead
either reject KVM_GUESTDBG_BLOCKIRQ if APICv=1, or log a debug message saying that
KVM_GUESTDBG_BLOCKIRQ is ineffective with APICv=1.
Vitaly Kuznetsov Nov. 2, 2021, 4:18 p.m. UTC | #8
Sean Christopherson <seanjc@google.com> writes:

> On Tue, Nov 02, 2021, Vitaly Kuznetsov wrote:
>> Sean Christopherson <seanjc@google.com> writes:
>> > I haven't verified on hardware, but my guess is that this code in vmx_vcpu_run()
>> >
>> > 	/* When single-stepping over STI and MOV SS, we must clear the
>> > 	 * corresponding interruptibility bits in the guest state. Otherwise
>> > 	 * vmentry fails as it then expects bit 14 (BS) in pending debug
>> > 	 * exceptions being set, but that's not correct for the guest debugging
>> > 	 * case. */
>> > 	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
>> > 		vmx_set_interrupt_shadow(vcpu, 0);
>> >
>> > interacts badly with APICv=1.  It will kill the STI shadow and cause the IRQ in
>> > vmcs.GUEST_RVI to be recognized when it (micro-)architecturally should not.  My
>> > head is going in circles trying to sort out what would actually happen.  Maybe
>> > comment out that and/or disable APICv to see if either one makes the test pass?
>> >
>> 
>> Interestingly,
>> 
>> loading 'kvm-intel' with 'enable_apicv=0' makes the test pass, however,
>> commenting out "vmx_set_interrupt_shadow()" as suggested gives a
>> different result (with enable_apicv=1):
>> 
>> # ./x86_64/debug_regs 
>> ==== Test Assertion Failure ====
>>   x86_64/debug_regs.c:179: run->exit_reason == KVM_EXIT_DEBUG && run->debug.arch.exception == DB_VECTOR && run->debug.arch.pc == target_rip && run->debug.arch.dr6 == target_dr6
>>   pid=16352 tid=16352 errno=0 - Success
>>      1	0x0000000000402b33: main at debug_regs.c:179 (discriminator 10)
>>      2	0x00007f36401bd554: ?? ??:0
>>      3	0x00000000004023a9: _start at ??:?
>>   SINGLE_STEP[1]: exit 9 exception -2147483615 rip 0x1 (should be 0x4024d9) dr6 0xffff4ff0 (should be 0xffff4ff0)
>
> Exit 9 is KVM_EXIT_FAIL_ENTRY, which in this case VM-Entry likely failed due to
> invalid guest state because there was STI blocking with single-step enabled but
> no pending BS #DB:
>
>   Bit 14 (BS) must be 1 if the TF flag (bit 8) in the RFLAGS field is 1 and the
>   BTF flag (bit 1) in the IA32_DEBUGCTL field is 0.
>
> Which is precisely what that hack-a-fix avoids.  There isn't really a clean
> solution for legacy single-step, AFAIK the only way to avoid this would be to
> switch KVM_GUESTDBG_SINGLESTEP to use MTF.
>
> But that mess is a red herring, the test fails with the same signature with APICv=1
> if the STI is replaced by PUSHF+BTS+POPFD (to avoid the STI shadow).  We all missed
> this key detail from Vitaly's report:
>
> SINGLE_STEP[1]: exit 8 exception 1 rip 0x402a25 (should be 0x402a27) dr6 0xffff4ff0 (should be 0xffff4ff0)
>                 ^^^^^^
>
> Exit '8' is KVM_EXIT_SHUTDOWN, i.e. the arrival of the IRQ hosed the guest because
> the test doesn't invoke vm_init_descriptor_tables() to install event handlers.
> The "exception 1" shows up because the run page isn't sanitized by the test, i.e.
> it's stale data that happens to match.
>
> So I would fully expect this test to fail with AVIC=1.  The problem is that
> KVM_GUESTDBG_BLOCKIRQ does absolutely nothing to handle APICv interrupts.  And
> even if KVM does something to fudge that behavior in the emulated local APIC, the
> test will then fail miserably virtual IPIs (currently AVIC only).

FWIW, the test doesn't seem to fail on my AMD EPYC system even with "avic=1" ...

>
> I stand by my original comment that "Deviating this far from architectural behavior
> will end in tears at some point."  Rather than try to "fix" APICv, I vote to instead
> either reject KVM_GUESTDBG_BLOCKIRQ if APICv=1, or log a debug message saying that
> KVM_GUESTDBG_BLOCKIRQ is ineffective with APICv=1.
>
Sean Christopherson Nov. 2, 2021, 6:45 p.m. UTC | #9
On Tue, Nov 02, 2021, Vitaly Kuznetsov wrote:
> Sean Christopherson <seanjc@google.com> writes:
> > But that mess is a red herring, the test fails with the same signature with APICv=1
> > if the STI is replaced by PUSHF+BTS+POPFD (to avoid the STI shadow).  We all missed
> > this key detail from Vitaly's report:
> >
> > SINGLE_STEP[1]: exit 8 exception 1 rip 0x402a25 (should be 0x402a27) dr6 0xffff4ff0 (should be 0xffff4ff0)
> >                 ^^^^^^
> >
> > Exit '8' is KVM_EXIT_SHUTDOWN, i.e. the arrival of the IRQ hosed the guest because
> > the test doesn't invoke vm_init_descriptor_tables() to install event handlers.
> > The "exception 1" shows up because the run page isn't sanitized by the test, i.e.
> > it's stale data that happens to match.
> >
> > So I would fully expect this test to fail with AVIC=1.  The problem is that
> > KVM_GUESTDBG_BLOCKIRQ does absolutely nothing to handle APICv interrupts.  And
> > even if KVM does something to fudge that behavior in the emulated local APIC, the
> > test will then fail miserably virtual IPIs (currently AVIC only).
> 
> FWIW, the test doesn't seem to fail on my AMD EPYC system even with "avic=1" ...

Huh.  Assuming the IRQ is pending in the vIRR and KVM didn't screw up elsewhere,
that seems like a CPU AVIC bug.  #DBs have priority over IRQs, but single-step
#DBs are trap-like and KVM (hopefully) isn't injecting a #DB, so a pending IRQ
should be taken on the current instruction in the guest when executing VMRUN with
guest.EFLAGS.IF=1,TF=1 since there will be a one-instruction delay before the
single-step #DB kicks in.
Maxim Levitsky Nov. 3, 2021, 9:04 a.m. UTC | #10
On Tue, 2021-11-02 at 18:45 +0000, Sean Christopherson wrote:
> On Tue, Nov 02, 2021, Vitaly Kuznetsov wrote:
> > Sean Christopherson <seanjc@google.com> writes:
> > > But that mess is a red herring, the test fails with the same signature with APICv=1
> > > if the STI is replaced by PUSHF+BTS+POPFD (to avoid the STI shadow).  We all missed
> > > this key detail from Vitaly's report:
> > > 
> > > SINGLE_STEP[1]: exit 8 exception 1 rip 0x402a25 (should be 0x402a27) dr6 0xffff4ff0 (should be 0xffff4ff0)
> > >                 ^^^^^^
> > > 
> > > Exit '8' is KVM_EXIT_SHUTDOWN, i.e. the arrival of the IRQ hosed the guest because
> > > the test doesn't invoke vm_init_descriptor_tables() to install event handlers.
> > > The "exception 1" shows up because the run page isn't sanitized by the test, i.e.
> > > it's stale data that happens to match.
> > > 
> > > So I would fully expect this test to fail with AVIC=1.  The problem is that
> > > KVM_GUESTDBG_BLOCKIRQ does absolutely nothing to handle APICv interrupts.  And
> > > even if KVM does something to fudge that behavior in the emulated local APIC, the
> > > test will then fail miserably virtual IPIs (currently AVIC only).
> > 
> > FWIW, the test doesn't seem to fail on my AMD EPYC system even with "avic=1" ...
Its because AVIC is inhibited for many reasons. In this test x2apic is used,
and having x2apic in CPUID inhibits AVIC.

> 
> Huh.  Assuming the IRQ is pending in the vIRR and KVM didn't screw up elsewhere,
> that seems like a CPU AVIC bug.  #DBs have priority over IRQs, but single-step
> #DBs are trap-like and KVM (hopefully) isn't injecting a #DB, so a pending IRQ
> should be taken on the current instruction in the guest when executing VMRUN with
> guest.EFLAGS.IF=1,TF=1 since there will be a one-instruction delay before the
> single-step #DB kicks in.
> 
We could inhibit AVIC/APICv when KVM_GUESTDBG_BLOCKIRQ is in use, I'll send patch for
this soon.

Thanks a lot for finding out what is going on!

Best regards,	Maxim Levitsky
diff mbox series

Patch

diff --git a/tools/testing/selftests/kvm/x86_64/debug_regs.c b/tools/testing/selftests/kvm/x86_64/debug_regs.c
index 6097a8283377..5f078db1bcba 100644
--- a/tools/testing/selftests/kvm/x86_64/debug_regs.c
+++ b/tools/testing/selftests/kvm/x86_64/debug_regs.c
@@ -8,12 +8,15 @@ 
 #include <string.h>
 #include "kvm_util.h"
 #include "processor.h"
+#include "apic.h"
 
 #define VCPU_ID 0
 
 #define DR6_BD		(1 << 13)
 #define DR7_GD		(1 << 13)
 
+#define IRQ_VECTOR 0xAA
+
 /* For testing data access debug BP */
 uint32_t guest_value;
 
@@ -21,6 +24,11 @@  extern unsigned char sw_bp, hw_bp, write_data, ss_start, bd_start;
 
 static void guest_code(void)
 {
+	/* Create a pending interrupt on current vCPU */
+	x2apic_enable();
+	x2apic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_INT_ASSERT |
+			 APIC_DM_FIXED | IRQ_VECTOR);
+
 	/*
 	 * Software BP tests.
 	 *
@@ -38,12 +46,19 @@  static void guest_code(void)
 		     "mov %%rax,%0;\n\t write_data:"
 		     : "=m" (guest_value) : : "rax");
 
-	/* Single step test, covers 2 basic instructions and 2 emulated */
+	/*
+	 * Single step test, covers 2 basic instructions and 2 emulated
+	 *
+	 * Enable interrupts during the single stepping to see that
+	 * pending interrupt we raised is not handled due to KVM_GUESTDBG_BLOCKIRQ
+	 */
 	asm volatile("ss_start: "
+		     "sti\n\t"
 		     "xor %%eax,%%eax\n\t"
 		     "cpuid\n\t"
 		     "movl $0x1a0,%%ecx\n\t"
 		     "rdmsr\n\t"
+		     "cli\n\t"
 		     : : : "eax", "ebx", "ecx", "edx");
 
 	/* DR6.BD test */
@@ -72,11 +87,13 @@  int main(void)
 	uint64_t cmd;
 	int i;
 	/* Instruction lengths starting at ss_start */
-	int ss_size[4] = {
+	int ss_size[6] = {
+		1,		/* sti*/
 		2,		/* xor */
 		2,		/* cpuid */
 		5,		/* mov */
 		2,		/* rdmsr */
+		1,		/* cli */
 	};
 
 	if (!kvm_check_cap(KVM_CAP_SET_GUEST_DEBUG)) {
@@ -154,7 +171,8 @@  int main(void)
 	for (i = 0; i < (sizeof(ss_size) / sizeof(ss_size[0])); i++) {
 		target_rip += ss_size[i];
 		CLEAR_DEBUG();
-		debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
+		debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP |
+				KVM_GUESTDBG_BLOCKIRQ;
 		debug.arch.debugreg[7] = 0x00000400;
 		APPLY_DEBUG();
 		vcpu_run(vm, VCPU_ID);