diff mbox

[9/9,SVM] inject NMI after IRET from a previous NMI, not before.

Message ID 1241511275-2261-9-git-send-email-gleb@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Gleb Natapov May 5, 2009, 8:14 a.m. UTC
If NMI is received during handling of another NMI it should be injected
immediately after IRET from previous NMI handler, but SVM intercept IRET
before instruction execution so we can't inject pending NMI at this
point and there is not way to request exit when NMI window opens. This
patch fix SVM code to open NMI window after IRET by single stepping over
IRET instruction.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |    2 +
 arch/x86/kvm/svm.c              |   62 +++++++++++++++++++++++++++++++-------
 2 files changed, 52 insertions(+), 12 deletions(-)

Comments

Jan Kiszka May 5, 2009, 8:45 a.m. UTC | #1
Gleb Natapov wrote:
> If NMI is received during handling of another NMI it should be injected
> immediately after IRET from previous NMI handler, but SVM intercept IRET
> before instruction execution so we can't inject pending NMI at this
> point and there is not way to request exit when NMI window opens. This
> patch fix SVM code to open NMI window after IRET by single stepping over
> IRET instruction.
> 
> Signed-off-by: Gleb Natapov <gleb@redhat.com>
> ---
>  arch/x86/include/asm/kvm_host.h |    2 +
>  arch/x86/kvm/svm.c              |   62 +++++++++++++++++++++++++++++++-------
>  2 files changed, 52 insertions(+), 12 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index fea0429..bcd0857 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -358,6 +358,7 @@ struct kvm_vcpu_arch {
>  	unsigned int time_offset;
>  	struct page *time_page;
>  
> +	bool singlestep; /* guest is single stepped by KVM */
>  	bool nmi_pending;
>  	bool nmi_injected;
>  
> @@ -772,6 +773,7 @@ enum {
>  #define HF_HIF_MASK		(1 << 1)
>  #define HF_VINTR_MASK		(1 << 2)
>  #define HF_NMI_MASK		(1 << 3)
> +#define HF_IRET_MASK		(1 << 4)
>  
>  /*
>   * Hardware virtualization extension instructions may fault if a
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index d5173a2..bf10991 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -933,15 +933,16 @@ static void svm_set_segment(struct kvm_vcpu *vcpu,
>  
>  }
>  
> -static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
> +static void update_db_intercept(struct kvm_vcpu *vcpu)
>  {
> -	int old_debug = vcpu->guest_debug;
>  	struct vcpu_svm *svm = to_svm(vcpu);
>  
> -	vcpu->guest_debug = dbg->control;
> -
>  	svm->vmcb->control.intercept_exceptions &=
>  		~((1 << DB_VECTOR) | (1 << BP_VECTOR));
> +
> +	if (vcpu->arch.singlestep)
> +		svm->vmcb->control.intercept_exceptions |= (1 << DB_VECTOR);
> +
>  	if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
>  		if (vcpu->guest_debug &
>  		    (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
> @@ -952,6 +953,16 @@ static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
>  				1 << BP_VECTOR;
>  	} else
>  		vcpu->guest_debug = 0;
> +}
> +
> +static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
> +{
> +	int old_debug = vcpu->guest_debug;
> +	struct vcpu_svm *svm = to_svm(vcpu);
> +
> +	vcpu->guest_debug = dbg->control;
> +
> +	update_db_intercept(vcpu);
>  
>  	if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
>  		svm->vmcb->save.dr7 = dbg->arch.debugreg[7];
> @@ -1101,14 +1112,30 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
>  static int db_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
>  {
>  	if (!(svm->vcpu.guest_debug &
> -	      (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
> +	      (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) &&
> +		!svm->vcpu.arch.singlestep) {
>  		kvm_queue_exception(&svm->vcpu, DB_VECTOR);
>  		return 1;
>  	}
> -	kvm_run->exit_reason = KVM_EXIT_DEBUG;
> -	kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip;
> -	kvm_run->debug.arch.exception = DB_VECTOR;
> -	return 0;
> +
> +	if (svm->vcpu.arch.singlestep) {
> +		svm->vcpu.arch.singlestep = false;
> +		if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP))
> +			svm->vmcb->save.rflags &=
> +				~(X86_EFLAGS_TF | X86_EFLAGS_RF);
> +		update_db_intercept(to_svm(svm));
> +	}
> +
> +	if (svm->vcpu.guest_debug &
> +	    (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)){
> +		kvm_run->exit_reason = KVM_EXIT_DEBUG;
> +		kvm_run->debug.arch.pc =
> +			svm->vmcb->save.cs.base + svm->vmcb->save.rip;
> +		kvm_run->debug.arch.exception = DB_VECTOR;
> +		return 0;
> +	}
> +
> +	return 1;
>  }
>  
>  static int bp_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
> @@ -1855,7 +1882,7 @@ static int iret_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
>  {
>  	++svm->vcpu.stat.nmi_window_exits;
>  	svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET);
> -	svm->vcpu.arch.hflags &= ~HF_NMI_MASK;
> +	svm->vcpu.arch.hflags |= HF_IRET_MASK;
>  	return 1;
>  }
>  
> @@ -2331,8 +2358,16 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu)
>  {
>  	struct vcpu_svm *svm = to_svm(vcpu);
>  
> -	if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)
> -		enable_irq_window(vcpu);
> +	if ((svm->vcpu.arch.hflags & (HF_NMI_MASK | HF_IRET_MASK))
> +	    == HF_NMI_MASK)
> +		return; /* IRET will cause a vm exit */
> +
> +	/* Something prevents NMI from been injected. Single step over
> +	   possible problem (IRET or exception injection or interrupt
> +	   shadow) */
> +	vcpu->arch.singlestep = true;
> +	svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);

Can you single-step like this out of an IRQ handler? I mean, IRET will
restore the flags from the stack, and those settings should be
overwritten. Or am I missing something?

> +	update_db_intercept(vcpu);
>  }
>  
>  static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
> @@ -2375,6 +2410,9 @@ static void svm_complete_interrupts(struct vcpu_svm *svm)
>  	int type;
>  	u32 exitintinfo = svm->vmcb->control.exit_int_info;
>  
> +	if (svm->vcpu.arch.hflags & HF_IRET_MASK)
> +		svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK);
> +
>  	svm->vcpu.arch.nmi_injected = false;
>  	kvm_clear_exception_queue(&svm->vcpu);
>  	kvm_clear_interrupt_queue(&svm->vcpu);

Jan
Gleb Natapov May 5, 2009, 9:03 a.m. UTC | #2
On Tue, May 05, 2009 at 10:45:20AM +0200, Jan Kiszka wrote:
> > @@ -2331,8 +2358,16 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu)
> >  {
> >  	struct vcpu_svm *svm = to_svm(vcpu);
> >  
> > -	if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)
> > -		enable_irq_window(vcpu);
> > +	if ((svm->vcpu.arch.hflags & (HF_NMI_MASK | HF_IRET_MASK))
> > +	    == HF_NMI_MASK)
> > +		return; /* IRET will cause a vm exit */
> > +
> > +	/* Something prevents NMI from been injected. Single step over
> > +	   possible problem (IRET or exception injection or interrupt
> > +	   shadow) */
> > +	vcpu->arch.singlestep = true;
> > +	svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
> 
> Can you single-step like this out of an IRQ handler? I mean, IRET will
> restore the flags from the stack, and those settings should be
> overwritten. Or am I missing something?
> 
It seems to be working :) Shouldn't CPU checks single step before
executing IRET and thus using old flags value? It is interesting to
check what rflag value is immediately after IRET.

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jan Kiszka May 5, 2009, 9:25 a.m. UTC | #3
Gleb Natapov wrote:
> On Tue, May 05, 2009 at 10:45:20AM +0200, Jan Kiszka wrote:
>>> @@ -2331,8 +2358,16 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu)
>>>  {
>>>  	struct vcpu_svm *svm = to_svm(vcpu);
>>>  
>>> -	if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)
>>> -		enable_irq_window(vcpu);
>>> +	if ((svm->vcpu.arch.hflags & (HF_NMI_MASK | HF_IRET_MASK))
>>> +	    == HF_NMI_MASK)
>>> +		return; /* IRET will cause a vm exit */
>>> +
>>> +	/* Something prevents NMI from been injected. Single step over
>>> +	   possible problem (IRET or exception injection or interrupt
>>> +	   shadow) */
>>> +	vcpu->arch.singlestep = true;
>>> +	svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
>> Can you single-step like this out of an IRQ handler? I mean, IRET will
>> restore the flags from the stack, and those settings should be
>> overwritten. Or am I missing something?
>>
> It seems to be working :) Shouldn't CPU checks single step before
> executing IRET and thus using old flags value? It is interesting to
> check what rflag value is immediately after IRET.

Hmm, guess I have to re-read some manuals. But regarding
rflags-after-iret, I think it should be cleared due to that restoring
from the stack.

Jan
Gleb Natapov May 5, 2009, 9:32 a.m. UTC | #4
On Tue, May 05, 2009 at 11:25:13AM +0200, Jan Kiszka wrote:
> Gleb Natapov wrote:
> > On Tue, May 05, 2009 at 10:45:20AM +0200, Jan Kiszka wrote:
> >>> @@ -2331,8 +2358,16 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu)
> >>>  {
> >>>  	struct vcpu_svm *svm = to_svm(vcpu);
> >>>  
> >>> -	if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)
> >>> -		enable_irq_window(vcpu);
> >>> +	if ((svm->vcpu.arch.hflags & (HF_NMI_MASK | HF_IRET_MASK))
> >>> +	    == HF_NMI_MASK)
> >>> +		return; /* IRET will cause a vm exit */
> >>> +
> >>> +	/* Something prevents NMI from been injected. Single step over
> >>> +	   possible problem (IRET or exception injection or interrupt
> >>> +	   shadow) */
> >>> +	vcpu->arch.singlestep = true;
> >>> +	svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
> >> Can you single-step like this out of an IRQ handler? I mean, IRET will
> >> restore the flags from the stack, and those settings should be
> >> overwritten. Or am I missing something?
> >>
> > It seems to be working :) Shouldn't CPU checks single step before
> > executing IRET and thus using old flags value? It is interesting to
> > check what rflag value is immediately after IRET.
> 
> Hmm, guess I have to re-read some manuals. But regarding
> rflags-after-iret, I think it should be cleared due to that restoring
> from the stack.
> 
Just re-tested this once more. DB is intercepted after IRET and TF/RF is
cleared already.

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index fea0429..bcd0857 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -358,6 +358,7 @@  struct kvm_vcpu_arch {
 	unsigned int time_offset;
 	struct page *time_page;
 
+	bool singlestep; /* guest is single stepped by KVM */
 	bool nmi_pending;
 	bool nmi_injected;
 
@@ -772,6 +773,7 @@  enum {
 #define HF_HIF_MASK		(1 << 1)
 #define HF_VINTR_MASK		(1 << 2)
 #define HF_NMI_MASK		(1 << 3)
+#define HF_IRET_MASK		(1 << 4)
 
 /*
  * Hardware virtualization extension instructions may fault if a
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index d5173a2..bf10991 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -933,15 +933,16 @@  static void svm_set_segment(struct kvm_vcpu *vcpu,
 
 }
 
-static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
+static void update_db_intercept(struct kvm_vcpu *vcpu)
 {
-	int old_debug = vcpu->guest_debug;
 	struct vcpu_svm *svm = to_svm(vcpu);
 
-	vcpu->guest_debug = dbg->control;
-
 	svm->vmcb->control.intercept_exceptions &=
 		~((1 << DB_VECTOR) | (1 << BP_VECTOR));
+
+	if (vcpu->arch.singlestep)
+		svm->vmcb->control.intercept_exceptions |= (1 << DB_VECTOR);
+
 	if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
 		if (vcpu->guest_debug &
 		    (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
@@ -952,6 +953,16 @@  static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
 				1 << BP_VECTOR;
 	} else
 		vcpu->guest_debug = 0;
+}
+
+static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
+{
+	int old_debug = vcpu->guest_debug;
+	struct vcpu_svm *svm = to_svm(vcpu);
+
+	vcpu->guest_debug = dbg->control;
+
+	update_db_intercept(vcpu);
 
 	if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
 		svm->vmcb->save.dr7 = dbg->arch.debugreg[7];
@@ -1101,14 +1112,30 @@  static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 static int db_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 {
 	if (!(svm->vcpu.guest_debug &
-	      (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
+	      (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) &&
+		!svm->vcpu.arch.singlestep) {
 		kvm_queue_exception(&svm->vcpu, DB_VECTOR);
 		return 1;
 	}
-	kvm_run->exit_reason = KVM_EXIT_DEBUG;
-	kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip;
-	kvm_run->debug.arch.exception = DB_VECTOR;
-	return 0;
+
+	if (svm->vcpu.arch.singlestep) {
+		svm->vcpu.arch.singlestep = false;
+		if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP))
+			svm->vmcb->save.rflags &=
+				~(X86_EFLAGS_TF | X86_EFLAGS_RF);
+		update_db_intercept(to_svm(svm));
+	}
+
+	if (svm->vcpu.guest_debug &
+	    (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)){
+		kvm_run->exit_reason = KVM_EXIT_DEBUG;
+		kvm_run->debug.arch.pc =
+			svm->vmcb->save.cs.base + svm->vmcb->save.rip;
+		kvm_run->debug.arch.exception = DB_VECTOR;
+		return 0;
+	}
+
+	return 1;
 }
 
 static int bp_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
@@ -1855,7 +1882,7 @@  static int iret_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 {
 	++svm->vcpu.stat.nmi_window_exits;
 	svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET);
-	svm->vcpu.arch.hflags &= ~HF_NMI_MASK;
+	svm->vcpu.arch.hflags |= HF_IRET_MASK;
 	return 1;
 }
 
@@ -2331,8 +2358,16 @@  static void enable_nmi_window(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
-	if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)
-		enable_irq_window(vcpu);
+	if ((svm->vcpu.arch.hflags & (HF_NMI_MASK | HF_IRET_MASK))
+	    == HF_NMI_MASK)
+		return; /* IRET will cause a vm exit */
+
+	/* Something prevents NMI from been injected. Single step over
+	   possible problem (IRET or exception injection or interrupt
+	   shadow) */
+	vcpu->arch.singlestep = true;
+	svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
+	update_db_intercept(vcpu);
 }
 
 static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
@@ -2375,6 +2410,9 @@  static void svm_complete_interrupts(struct vcpu_svm *svm)
 	int type;
 	u32 exitintinfo = svm->vmcb->control.exit_int_info;
 
+	if (svm->vcpu.arch.hflags & HF_IRET_MASK)
+		svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK);
+
 	svm->vcpu.arch.nmi_injected = false;
 	kvm_clear_exception_queue(&svm->vcpu);
 	kvm_clear_interrupt_queue(&svm->vcpu);