diff mbox

[4/4] Fix task switching.

Message ID 20090329141218.30481.92675.stgit@trex.usersys.redhat.com (mailing list archive)
State Accepted
Headers show

Commit Message

Gleb Natapov March 29, 2009, 2:12 p.m. UTC
The patch fixes two problems with task switching.
1. Back link is written to a wrong TSS.
2. Instruction emulation is not needed if the reason for task switch
   is a task gate in IDT and access to it is caused by an external event.

2 is currently solved only for VMX since there is no reliable way to
skip an instruction in SVM. We should emulate it instead.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
---

 arch/x86/include/asm/svm.h |    1 +
 arch/x86/kvm/svm.c         |   25 ++++++++++++++++++-------
 arch/x86/kvm/vmx.c         |   40 +++++++++++++++++++++++++++++-----------
 arch/x86/kvm/x86.c         |   40 +++++++++++++++++++++++++++++++---------
 4 files changed, 79 insertions(+), 27 deletions(-)


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Avi Kivity March 30, 2009, 7:39 a.m. UTC | #1
Gleb Natapov wrote:
> The patch fixes two problems with task switching.
> 1. Back link is written to a wrong TSS.
> 2. Instruction emulation is not needed if the reason for task switch
>    is a task gate in IDT and access to it is caused by an external even.
>
> 2 is currently solved only for VMX since there is not reliable way to
> skip an instruction in SVM. We should emulate it instead.
>
>   

Looks good, but please split into (at least) two patches.  Also please 
provide a test case so we don't regress again.
Jan Kiszka March 30, 2009, 4:04 p.m. UTC | #2
Gleb Natapov wrote:
> The patch fixes two problems with task switching.
> 1. Back link is written to a wrong TSS.
> 2. Instruction emulation is not needed if the reason for task switch
>    is a task gate in IDT and access to it is caused by an external even.
> 
> 2 is currently solved only for VMX since there is not reliable way to
> skip an instruction in SVM. We should emulate it instead.

Does this series fix all issues Bernhard, Thomas and Julian stumbled over?

Jan

> 
> Signed-off-by: Gleb Natapov <gleb@redhat.com>
> ---
> 
>  arch/x86/include/asm/svm.h |    1 +
>  arch/x86/kvm/svm.c         |   25 ++++++++++++++++++-------
>  arch/x86/kvm/vmx.c         |   40 +++++++++++++++++++++++++++++-----------
>  arch/x86/kvm/x86.c         |   40 +++++++++++++++++++++++++++++++---------
>  4 files changed, 79 insertions(+), 27 deletions(-)
> 
> diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
> index 82ada75..85574b7 100644
> --- a/arch/x86/include/asm/svm.h
> +++ b/arch/x86/include/asm/svm.h
> @@ -225,6 +225,7 @@ struct __attribute__ ((__packed__)) vmcb {
>  #define SVM_EVTINJ_VALID_ERR (1 << 11)
>  
>  #define SVM_EXITINTINFO_VEC_MASK SVM_EVTINJ_VEC_MASK
> +#define SVM_EXITINTINFO_TYPE_MASK SVM_EVTINJ_TYPE_MASK
>  
>  #define	SVM_EXITINTINFO_TYPE_INTR SVM_EVTINJ_TYPE_INTR
>  #define	SVM_EXITINTINFO_TYPE_NMI SVM_EVTINJ_TYPE_NMI
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index 1fcbc17..3ffb695 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -1823,17 +1823,28 @@ static int task_switch_interception(struct vcpu_svm *svm,
>  				    struct kvm_run *kvm_run)
>  {
>  	u16 tss_selector;
> +	int reason;
> +	int int_type = svm->vmcb->control.exit_int_info &
> +		SVM_EXITINTINFO_TYPE_MASK;
>  
>  	tss_selector = (u16)svm->vmcb->control.exit_info_1;
> +
>  	if (svm->vmcb->control.exit_info_2 &
>  	    (1ULL << SVM_EXITINFOSHIFT_TS_REASON_IRET))
> -		return kvm_task_switch(&svm->vcpu, tss_selector,
> -				       TASK_SWITCH_IRET);
> -	if (svm->vmcb->control.exit_info_2 &
> -	    (1ULL << SVM_EXITINFOSHIFT_TS_REASON_JMP))
> -		return kvm_task_switch(&svm->vcpu, tss_selector,
> -				       TASK_SWITCH_JMP);
> -	return kvm_task_switch(&svm->vcpu, tss_selector, TASK_SWITCH_CALL);
> +		reason = TASK_SWITCH_IRET;
> +	else if (svm->vmcb->control.exit_info_2 &
> +		 (1ULL << SVM_EXITINFOSHIFT_TS_REASON_JMP))
> +		reason = TASK_SWITCH_JMP;
> +	else if (svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID)
> +		reason = TASK_SWITCH_GATE;
> +	else
> +		reason = TASK_SWITCH_CALL;
> +
> +
> +	if (reason != TASK_SWITCH_GATE || int_type == SVM_EXITINTINFO_TYPE_SOFT)
> +		skip_emulated_instruction(&svm->vcpu);
> +
> +	return kvm_task_switch(&svm->vcpu, tss_selector, reason);
>  }
>  
>  static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 0da7a9e..01db958 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -3025,22 +3025,40 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
>  	struct vcpu_vmx *vmx = to_vmx(vcpu);
>  	unsigned long exit_qualification;
>  	u16 tss_selector;
> -	int reason;
> +	int reason, type, idt_v;
> +
> +	idt_v = (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK);
> +	type = (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK);
>  
>  	exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
>  
>  	reason = (u32)exit_qualification >> 30;
> -	if (reason == TASK_SWITCH_GATE && vmx->vcpu.arch.nmi_injected &&
> -	    (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
> -	    (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK)
> -	    == INTR_TYPE_NMI_INTR) {
> -		vcpu->arch.nmi_injected = false;
> -		if (cpu_has_virtual_nmis())
> -			vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
> -				      GUEST_INTR_STATE_NMI);
> +	if (reason == TASK_SWITCH_GATE && idt_v) {
> +		switch (type) {
> +		case INTR_TYPE_NMI_INTR:
> +			vcpu->arch.nmi_injected = false;
> +			if (cpu_has_virtual_nmis())
> +				vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
> +					      GUEST_INTR_STATE_NMI);
> +			break;
> +		case INTR_TYPE_EXT_INTR:
> +			kvm_clear_interrupt_queue(vcpu);
> +			break;
> +		case INTR_TYPE_HARD_EXCEPTION:
> +		case INTR_TYPE_SOFT_EXCEPTION:
> +			kvm_clear_exception_queue(vcpu);
> +			break;
> +		default:
> +			break;
> +		}
>  	}
>  	tss_selector = exit_qualification;
>  
> +	if (!idt_v || (type != INTR_TYPE_HARD_EXCEPTION &&
> +		       type != INTR_TYPE_EXT_INTR &&
> +		       type != INTR_TYPE_NMI_INTR))
> +		skip_emulated_instruction(vcpu);
> +
>  	if (!kvm_task_switch(vcpu, tss_selector, reason))
>  		return 0;
>  
> @@ -3292,8 +3310,8 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
>  
>  	vector = idt_vectoring_info & VECTORING_INFO_VECTOR_MASK;
>  	type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK;
> -	
> -	switch(type) {
> +
> +	switch (type) {
>  	case INTR_TYPE_NMI_INTR:
>  		vmx->vcpu.arch.nmi_injected = true;
>  		/*
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index ae4918c..573bb3f 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -3697,7 +3697,6 @@ static void save_state_to_tss32(struct kvm_vcpu *vcpu,
>  	tss->fs = get_segment_selector(vcpu, VCPU_SREG_FS);
>  	tss->gs = get_segment_selector(vcpu, VCPU_SREG_GS);
>  	tss->ldt_selector = get_segment_selector(vcpu, VCPU_SREG_LDTR);
> -	tss->prev_task_link = get_segment_selector(vcpu, VCPU_SREG_TR);
>  }
>  
>  static int load_state_from_tss32(struct kvm_vcpu *vcpu,
> @@ -3794,8 +3793,8 @@ static int load_state_from_tss16(struct kvm_vcpu *vcpu,
>  }
>  
>  static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector,
> -		       u32 old_tss_base,
> -		       struct desc_struct *nseg_desc)
> +			      u16 old_tss_sel, u32 old_tss_base,
> +			      struct desc_struct *nseg_desc)
>  {
>  	struct tss_segment_16 tss_segment_16;
>  	int ret = 0;
> @@ -3814,6 +3813,16 @@ static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector,
>  			   &tss_segment_16, sizeof tss_segment_16))
>  		goto out;
>  
> +	if (old_tss_sel != 0xffff) {
> +		tss_segment_16.prev_task_link = old_tss_sel;
> +
> +		if (kvm_write_guest(vcpu->kvm,
> +				    get_tss_base_addr(vcpu, nseg_desc),
> +				    &tss_segment_16.prev_task_link,
> +				    sizeof tss_segment_16.prev_task_link))
> +			goto out;
> +	}
> +
>  	if (load_state_from_tss16(vcpu, &tss_segment_16))
>  		goto out;
>  
> @@ -3823,7 +3832,7 @@ out:
>  }
>  
>  static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector,
> -		       u32 old_tss_base,
> +		       u16 old_tss_sel, u32 old_tss_base,
>  		       struct desc_struct *nseg_desc)
>  {
>  	struct tss_segment_32 tss_segment_32;
> @@ -3843,6 +3852,16 @@ static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector,
>  			   &tss_segment_32, sizeof tss_segment_32))
>  		goto out;
>  
> +	if (old_tss_sel != 0xffff) {
> +		tss_segment_32.prev_task_link = old_tss_sel;
> +
> +		if (kvm_write_guest(vcpu->kvm,
> +				    get_tss_base_addr(vcpu, nseg_desc),
> +				    &tss_segment_32.prev_task_link,
> +				    sizeof tss_segment_32.prev_task_link))
> +			goto out;
> +	}
> +
>  	if (load_state_from_tss32(vcpu, &tss_segment_32))
>  		goto out;
>  
> @@ -3896,14 +3915,17 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
>  		kvm_x86_ops->set_rflags(vcpu, eflags & ~X86_EFLAGS_NT);
>  	}
>  
> -	kvm_x86_ops->skip_emulated_instruction(vcpu);
> +	/* set back link to prev task only if NT bit is set in eflags
> +	   note that old_tss_sel is not used afetr this point */
> +	if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
> +		old_tss_sel = 0xffff;
>  
>  	if (nseg_desc.type & 8)
> -		ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_base,
> -					 &nseg_desc);
> +		ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_sel,
> +					 old_tss_base, &nseg_desc);
>  	else
> -		ret = kvm_task_switch_16(vcpu, tss_selector, old_tss_base,
> -					 &nseg_desc);
> +		ret = kvm_task_switch_16(vcpu, tss_selector, old_tss_sel,
> +					 old_tss_base, &nseg_desc);
>  
>  	if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) {
>  		u32 eflags = kvm_x86_ops->get_rflags(vcpu);
> 
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>
Gleb Natapov March 30, 2009, 4:21 p.m. UTC | #3
On Mon, Mar 30, 2009 at 06:04:45PM +0200, Jan Kiszka wrote:
> Gleb Natapov wrote:
> > The patch fixes two problems with task switching.
> > 1. Back link is written to a wrong TSS.
> > 2. Instruction emulation is not needed if the reason for task switch
> >    is a task gate in IDT and access to it is caused by an external even.
> > 
> > 2 is currently solved only for VMX since there is not reliable way to
> > skip an instruction in SVM. We should emulate it instead.
> 
> Does this series fix all issues Bernhard, Thomas and Julian stumbled over?
> 
Haven't tried. I wrote my own tests for task switching. How can I check it?

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jan Kiszka March 30, 2009, 4:35 p.m. UTC | #4
Gleb Natapov wrote:
> On Mon, Mar 30, 2009 at 06:04:45PM +0200, Jan Kiszka wrote:
>> Gleb Natapov wrote:
>>> The patch fixes two problems with task switching.
>>> 1. Back link is written to a wrong TSS.
>>> 2. Instruction emulation is not needed if the reason for task switch
>>>    is a task gate in IDT and access to it is caused by an external even.
>>>
>>> 2 is currently solved only for VMX since there is not reliable way to
>>> skip an instruction in SVM. We should emulate it instead.
>> Does this series fix all issues Bernhard, Thomas and Julian stumbled over?
>>
> Haven't tried. I wrote my own tests for task switching. How can I check it?
> 

There is a test case attached to Julian's sourceforge-reported bug:

https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2681442&group_id=180599

And I guess Thomas or Bernhard will be happy to give it a try, too... :)

There was one issue, the IRQ injection bug [1] which was related to IRQ
tasks IIRC. Thomas and I finally suspected after a private chat that
there is actually a different reason behind it, something like
interrupt.pending should be cleared when the injection took place via an
(emulated) task switch. Any news on this, Thomas?

Jan

[1] http://permalink.gmane.org/gmane.comp.emulators.kvm.devel/29288
Gleb Natapov March 30, 2009, 4:39 p.m. UTC | #5
On Mon, Mar 30, 2009 at 06:35:05PM +0200, Jan Kiszka wrote:
> Gleb Natapov wrote:
> > On Mon, Mar 30, 2009 at 06:04:45PM +0200, Jan Kiszka wrote:
> >> Gleb Natapov wrote:
> >>> The patch fixes two problems with task switching.
> >>> 1. Back link is written to a wrong TSS.
> >>> 2. Instruction emulation is not needed if the reason for task switch
> >>>    is a task gate in IDT and access to it is caused by an external even.
> >>>
> >>> 2 is currently solved only for VMX since there is not reliable way to
> >>> skip an instruction in SVM. We should emulate it instead.
> >> Does this series fix all issues Bernhard, Thomas and Julian stumbled over?
> >>
> > Haven't tried. I wrote my own tests for task switching. How can I check it?
> > 
> 
> There is a test case attached to Julian's sourceforge-reported bug:
> 
> https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2681442&group_id=180599
> 
I'll try that.

> And I guess Thomas or Bernhard will be happy to give it a try, too... :)
> 
> There was one issue, the IRQ injection bug [1] which was related to IRQ
> tasks IIRC. Thomas and I finally suspected after a private chat that
> there is actually a different reason behind it, something like
> interrupt.pending should be cleared when the injection took place via an
> (emulated) task switch. Any news on this, Thomas?
> 
If this is the case then the patch series should fix it.

> Jan
> 
> [1] http://permalink.gmane.org/gmane.comp.emulators.kvm.devel/29288
> 


--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gleb Natapov March 30, 2009, 4:46 p.m. UTC | #6
On Mon, Mar 30, 2009 at 06:35:05PM +0200, Jan Kiszka wrote:
> > Haven't tried. I wrote my own tests for task switching. How can I check it?
> > 
> 
> There is a test case attached to Julian's sourceforge-reported bug:
> 
> https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2681442&group_id=180599
> 
Works for me.

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Julian Stecklina March 30, 2009, 11:54 p.m. UTC | #7
Gleb Natapov <gleb@redhat.com> writes:

> On Mon, Mar 30, 2009 at 06:35:05PM +0200, Jan Kiszka wrote:
>> > Haven't tried. I wrote my own tests for task switching. How can I check it?
>> > 
>> 
>> There is a test case attached to Julian's sourceforge-reported bug:
>> 
>> https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2681442&group_id=180599
>> 
> Works for me.

Then the patches should be fine (at least for me *g*).

Regards,
Bernhard Kohl March 31, 2009, 9:03 a.m. UTC | #8
Jan Kiszka wrote:
> 
> Gleb Natapov wrote:
> > The patch fixes two problems with task switching.
> > 1. Back link is written to a wrong TSS.
> > 2. Instruction emulation is not needed if the reason for task switch
> >    is a task gate in IDT and access to it is caused by an 
> external even.
> > 
> > 2 is currently solved only for VMX since there is not 
> reliable way to
> > skip an instruction in SVM. We should emulate it instead.
> 
> Does this series fix all issues Bernhard, Thomas and Julian 
> stumbled over?
> 
> Jan

I will try this today. Thanks.

Bernhard
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bernhard Kohl March 31, 2009, 3:21 p.m. UTC | #9
Bernhard Kohl wrote:
> 
> Jan Kiszka wrote:
> > 
> > Gleb Natapov wrote:
> > > The patch fixes two problems with task switching.
> > > 1. Back link is written to a wrong TSS.
> > > 2. Instruction emulation is not needed if the reason for 
> task switch
> > >    is a task gate in IDT and access to it is caused by an 
> > external even.
> > > 
> > > 2 is currently solved only for VMX since there is not 
> > reliable way to
> > > skip an instruction in SVM. We should emulate it instead.
> > 
> > Does this series fix all issues Bernhard, Thomas and Julian 
> > stumbled over?
> > 
> > Jan
> 
> I will try this today. Thanks.
> 
Yes, it works for us (Thomas + Bernhard).

Bernhard
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gleb Natapov March 31, 2009, 3:22 p.m. UTC | #10
On Tue, Mar 31, 2009 at 05:21:16PM +0200, Kohl, Bernhard (NSN - DE/Munich) wrote:
> Bernhard Kohl wrote:
> > 
> > Jan Kiszka wrote:
> > > 
> > > Gleb Natapov wrote:
> > > > The patch fixes two problems with task switching.
> > > > 1. Back link is written to a wrong TSS.
> > > > 2. Instruction emulation is not needed if the reason for 
> > task switch
> > > >    is a task gate in IDT and access to it is caused by an 
> > > external even.
> > > > 
> > > > 2 is currently solved only for VMX since there is not 
> > > reliable way to
> > > > skip an instruction in SVM. We should emulate it instead.
> > > 
> > > Does this series fix all issues Bernhard, Thomas and Julian 
> > > stumbled over?
> > > 
> > > Jan
> > 
> > I will try this today. Thanks.
> > 
> Yes, it works for us (Thomas + Bernhard).
> 
Great. Thanks for testing.

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jan Kiszka April 1, 2009, 7:21 a.m. UTC | #11
Gleb Natapov wrote:
> On Tue, Mar 31, 2009 at 05:21:16PM +0200, Kohl, Bernhard (NSN - DE/Munich) wrote:
>> Bernhard Kohl wrote:
>>> Jan Kiszka wrote:
>>>> Gleb Natapov wrote:
>>>>> The patch fixes two problems with task switching.
>>>>> 1. Back link is written to a wrong TSS.
>>>>> 2. Instruction emulation is not needed if the reason for 
>>> task switch
>>>>>    is a task gate in IDT and access to it is caused by an 
>>>> external even.
>>>>> 2 is currently solved only for VMX since there is not 
>>>> reliable way to
>>>>> skip an instruction in SVM. We should emulate it instead.
>>>> Does this series fix all issues Bernhard, Thomas and Julian 
>>>> stumbled over?
>>>>
>>>> Jan
>>> I will try this today. Thanks.
>>>
>> Yes, it works for us (Thomas + Bernhard).
>>
> Great. Thanks for testing.
> 

Same here: No obvious regressions found while running various NMI/IRQ tests.

Jan
diff mbox

Patch

diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 82ada75..85574b7 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -225,6 +225,7 @@  struct __attribute__ ((__packed__)) vmcb {
 #define SVM_EVTINJ_VALID_ERR (1 << 11)
 
 #define SVM_EXITINTINFO_VEC_MASK SVM_EVTINJ_VEC_MASK
+#define SVM_EXITINTINFO_TYPE_MASK SVM_EVTINJ_TYPE_MASK
 
 #define	SVM_EXITINTINFO_TYPE_INTR SVM_EVTINJ_TYPE_INTR
 #define	SVM_EXITINTINFO_TYPE_NMI SVM_EVTINJ_TYPE_NMI
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 1fcbc17..3ffb695 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1823,17 +1823,28 @@  static int task_switch_interception(struct vcpu_svm *svm,
 				    struct kvm_run *kvm_run)
 {
 	u16 tss_selector;
+	int reason;
+	int int_type = svm->vmcb->control.exit_int_info &
+		SVM_EXITINTINFO_TYPE_MASK;
 
 	tss_selector = (u16)svm->vmcb->control.exit_info_1;
+
 	if (svm->vmcb->control.exit_info_2 &
 	    (1ULL << SVM_EXITINFOSHIFT_TS_REASON_IRET))
-		return kvm_task_switch(&svm->vcpu, tss_selector,
-				       TASK_SWITCH_IRET);
-	if (svm->vmcb->control.exit_info_2 &
-	    (1ULL << SVM_EXITINFOSHIFT_TS_REASON_JMP))
-		return kvm_task_switch(&svm->vcpu, tss_selector,
-				       TASK_SWITCH_JMP);
-	return kvm_task_switch(&svm->vcpu, tss_selector, TASK_SWITCH_CALL);
+		reason = TASK_SWITCH_IRET;
+	else if (svm->vmcb->control.exit_info_2 &
+		 (1ULL << SVM_EXITINFOSHIFT_TS_REASON_JMP))
+		reason = TASK_SWITCH_JMP;
+	else if (svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID)
+		reason = TASK_SWITCH_GATE;
+	else
+		reason = TASK_SWITCH_CALL;
+
+
+	if (reason != TASK_SWITCH_GATE || int_type == SVM_EXITINTINFO_TYPE_SOFT)
+		skip_emulated_instruction(&svm->vcpu);
+
+	return kvm_task_switch(&svm->vcpu, tss_selector, reason);
 }
 
 static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 0da7a9e..01db958 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3025,22 +3025,40 @@  static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	unsigned long exit_qualification;
 	u16 tss_selector;
-	int reason;
+	int reason, type, idt_v;
+
+	idt_v = (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK);
+	type = (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK);
 
 	exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
 
 	reason = (u32)exit_qualification >> 30;
-	if (reason == TASK_SWITCH_GATE && vmx->vcpu.arch.nmi_injected &&
-	    (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
-	    (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK)
-	    == INTR_TYPE_NMI_INTR) {
-		vcpu->arch.nmi_injected = false;
-		if (cpu_has_virtual_nmis())
-			vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
-				      GUEST_INTR_STATE_NMI);
+	if (reason == TASK_SWITCH_GATE && idt_v) {
+		switch (type) {
+		case INTR_TYPE_NMI_INTR:
+			vcpu->arch.nmi_injected = false;
+			if (cpu_has_virtual_nmis())
+				vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
+					      GUEST_INTR_STATE_NMI);
+			break;
+		case INTR_TYPE_EXT_INTR:
+			kvm_clear_interrupt_queue(vcpu);
+			break;
+		case INTR_TYPE_HARD_EXCEPTION:
+		case INTR_TYPE_SOFT_EXCEPTION:
+			kvm_clear_exception_queue(vcpu);
+			break;
+		default:
+			break;
+		}
 	}
 	tss_selector = exit_qualification;
 
+	if (!idt_v || (type != INTR_TYPE_HARD_EXCEPTION &&
+		       type != INTR_TYPE_EXT_INTR &&
+		       type != INTR_TYPE_NMI_INTR))
+		skip_emulated_instruction(vcpu);
+
 	if (!kvm_task_switch(vcpu, tss_selector, reason))
 		return 0;
 
@@ -3292,8 +3310,8 @@  static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
 
 	vector = idt_vectoring_info & VECTORING_INFO_VECTOR_MASK;
 	type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK;
-	
-	switch(type) {
+
+	switch (type) {
 	case INTR_TYPE_NMI_INTR:
 		vmx->vcpu.arch.nmi_injected = true;
 		/*
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ae4918c..573bb3f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3697,7 +3697,6 @@  static void save_state_to_tss32(struct kvm_vcpu *vcpu,
 	tss->fs = get_segment_selector(vcpu, VCPU_SREG_FS);
 	tss->gs = get_segment_selector(vcpu, VCPU_SREG_GS);
 	tss->ldt_selector = get_segment_selector(vcpu, VCPU_SREG_LDTR);
-	tss->prev_task_link = get_segment_selector(vcpu, VCPU_SREG_TR);
 }
 
 static int load_state_from_tss32(struct kvm_vcpu *vcpu,
@@ -3794,8 +3793,8 @@  static int load_state_from_tss16(struct kvm_vcpu *vcpu,
 }
 
 static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector,
-		       u32 old_tss_base,
-		       struct desc_struct *nseg_desc)
+			      u16 old_tss_sel, u32 old_tss_base,
+			      struct desc_struct *nseg_desc)
 {
 	struct tss_segment_16 tss_segment_16;
 	int ret = 0;
@@ -3814,6 +3813,16 @@  static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector,
 			   &tss_segment_16, sizeof tss_segment_16))
 		goto out;
 
+	if (old_tss_sel != 0xffff) {
+		tss_segment_16.prev_task_link = old_tss_sel;
+
+		if (kvm_write_guest(vcpu->kvm,
+				    get_tss_base_addr(vcpu, nseg_desc),
+				    &tss_segment_16.prev_task_link,
+				    sizeof tss_segment_16.prev_task_link))
+			goto out;
+	}
+
 	if (load_state_from_tss16(vcpu, &tss_segment_16))
 		goto out;
 
@@ -3823,7 +3832,7 @@  out:
 }
 
 static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector,
-		       u32 old_tss_base,
+		       u16 old_tss_sel, u32 old_tss_base,
 		       struct desc_struct *nseg_desc)
 {
 	struct tss_segment_32 tss_segment_32;
@@ -3843,6 +3852,16 @@  static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector,
 			   &tss_segment_32, sizeof tss_segment_32))
 		goto out;
 
+	if (old_tss_sel != 0xffff) {
+		tss_segment_32.prev_task_link = old_tss_sel;
+
+		if (kvm_write_guest(vcpu->kvm,
+				    get_tss_base_addr(vcpu, nseg_desc),
+				    &tss_segment_32.prev_task_link,
+				    sizeof tss_segment_32.prev_task_link))
+			goto out;
+	}
+
 	if (load_state_from_tss32(vcpu, &tss_segment_32))
 		goto out;
 
@@ -3896,14 +3915,17 @@  int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
 		kvm_x86_ops->set_rflags(vcpu, eflags & ~X86_EFLAGS_NT);
 	}
 
-	kvm_x86_ops->skip_emulated_instruction(vcpu);
+	/* set back link to prev task only if NT bit is set in eflags
+	   note that old_tss_sel is not used after this point */
+	if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
+		old_tss_sel = 0xffff;
 
 	if (nseg_desc.type & 8)
-		ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_base,
-					 &nseg_desc);
+		ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_sel,
+					 old_tss_base, &nseg_desc);
 	else
-		ret = kvm_task_switch_16(vcpu, tss_selector, old_tss_base,
-					 &nseg_desc);
+		ret = kvm_task_switch_16(vcpu, tss_selector, old_tss_sel,
+					 old_tss_base, &nseg_desc);
 
 	if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) {
 		u32 eflags = kvm_x86_ops->get_rflags(vcpu);