From patchwork Sun Mar 29 14:12:18 2009 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Gleb Natapov X-Patchwork-Id: 14981 Received: from vger.kernel.org (vger.kernel.org [209.132.176.167]) by demeter.kernel.org (8.14.2/8.14.2) with ESMTP id n2TECPVN025361 for ; Sun, 29 Mar 2009 14:12:26 GMT Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1756050AbZC2OMW (ORCPT ); Sun, 29 Mar 2009 10:12:22 -0400 Received: (majordomo@vger.kernel.org) by vger.kernel.org id S1756061AbZC2OMW (ORCPT ); Sun, 29 Mar 2009 10:12:22 -0400 Received: from mx2.redhat.com ([66.187.237.31]:33030 "EHLO mx2.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1755498AbZC2OMV (ORCPT ); Sun, 29 Mar 2009 10:12:21 -0400 Received: from int-mx2.corp.redhat.com (int-mx2.corp.redhat.com [172.16.27.26]) by mx2.redhat.com (8.13.8/8.13.8) with ESMTP id n2TECJLQ011379 for ; Sun, 29 Mar 2009 10:12:19 -0400 Received: from ns3.rdu.redhat.com (ns3.rdu.redhat.com [10.11.255.199]) by int-mx2.corp.redhat.com (8.13.1/8.13.1) with ESMTP id n2TECJ2X012933; Sun, 29 Mar 2009 10:12:20 -0400 Received: from dhcp-1-237.tlv.redhat.com (dhcp-1-237.tlv.redhat.com [10.35.1.237]) by ns3.rdu.redhat.com (8.13.8/8.13.8) with ESMTP id n2TECI5u029885; Sun, 29 Mar 2009 10:12:19 -0400 Received: from trex.usersys.redhat.com (localhost [127.0.0.1]) by dhcp-1-237.tlv.redhat.com (Postfix) with ESMTP id 8188B18D395; Sun, 29 Mar 2009 17:12:18 +0300 (IDT) From: Gleb Natapov Subject: [PATCH 4/4] Fix task switching. To: avi@redhat.com Cc: kvm@vger.kernel.org Date: Sun, 29 Mar 2009 17:12:18 +0300 Message-ID: <20090329141218.30481.92675.stgit@trex.usersys.redhat.com> In-Reply-To: <20090329141202.30481.91797.stgit@trex.usersys.redhat.com> References: <20090329141202.30481.91797.stgit@trex.usersys.redhat.com> User-Agent: StGIT/0.14.2 MIME-Version: 1.0 X-Scanned-By: MIMEDefang 2.58 on 172.16.27.26 Sender: kvm-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: kvm@vger.kernel.org The patch fixes two problems with task switching. 1. Back link is written to a wrong TSS. 2. Instruction emulation is not needed if the reason for task switch is a task gate in IDT and access to it is caused by an external even. 2 is currently solved only for VMX since there is not reliable way to skip an instruction in SVM. We should emulate it instead. Signed-off-by: Gleb Natapov --- arch/x86/include/asm/svm.h | 1 + arch/x86/kvm/svm.c | 25 ++++++++++++++++++------- arch/x86/kvm/vmx.c | 40 +++++++++++++++++++++++++++++----------- arch/x86/kvm/x86.c | 40 +++++++++++++++++++++++++++++++--------- 4 files changed, 79 insertions(+), 27 deletions(-) -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 82ada75..85574b7 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -225,6 +225,7 @@ struct __attribute__ ((__packed__)) vmcb { #define SVM_EVTINJ_VALID_ERR (1 << 11) #define SVM_EXITINTINFO_VEC_MASK SVM_EVTINJ_VEC_MASK +#define SVM_EXITINTINFO_TYPE_MASK SVM_EVTINJ_TYPE_MASK #define SVM_EXITINTINFO_TYPE_INTR SVM_EVTINJ_TYPE_INTR #define SVM_EXITINTINFO_TYPE_NMI SVM_EVTINJ_TYPE_NMI diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 1fcbc17..3ffb695 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1823,17 +1823,28 @@ static int task_switch_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) { u16 tss_selector; + int reason; + int int_type = svm->vmcb->control.exit_int_info & + SVM_EXITINTINFO_TYPE_MASK; tss_selector = (u16)svm->vmcb->control.exit_info_1; + if (svm->vmcb->control.exit_info_2 & (1ULL << SVM_EXITINFOSHIFT_TS_REASON_IRET)) - return kvm_task_switch(&svm->vcpu, tss_selector, - TASK_SWITCH_IRET); - if (svm->vmcb->control.exit_info_2 & - (1ULL << SVM_EXITINFOSHIFT_TS_REASON_JMP)) - return kvm_task_switch(&svm->vcpu, tss_selector, - TASK_SWITCH_JMP); - return kvm_task_switch(&svm->vcpu, tss_selector, TASK_SWITCH_CALL); + reason = TASK_SWITCH_IRET; + else if (svm->vmcb->control.exit_info_2 & + (1ULL << SVM_EXITINFOSHIFT_TS_REASON_JMP)) + reason = TASK_SWITCH_JMP; + else if (svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID) + reason = TASK_SWITCH_GATE; + else + reason = TASK_SWITCH_CALL; + + + if (reason != TASK_SWITCH_GATE || int_type == SVM_EXITINTINFO_TYPE_SOFT) + skip_emulated_instruction(&svm->vcpu); + + return kvm_task_switch(&svm->vcpu, tss_selector, reason); } static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 0da7a9e..01db958 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3025,22 +3025,40 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) struct vcpu_vmx *vmx = to_vmx(vcpu); unsigned long exit_qualification; u16 tss_selector; - int reason; + int reason, type, idt_v; + + idt_v = (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK); + type = (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK); exit_qualification = vmcs_readl(EXIT_QUALIFICATION); reason = (u32)exit_qualification >> 30; - if (reason == TASK_SWITCH_GATE && vmx->vcpu.arch.nmi_injected && - (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK) && - (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK) - == INTR_TYPE_NMI_INTR) { - vcpu->arch.nmi_injected = false; - if (cpu_has_virtual_nmis()) - vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, - GUEST_INTR_STATE_NMI); + if (reason == TASK_SWITCH_GATE && idt_v) { + switch (type) { + case INTR_TYPE_NMI_INTR: + vcpu->arch.nmi_injected = false; + if (cpu_has_virtual_nmis()) + vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, + GUEST_INTR_STATE_NMI); + break; + case INTR_TYPE_EXT_INTR: + kvm_clear_interrupt_queue(vcpu); + break; + case INTR_TYPE_HARD_EXCEPTION: + case INTR_TYPE_SOFT_EXCEPTION: + kvm_clear_exception_queue(vcpu); + break; + default: + break; + } } tss_selector = exit_qualification; + if (!idt_v || (type != INTR_TYPE_HARD_EXCEPTION && + type != INTR_TYPE_EXT_INTR && + type != INTR_TYPE_NMI_INTR)) + skip_emulated_instruction(vcpu); + if (!kvm_task_switch(vcpu, tss_selector, reason)) return 0; @@ -3292,8 +3310,8 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx) vector = idt_vectoring_info & VECTORING_INFO_VECTOR_MASK; type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK; - - switch(type) { + + switch (type) { case INTR_TYPE_NMI_INTR: vmx->vcpu.arch.nmi_injected = true; /* diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ae4918c..573bb3f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3697,7 +3697,6 @@ static void save_state_to_tss32(struct kvm_vcpu *vcpu, tss->fs = get_segment_selector(vcpu, VCPU_SREG_FS); tss->gs = get_segment_selector(vcpu, VCPU_SREG_GS); tss->ldt_selector = get_segment_selector(vcpu, VCPU_SREG_LDTR); - tss->prev_task_link = get_segment_selector(vcpu, VCPU_SREG_TR); } static int load_state_from_tss32(struct kvm_vcpu *vcpu, @@ -3794,8 +3793,8 @@ static int load_state_from_tss16(struct kvm_vcpu *vcpu, } static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector, - u32 old_tss_base, - struct desc_struct *nseg_desc) + u16 old_tss_sel, u32 old_tss_base, + struct desc_struct *nseg_desc) { struct tss_segment_16 tss_segment_16; int ret = 0; @@ -3814,6 +3813,16 @@ static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector, &tss_segment_16, sizeof tss_segment_16)) goto out; + if (old_tss_sel != 0xffff) { + tss_segment_16.prev_task_link = old_tss_sel; + + if (kvm_write_guest(vcpu->kvm, + get_tss_base_addr(vcpu, nseg_desc), + &tss_segment_16.prev_task_link, + sizeof tss_segment_16.prev_task_link)) + goto out; + } + if (load_state_from_tss16(vcpu, &tss_segment_16)) goto out; @@ -3823,7 +3832,7 @@ out: } static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector, - u32 old_tss_base, + u16 old_tss_sel, u32 old_tss_base, struct desc_struct *nseg_desc) { struct tss_segment_32 tss_segment_32; @@ -3843,6 +3852,16 @@ static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector, &tss_segment_32, sizeof tss_segment_32)) goto out; + if (old_tss_sel != 0xffff) { + tss_segment_32.prev_task_link = old_tss_sel; + + if (kvm_write_guest(vcpu->kvm, + get_tss_base_addr(vcpu, nseg_desc), + &tss_segment_32.prev_task_link, + sizeof tss_segment_32.prev_task_link)) + goto out; + } + if (load_state_from_tss32(vcpu, &tss_segment_32)) goto out; @@ -3896,14 +3915,17 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) kvm_x86_ops->set_rflags(vcpu, eflags & ~X86_EFLAGS_NT); } - kvm_x86_ops->skip_emulated_instruction(vcpu); + /* set back link to prev task only if NT bit is set in eflags + note that old_tss_sel is not used afetr this point */ + if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE) + old_tss_sel = 0xffff; if (nseg_desc.type & 8) - ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_base, - &nseg_desc); + ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_sel, + old_tss_base, &nseg_desc); else - ret = kvm_task_switch_16(vcpu, tss_selector, old_tss_base, - &nseg_desc); + ret = kvm_task_switch_16(vcpu, tss_selector, old_tss_sel, + old_tss_base, &nseg_desc); if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) { u32 eflags = kvm_x86_ops->get_rflags(vcpu);