Message ID: 1378433091-18233-1-git-send-email-yzt356@gmail.com (mailing list archive)
State: New, archived
Il 06/09/2013 04:04, Arthur Chunqi Li ha scritto: > This patch contains the following two changes: > 1. Fix the bug in nested preemption timer support. If vmexit L2->L0 > with some reasons not emulated by L1, preemption timer value should > be save in such exits. > 2. Add support of "Save VMX-preemption timer value" VM-Exit controls > to nVMX. > > With this patch, nested VMX preemption timer features are fully > supported. > > Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com> > --- > ChangeLog to v3: > Move nested_adjust_preemption_timer to the latest place just before vmenter. > Some minor changes. > > arch/x86/include/uapi/asm/msr-index.h | 1 + > arch/x86/kvm/vmx.c | 49 +++++++++++++++++++++++++++++++-- > 2 files changed, 48 insertions(+), 2 deletions(-) > > diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h > index bb04650..b93e09a 100644 > --- a/arch/x86/include/uapi/asm/msr-index.h > +++ b/arch/x86/include/uapi/asm/msr-index.h > @@ -536,6 +536,7 @@ > > /* MSR_IA32_VMX_MISC bits */ > #define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29) > +#define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE 0x1F > /* AMD-V MSRs */ > > #define MSR_VM_CR 0xc0010114 > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c > index 1f1da43..f364d16 100644 > --- a/arch/x86/kvm/vmx.c > +++ b/arch/x86/kvm/vmx.c > @@ -374,6 +374,8 @@ struct nested_vmx { > */ > struct page *apic_access_page; > u64 msr_ia32_feature_control; > + /* Set if vmexit is L2->L1 */ > + bool nested_vmx_exit; > }; > > #define POSTED_INTR_ON 0 > @@ -2204,7 +2206,17 @@ static __init void nested_vmx_setup_ctls_msrs(void) > #ifdef CONFIG_X86_64 > VM_EXIT_HOST_ADDR_SPACE_SIZE | > #endif > - VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT; > + VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT | > + VM_EXIT_SAVE_VMX_PREEMPTION_TIMER; > + if (!(nested_vmx_pinbased_ctls_high & > + PIN_BASED_VMX_PREEMPTION_TIMER) || > + !(nested_vmx_exit_ctls_high & > + VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) { Align this under the other "!". Also, I prefer to have one long line for the whole "!(... & ...) ||" (and likewise below), but I don't know if Gleb agrees > + nested_vmx_exit_ctls_high &= > + (~VM_EXIT_SAVE_VMX_PREEMPTION_TIMER); Please remove parentheses around ~VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, and likewise elsewhere in the patch. > + nested_vmx_pinbased_ctls_high &= > + (~PIN_BASED_VMX_PREEMPTION_TIMER); > + } > nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | > VM_EXIT_LOAD_IA32_EFER); > > @@ -6707,6 +6719,24 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2) > *info2 = vmcs_read32(VM_EXIT_INTR_INFO); > } > > +static void nested_adjust_preemption_timer(struct kvm_vcpu *vcpu) > +{ > + u64 delta_tsc_l1; > + u32 preempt_val_l1, preempt_val_l2, preempt_scale; Should this exit immediately if the preemption timer pin-based control is disabled? 
> + preempt_scale = native_read_msr(MSR_IA32_VMX_MISC) & > + MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE; > + preempt_val_l2 = vmcs_read32(VMX_PREEMPTION_TIMER_VALUE); > + delta_tsc_l1 = kvm_x86_ops->read_l1_tsc(vcpu, > + native_read_tsc()) - vcpu->arch.last_guest_tsc; Please format this like: delta_tsc_l1 = kvm_x86_ops->read_l1_tsc(vcpu, native_read_tsc()) - vcpu->arch.last_guest_tsc; > + preempt_val_l1 = delta_tsc_l1 >> preempt_scale; > + if (preempt_val_l2 <= preempt_val_l1) > + preempt_val_l2 = 0; > + else > + preempt_val_l2 -= preempt_val_l1; > + vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, preempt_val_l2); Did you test that a value of 0 triggers an immediate exit, rather than counting down by 2^32? Perhaps it's safer to limit the value to 1 instead of 0. > +} > + > /* > * The guest has exited. See if we can fix it or if we need userspace > * assistance. > @@ -6736,9 +6766,11 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) > vmx->nested.nested_run_pending = 0; > > if (is_guest_mode(vcpu) && nested_vmx_exit_handled(vcpu)) { > + vmx->nested.nested_vmx_exit = true; I think this assignment should be in nested_vmx_vmexit, since it is called from other places as well. > nested_vmx_vmexit(vcpu); > return 1; > } > + vmx->nested.nested_vmx_exit = false; > > if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) { > vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY; > @@ -7132,6 +7164,8 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) > debugctlmsr = get_debugctlmsr(); > > vmx->__launched = vmx->loaded_vmcs->launched; > + if (is_guest_mode(vcpu) && !(vmx->nested.nested_vmx_exit)) > + nested_adjust_preemption_timer(vcpu); Please leave the assignment to __launched last, since it's already initializing the asm below. I don't like the is_guest_mode check here... Maybe it's micro-optimizing, but I wonder if we already do too many checks in vmx_vcpu_run... For example, is_guest_mode could be changed (I think) to a check for "vmx->loaded_vmcs == &vmx->vmcs1". Alternatively, we could change nested_vmx_exit to an enum in struct vcpu_vmx (with values for L0->L1, L0->L2, L1->L2) that is initialized in vmx_handle_exit. Then we could check directly for L0->L2 and not adjust the preemption timer in other cases. In fact, I suspect this enum could replace HF_GUEST_MASK altogether. However, this would require some other, more complicated, changes to svm.c. Gleb, what do you think? Paolo > asm( > /* Store host registers */ > "push %%" _ASM_DX "; push %%" _ASM_BP ";" > @@ -7518,6 +7552,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) > { > struct vcpu_vmx *vmx = to_vmx(vcpu); > u32 exec_control; > + u32 exit_control; > > vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector); > vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector); > @@ -7691,7 +7726,10 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) > * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER > * bits are further modified by vmx_set_efer() below. > */ > - vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl); > + exit_control = vmcs_config.vmexit_ctrl; > + if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) > + exit_control |= VM_EXIT_SAVE_VMX_PREEMPTION_TIMER; > + vmcs_write32(VM_EXIT_CONTROLS, exit_control); > > /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are > * emulated by vmx_set_efer(), below. 
> @@ -8090,6 +8128,13 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
>  	vmcs12->guest_pending_dbg_exceptions =
>  		vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS);
>  
> +	if ((vmcs12->pin_based_vm_exec_control &
> +			PIN_BASED_VMX_PREEMPTION_TIMER) &&
> +			(vmcs12->vm_exit_controls &
> +			VM_EXIT_SAVE_VMX_PREEMPTION_TIMER))
> +		vmcs12->vmx_preemption_timer_value =
> +			vmcs_read32(VMX_PREEMPTION_TIMER_VALUE);
> +
>  	/*
>  	 * In some cases (usually, nested EPT), L2 is allowed to change its
>  	 * own CR3 without exiting. If it has changed it, we must keep it.
On 2013-09-13 19:15, Paolo Bonzini wrote:
> Il 06/09/2013 04:04, Arthur Chunqi Li ha scritto:
>> +	preempt_val_l1 = delta_tsc_l1 >> preempt_scale;
>> +	if (preempt_val_l2 <= preempt_val_l1)
>> +		preempt_val_l2 = 0;
>> +	else
>> +		preempt_val_l2 -= preempt_val_l1;
>> +	vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, preempt_val_l2);
>
> Did you test that a value of 0 triggers an immediate exit, rather than
> counting down by 2^32? Perhaps it's safer to limit the value to 1
> instead of 0.

In my experience, 0 triggers an immediate exit when the preemption timer
is enabled.

Jan
On Sat, Sep 14, 2013 at 3:44 PM, Jan Kiszka <jan.kiszka@web.de> wrote:
> On 2013-09-13 19:15, Paolo Bonzini wrote:
>> Il 06/09/2013 04:04, Arthur Chunqi Li ha scritto:
>>> +	preempt_val_l1 = delta_tsc_l1 >> preempt_scale;
>>> +	if (preempt_val_l2 <= preempt_val_l1)
>>> +		preempt_val_l2 = 0;
>>> +	else
>>> +		preempt_val_l2 -= preempt_val_l1;
>>> +	vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, preempt_val_l2);
>>
>> Did you test that a value of 0 triggers an immediate exit, rather than
>> counting down by 2^32? Perhaps it's safer to limit the value to 1
>> instead of 0.
>
> In my experience, 0 triggers an immediate exit when the preemption timer
> is enabled.

Yes, the L2 VM exits immediately when the value is 0 with my patch.

Arthur
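For reference, a minimal sketch of the more defensive variant Paolo
suggests, clamping the remaining value to 1 instead of 0. This is only
an illustration of the suggestion, not the code Arthur posted (his
patch writes 0, which Jan and Arthur report already causes an immediate
exit on their hardware):

	/*
	 * Hedged sketch: never store 0, so correctness does not depend
	 * on the CPU treating a stored 0 as "expire immediately" rather
	 * than counting down from the wrapped value; one extra timer
	 * tick is a negligible delay.
	 */
	if (preempt_val_l2 <= preempt_val_l1)
		preempt_val_l2 = 1;
	else
		preempt_val_l2 -= preempt_val_l1;
	vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, preempt_val_l2);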
On Fri, Sep 06, 2013 at 10:04:51AM +0800, Arthur Chunqi Li wrote: > This patch contains the following two changes: > 1. Fix the bug in nested preemption timer support. If vmexit L2->L0 > with some reasons not emulated by L1, preemption timer value should > be save in such exits. > 2. Add support of "Save VMX-preemption timer value" VM-Exit controls > to nVMX. > > With this patch, nested VMX preemption timer features are fully > supported. > > Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com> > --- > ChangeLog to v3: > Move nested_adjust_preemption_timer to the latest place just before vmenter. > Some minor changes. > > arch/x86/include/uapi/asm/msr-index.h | 1 + > arch/x86/kvm/vmx.c | 49 +++++++++++++++++++++++++++++++-- > 2 files changed, 48 insertions(+), 2 deletions(-) > > diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h > index bb04650..b93e09a 100644 > --- a/arch/x86/include/uapi/asm/msr-index.h > +++ b/arch/x86/include/uapi/asm/msr-index.h > @@ -536,6 +536,7 @@ > > /* MSR_IA32_VMX_MISC bits */ > #define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29) > +#define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE 0x1F > /* AMD-V MSRs */ > > #define MSR_VM_CR 0xc0010114 > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c > index 1f1da43..f364d16 100644 > --- a/arch/x86/kvm/vmx.c > +++ b/arch/x86/kvm/vmx.c > @@ -374,6 +374,8 @@ struct nested_vmx { > */ > struct page *apic_access_page; > u64 msr_ia32_feature_control; > + /* Set if vmexit is L2->L1 */ > + bool nested_vmx_exit; Do not see why it is needed, see bellow. > }; > > #define POSTED_INTR_ON 0 > @@ -2204,7 +2206,17 @@ static __init void nested_vmx_setup_ctls_msrs(void) > #ifdef CONFIG_X86_64 > VM_EXIT_HOST_ADDR_SPACE_SIZE | > #endif > - VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT; > + VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT | > + VM_EXIT_SAVE_VMX_PREEMPTION_TIMER; > + if (!(nested_vmx_pinbased_ctls_high & > + PIN_BASED_VMX_PREEMPTION_TIMER) || > + !(nested_vmx_exit_ctls_high & > + VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) { > + nested_vmx_exit_ctls_high &= > + (~VM_EXIT_SAVE_VMX_PREEMPTION_TIMER); > + nested_vmx_pinbased_ctls_high &= > + (~PIN_BASED_VMX_PREEMPTION_TIMER); > + } > nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | > VM_EXIT_LOAD_IA32_EFER); > > @@ -6707,6 +6719,24 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2) > *info2 = vmcs_read32(VM_EXIT_INTR_INFO); > } > > +static void nested_adjust_preemption_timer(struct kvm_vcpu *vcpu) > +{ > + u64 delta_tsc_l1; > + u32 preempt_val_l1, preempt_val_l2, preempt_scale; > + > + preempt_scale = native_read_msr(MSR_IA32_VMX_MISC) & > + MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE; > + preempt_val_l2 = vmcs_read32(VMX_PREEMPTION_TIMER_VALUE); > + delta_tsc_l1 = kvm_x86_ops->read_l1_tsc(vcpu, > + native_read_tsc()) - vcpu->arch.last_guest_tsc; > + preempt_val_l1 = delta_tsc_l1 >> preempt_scale; > + if (preempt_val_l2 <= preempt_val_l1) > + preempt_val_l2 = 0; > + else > + preempt_val_l2 -= preempt_val_l1; > + vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, preempt_val_l2); > +} > + > /* > * The guest has exited. See if we can fix it or if we need userspace > * assistance. 
> @@ -6736,9 +6766,11 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
>  		vmx->nested.nested_run_pending = 0;
>  
>  	if (is_guest_mode(vcpu) && nested_vmx_exit_handled(vcpu)) {
> +		vmx->nested.nested_vmx_exit = true;
>  		nested_vmx_vmexit(vcpu);
>  		return 1;
>  	}
> +	vmx->nested.nested_vmx_exit = false;
>  
>  	if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) {
>  		vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
> @@ -7132,6 +7164,8 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
>  	debugctlmsr = get_debugctlmsr();
>  
>  	vmx->__launched = vmx->loaded_vmcs->launched;
> +	if (is_guest_mode(vcpu) && !(vmx->nested.nested_vmx_exit))
>
How is_guest_mode() and nested_vmx_exit can be both true? The only place
nested_vmx_exit is set to true is just before the call to
nested_vmx_vmexit(). The first thing nested_vmx_vmexit() does is make
is_guest_mode() false. To enter guest mode again at least one other
vmexit from L1 to L0 is needed, at which point nested_vmx_exit will be
reset to false again.

If you want to avoid calling nested_adjust_preemption_timer() during
vmlaunch/vmresume emulation (and it looks like this is what you are
trying to achieve here) you can check nested_run_pending.

> +		nested_adjust_preemption_timer(vcpu);
>  	asm(
>  		/* Store host registers */
>  		"push %%" _ASM_DX "; push %%" _ASM_BP ";"
> @@ -7518,6 +7552,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
>  {
>  	struct vcpu_vmx *vmx = to_vmx(vcpu);
>  	u32 exec_control;
> +	u32 exit_control;
>  
>  	vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
>  	vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
> @@ -7691,7 +7726,10 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
>  	 * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER
>  	 * bits are further modified by vmx_set_efer() below.
>  	 */
> -	vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl);
> +	exit_control = vmcs_config.vmexit_ctrl;
> +	if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER)
> +		exit_control |= VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
> +	vmcs_write32(VM_EXIT_CONTROLS, exit_control);
>  
>  	/* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are
>  	 * emulated by vmx_set_efer(), below.
> @@ -8090,6 +8128,13 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
>  	vmcs12->guest_pending_dbg_exceptions =
>  		vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS);
>  
> +	if ((vmcs12->pin_based_vm_exec_control &
> +			PIN_BASED_VMX_PREEMPTION_TIMER) &&
> +			(vmcs12->vm_exit_controls &
> +			VM_EXIT_SAVE_VMX_PREEMPTION_TIMER))
> +		vmcs12->vmx_preemption_timer_value =
> +			vmcs_read32(VMX_PREEMPTION_TIMER_VALUE);
> +
>  	/*
>  	 * In some cases (usually, nested EPT), L2 is allowed to change its
>  	 * own CR3 without exiting. If it has changed it, we must keep it.
> -- 
> 1.7.9.5

--
			Gleb.
On Sat, Sep 14, 2013 at 1:15 AM, Paolo Bonzini <pbonzini@redhat.com> wrote: > Il 06/09/2013 04:04, Arthur Chunqi Li ha scritto: >> This patch contains the following two changes: >> 1. Fix the bug in nested preemption timer support. If vmexit L2->L0 >> with some reasons not emulated by L1, preemption timer value should >> be save in such exits. >> 2. Add support of "Save VMX-preemption timer value" VM-Exit controls >> to nVMX. >> >> With this patch, nested VMX preemption timer features are fully >> supported. >> >> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com> >> --- >> ChangeLog to v3: >> Move nested_adjust_preemption_timer to the latest place just before vmenter. >> Some minor changes. >> >> arch/x86/include/uapi/asm/msr-index.h | 1 + >> arch/x86/kvm/vmx.c | 49 +++++++++++++++++++++++++++++++-- >> 2 files changed, 48 insertions(+), 2 deletions(-) >> >> diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h >> index bb04650..b93e09a 100644 >> --- a/arch/x86/include/uapi/asm/msr-index.h >> +++ b/arch/x86/include/uapi/asm/msr-index.h >> @@ -536,6 +536,7 @@ >> >> /* MSR_IA32_VMX_MISC bits */ >> #define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29) >> +#define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE 0x1F >> /* AMD-V MSRs */ >> >> #define MSR_VM_CR 0xc0010114 >> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c >> index 1f1da43..f364d16 100644 >> --- a/arch/x86/kvm/vmx.c >> +++ b/arch/x86/kvm/vmx.c >> @@ -374,6 +374,8 @@ struct nested_vmx { >> */ >> struct page *apic_access_page; >> u64 msr_ia32_feature_control; >> + /* Set if vmexit is L2->L1 */ >> + bool nested_vmx_exit; >> }; >> >> #define POSTED_INTR_ON 0 >> @@ -2204,7 +2206,17 @@ static __init void nested_vmx_setup_ctls_msrs(void) >> #ifdef CONFIG_X86_64 >> VM_EXIT_HOST_ADDR_SPACE_SIZE | >> #endif >> - VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT; >> + VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT | >> + VM_EXIT_SAVE_VMX_PREEMPTION_TIMER; >> + if (!(nested_vmx_pinbased_ctls_high & >> + PIN_BASED_VMX_PREEMPTION_TIMER) || >> + !(nested_vmx_exit_ctls_high & >> + VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) { > > Align this under the other "!". Also, I prefer to have one long line > for the whole "!(... & ...) ||" (and likewise below), but I don't know > if Gleb agrees > >> + nested_vmx_exit_ctls_high &= >> + (~VM_EXIT_SAVE_VMX_PREEMPTION_TIMER); > > Please remove parentheses around ~VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, and > likewise elsewhere in the patch. > >> + nested_vmx_pinbased_ctls_high &= >> + (~PIN_BASED_VMX_PREEMPTION_TIMER); >> + } >> nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | >> VM_EXIT_LOAD_IA32_EFER); >> >> @@ -6707,6 +6719,24 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2) >> *info2 = vmcs_read32(VM_EXIT_INTR_INFO); >> } >> >> +static void nested_adjust_preemption_timer(struct kvm_vcpu *vcpu) >> +{ >> + u64 delta_tsc_l1; >> + u32 preempt_val_l1, preempt_val_l2, preempt_scale; > > Should this exit immediately if the preemption timer pin-based control > is disabled? Hi Paolo, How can I get pin-based control here from "struct kvm_vcpu *vcpu"? 
Arthur > >> + preempt_scale = native_read_msr(MSR_IA32_VMX_MISC) & >> + MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE; >> + preempt_val_l2 = vmcs_read32(VMX_PREEMPTION_TIMER_VALUE); >> + delta_tsc_l1 = kvm_x86_ops->read_l1_tsc(vcpu, >> + native_read_tsc()) - vcpu->arch.last_guest_tsc; > > Please format this like: > > delta_tsc_l1 = > kvm_x86_ops->read_l1_tsc(vcpu, native_read_tsc()) > - vcpu->arch.last_guest_tsc; > >> + preempt_val_l1 = delta_tsc_l1 >> preempt_scale; >> + if (preempt_val_l2 <= preempt_val_l1) >> + preempt_val_l2 = 0; >> + else >> + preempt_val_l2 -= preempt_val_l1; >> + vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, preempt_val_l2); > > Did you test that a value of 0 triggers an immediate exit, rather than > counting down by 2^32? Perhaps it's safer to limit the value to 1 > instead of 0. > >> +} >> + >> /* >> * The guest has exited. See if we can fix it or if we need userspace >> * assistance. >> @@ -6736,9 +6766,11 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) >> vmx->nested.nested_run_pending = 0; >> >> if (is_guest_mode(vcpu) && nested_vmx_exit_handled(vcpu)) { >> + vmx->nested.nested_vmx_exit = true; > > I think this assignment should be in nested_vmx_vmexit, since it is > called from other places as well. > >> nested_vmx_vmexit(vcpu); >> return 1; >> } >> + vmx->nested.nested_vmx_exit = false; >> >> if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) { >> vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY; >> @@ -7132,6 +7164,8 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) >> debugctlmsr = get_debugctlmsr(); >> >> vmx->__launched = vmx->loaded_vmcs->launched; >> + if (is_guest_mode(vcpu) && !(vmx->nested.nested_vmx_exit)) >> + nested_adjust_preemption_timer(vcpu); > > Please leave the assignment to __launched last, since it's already > initializing the asm below. > > I don't like the is_guest_mode check here... Maybe it's > micro-optimizing, but I wonder if we already do too many checks in > vmx_vcpu_run... For example, is_guest_mode could be changed (I think) > to a check for "vmx->loaded_vmcs == &vmx->vmcs1". > > Alternatively, we could change nested_vmx_exit to an enum in struct > vcpu_vmx (with values for L0->L1, L0->L2, L1->L2) that is initialized in > vmx_handle_exit. Then we could check directly for L0->L2 and not adjust > the preemption timer in other cases. In fact, I suspect this enum could > replace HF_GUEST_MASK altogether. However, this would require some > other, more complicated, changes to svm.c. > > Gleb, what do you think? > > Paolo > >> asm( >> /* Store host registers */ >> "push %%" _ASM_DX "; push %%" _ASM_BP ";" >> @@ -7518,6 +7552,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) >> { >> struct vcpu_vmx *vmx = to_vmx(vcpu); >> u32 exec_control; >> + u32 exit_control; >> >> vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector); >> vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector); >> @@ -7691,7 +7726,10 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) >> * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER >> * bits are further modified by vmx_set_efer() below. 
>> */ >> - vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl); >> + exit_control = vmcs_config.vmexit_ctrl; >> + if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) >> + exit_control |= VM_EXIT_SAVE_VMX_PREEMPTION_TIMER; >> + vmcs_write32(VM_EXIT_CONTROLS, exit_control); >> >> /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are >> * emulated by vmx_set_efer(), below. >> @@ -8090,6 +8128,13 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) >> vmcs12->guest_pending_dbg_exceptions = >> vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS); >> >> + if ((vmcs12->pin_based_vm_exec_control & >> + PIN_BASED_VMX_PREEMPTION_TIMER) && >> + (vmcs12->vm_exit_controls & >> + VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) >> + vmcs12->vmx_preemption_timer_value = >> + vmcs_read32(VMX_PREEMPTION_TIMER_VALUE); >> + >> /* >> * In some cases (usually, nested EPT), L2 is allowed to change its >> * own CR3 without exiting. If it has changed it, we must keep it. >> >
On Sun, Sep 15, 2013 at 8:31 PM, Gleb Natapov <gleb@redhat.com> wrote: > On Fri, Sep 06, 2013 at 10:04:51AM +0800, Arthur Chunqi Li wrote: >> This patch contains the following two changes: >> 1. Fix the bug in nested preemption timer support. If vmexit L2->L0 >> with some reasons not emulated by L1, preemption timer value should >> be save in such exits. >> 2. Add support of "Save VMX-preemption timer value" VM-Exit controls >> to nVMX. >> >> With this patch, nested VMX preemption timer features are fully >> supported. >> >> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com> >> --- >> ChangeLog to v3: >> Move nested_adjust_preemption_timer to the latest place just before vmenter. >> Some minor changes. >> >> arch/x86/include/uapi/asm/msr-index.h | 1 + >> arch/x86/kvm/vmx.c | 49 +++++++++++++++++++++++++++++++-- >> 2 files changed, 48 insertions(+), 2 deletions(-) >> >> diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h >> index bb04650..b93e09a 100644 >> --- a/arch/x86/include/uapi/asm/msr-index.h >> +++ b/arch/x86/include/uapi/asm/msr-index.h >> @@ -536,6 +536,7 @@ >> >> /* MSR_IA32_VMX_MISC bits */ >> #define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29) >> +#define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE 0x1F >> /* AMD-V MSRs */ >> >> #define MSR_VM_CR 0xc0010114 >> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c >> index 1f1da43..f364d16 100644 >> --- a/arch/x86/kvm/vmx.c >> +++ b/arch/x86/kvm/vmx.c >> @@ -374,6 +374,8 @@ struct nested_vmx { >> */ >> struct page *apic_access_page; >> u64 msr_ia32_feature_control; >> + /* Set if vmexit is L2->L1 */ >> + bool nested_vmx_exit; > Do not see why it is needed, see bellow. > >> }; >> >> #define POSTED_INTR_ON 0 >> @@ -2204,7 +2206,17 @@ static __init void nested_vmx_setup_ctls_msrs(void) >> #ifdef CONFIG_X86_64 >> VM_EXIT_HOST_ADDR_SPACE_SIZE | >> #endif >> - VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT; >> + VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT | >> + VM_EXIT_SAVE_VMX_PREEMPTION_TIMER; >> + if (!(nested_vmx_pinbased_ctls_high & >> + PIN_BASED_VMX_PREEMPTION_TIMER) || >> + !(nested_vmx_exit_ctls_high & >> + VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) { >> + nested_vmx_exit_ctls_high &= >> + (~VM_EXIT_SAVE_VMX_PREEMPTION_TIMER); >> + nested_vmx_pinbased_ctls_high &= >> + (~PIN_BASED_VMX_PREEMPTION_TIMER); >> + } >> nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | >> VM_EXIT_LOAD_IA32_EFER); >> >> @@ -6707,6 +6719,24 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2) >> *info2 = vmcs_read32(VM_EXIT_INTR_INFO); >> } >> >> +static void nested_adjust_preemption_timer(struct kvm_vcpu *vcpu) >> +{ >> + u64 delta_tsc_l1; >> + u32 preempt_val_l1, preempt_val_l2, preempt_scale; >> + >> + preempt_scale = native_read_msr(MSR_IA32_VMX_MISC) & >> + MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE; >> + preempt_val_l2 = vmcs_read32(VMX_PREEMPTION_TIMER_VALUE); >> + delta_tsc_l1 = kvm_x86_ops->read_l1_tsc(vcpu, >> + native_read_tsc()) - vcpu->arch.last_guest_tsc; >> + preempt_val_l1 = delta_tsc_l1 >> preempt_scale; >> + if (preempt_val_l2 <= preempt_val_l1) >> + preempt_val_l2 = 0; >> + else >> + preempt_val_l2 -= preempt_val_l1; >> + vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, preempt_val_l2); >> +} >> + >> /* >> * The guest has exited. See if we can fix it or if we need userspace >> * assistance. 
>> @@ -6736,9 +6766,11 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) >> vmx->nested.nested_run_pending = 0; >> >> if (is_guest_mode(vcpu) && nested_vmx_exit_handled(vcpu)) { >> + vmx->nested.nested_vmx_exit = true; >> nested_vmx_vmexit(vcpu); >> return 1; >> } >> + vmx->nested.nested_vmx_exit = false; >> >> if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) { >> vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY; >> @@ -7132,6 +7164,8 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) >> debugctlmsr = get_debugctlmsr(); >> >> vmx->__launched = vmx->loaded_vmcs->launched; >> + if (is_guest_mode(vcpu) && !(vmx->nested.nested_vmx_exit)) > How is_guest_mode() and nested_vmx_exi can be both true? The only place > nested_vmx_exit is set to true is just before call to > nested_vmx_vmexit(). The firs thing nested_vmx_vmexit() does is makes > is_guest_mode() false. To enter guest mode again at least one other > vmexit from L1 to L0 is needed at which point nested_vmx_exit will be > reset to false again. > > If you want to avoid calling nested_adjust_preemption_timer() during > vmlaunch/vmresume emulation (and it looks like this is what you are > trying to achieve here) you can check nested_run_pending. Besides vmlaunch/vmresume emulation, every exit from L2->L1 should not call nested_adjust_preemption_timer(), this function is just used to adjust preemption timer when L2->L0->L2. Can nested_run_pending distinguish this? Arthur > > >> + nested_adjust_preemption_timer(vcpu); >> asm( >> /* Store host registers */ >> "push %%" _ASM_DX "; push %%" _ASM_BP ";" >> @@ -7518,6 +7552,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) >> { >> struct vcpu_vmx *vmx = to_vmx(vcpu); >> u32 exec_control; >> + u32 exit_control; >> >> vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector); >> vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector); >> @@ -7691,7 +7726,10 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) >> * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER >> * bits are further modified by vmx_set_efer() below. >> */ >> - vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl); >> + exit_control = vmcs_config.vmexit_ctrl; >> + if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) >> + exit_control |= VM_EXIT_SAVE_VMX_PREEMPTION_TIMER; >> + vmcs_write32(VM_EXIT_CONTROLS, exit_control); >> >> /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are >> * emulated by vmx_set_efer(), below. >> @@ -8090,6 +8128,13 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) >> vmcs12->guest_pending_dbg_exceptions = >> vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS); >> >> + if ((vmcs12->pin_based_vm_exec_control & >> + PIN_BASED_VMX_PREEMPTION_TIMER) && >> + (vmcs12->vm_exit_controls & >> + VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) >> + vmcs12->vmx_preemption_timer_value = >> + vmcs_read32(VMX_PREEMPTION_TIMER_VALUE); >> + >> /* >> * In some cases (usually, nested EPT), L2 is allowed to change its >> * own CR3 without exiting. If it has changed it, we must keep it. >> -- >> 1.7.9.5 > > -- > Gleb.
On Mon, Sep 16, 2013 at 01:49:41PM +0800, Arthur Chunqi Li wrote: > On Sun, Sep 15, 2013 at 8:31 PM, Gleb Natapov <gleb@redhat.com> wrote: > > On Fri, Sep 06, 2013 at 10:04:51AM +0800, Arthur Chunqi Li wrote: > >> This patch contains the following two changes: > >> 1. Fix the bug in nested preemption timer support. If vmexit L2->L0 > >> with some reasons not emulated by L1, preemption timer value should > >> be save in such exits. > >> 2. Add support of "Save VMX-preemption timer value" VM-Exit controls > >> to nVMX. > >> > >> With this patch, nested VMX preemption timer features are fully > >> supported. > >> > >> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com> > >> --- > >> ChangeLog to v3: > >> Move nested_adjust_preemption_timer to the latest place just before vmenter. > >> Some minor changes. > >> > >> arch/x86/include/uapi/asm/msr-index.h | 1 + > >> arch/x86/kvm/vmx.c | 49 +++++++++++++++++++++++++++++++-- > >> 2 files changed, 48 insertions(+), 2 deletions(-) > >> > >> diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h > >> index bb04650..b93e09a 100644 > >> --- a/arch/x86/include/uapi/asm/msr-index.h > >> +++ b/arch/x86/include/uapi/asm/msr-index.h > >> @@ -536,6 +536,7 @@ > >> > >> /* MSR_IA32_VMX_MISC bits */ > >> #define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29) > >> +#define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE 0x1F > >> /* AMD-V MSRs */ > >> > >> #define MSR_VM_CR 0xc0010114 > >> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c > >> index 1f1da43..f364d16 100644 > >> --- a/arch/x86/kvm/vmx.c > >> +++ b/arch/x86/kvm/vmx.c > >> @@ -374,6 +374,8 @@ struct nested_vmx { > >> */ > >> struct page *apic_access_page; > >> u64 msr_ia32_feature_control; > >> + /* Set if vmexit is L2->L1 */ > >> + bool nested_vmx_exit; > > Do not see why it is needed, see bellow. 
> > > >> }; > >> > >> #define POSTED_INTR_ON 0 > >> @@ -2204,7 +2206,17 @@ static __init void nested_vmx_setup_ctls_msrs(void) > >> #ifdef CONFIG_X86_64 > >> VM_EXIT_HOST_ADDR_SPACE_SIZE | > >> #endif > >> - VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT; > >> + VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT | > >> + VM_EXIT_SAVE_VMX_PREEMPTION_TIMER; > >> + if (!(nested_vmx_pinbased_ctls_high & > >> + PIN_BASED_VMX_PREEMPTION_TIMER) || > >> + !(nested_vmx_exit_ctls_high & > >> + VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) { > >> + nested_vmx_exit_ctls_high &= > >> + (~VM_EXIT_SAVE_VMX_PREEMPTION_TIMER); > >> + nested_vmx_pinbased_ctls_high &= > >> + (~PIN_BASED_VMX_PREEMPTION_TIMER); > >> + } > >> nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | > >> VM_EXIT_LOAD_IA32_EFER); > >> > >> @@ -6707,6 +6719,24 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2) > >> *info2 = vmcs_read32(VM_EXIT_INTR_INFO); > >> } > >> > >> +static void nested_adjust_preemption_timer(struct kvm_vcpu *vcpu) > >> +{ > >> + u64 delta_tsc_l1; > >> + u32 preempt_val_l1, preempt_val_l2, preempt_scale; > >> + > >> + preempt_scale = native_read_msr(MSR_IA32_VMX_MISC) & > >> + MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE; > >> + preempt_val_l2 = vmcs_read32(VMX_PREEMPTION_TIMER_VALUE); > >> + delta_tsc_l1 = kvm_x86_ops->read_l1_tsc(vcpu, > >> + native_read_tsc()) - vcpu->arch.last_guest_tsc; > >> + preempt_val_l1 = delta_tsc_l1 >> preempt_scale; > >> + if (preempt_val_l2 <= preempt_val_l1) > >> + preempt_val_l2 = 0; > >> + else > >> + preempt_val_l2 -= preempt_val_l1; > >> + vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, preempt_val_l2); > >> +} > >> + > >> /* > >> * The guest has exited. See if we can fix it or if we need userspace > >> * assistance. > >> @@ -6736,9 +6766,11 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) > >> vmx->nested.nested_run_pending = 0; > >> > >> if (is_guest_mode(vcpu) && nested_vmx_exit_handled(vcpu)) { > >> + vmx->nested.nested_vmx_exit = true; > >> nested_vmx_vmexit(vcpu); > >> return 1; > >> } > >> + vmx->nested.nested_vmx_exit = false; > >> > >> if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) { > >> vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY; > >> @@ -7132,6 +7164,8 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) > >> debugctlmsr = get_debugctlmsr(); > >> > >> vmx->__launched = vmx->loaded_vmcs->launched; > >> + if (is_guest_mode(vcpu) && !(vmx->nested.nested_vmx_exit)) > > How is_guest_mode() and nested_vmx_exi can be both true? The only place > > nested_vmx_exit is set to true is just before call to > > nested_vmx_vmexit(). The firs thing nested_vmx_vmexit() does is makes > > is_guest_mode() false. To enter guest mode again at least one other > > vmexit from L1 to L0 is needed at which point nested_vmx_exit will be > > reset to false again. > > > > If you want to avoid calling nested_adjust_preemption_timer() during > > vmlaunch/vmresume emulation (and it looks like this is what you are > > trying to achieve here) you can check nested_run_pending. > Besides vmlaunch/vmresume emulation, every exit from L2->L1 should not > call nested_adjust_preemption_timer(), this function is just used to > adjust preemption timer when L2->L0->L2. Can nested_run_pending > distinguish this? > On L2->L1 exit is_guest_mode() will be false during vmx_vcpu_run(). On L1->L2 entry nested_run_pending will be true. On L0->L2 entry is_guest_mode() == true && nested_run_pending == false. -- Gleb. 
On 2013-09-16 07:42, Arthur Chunqi Li wrote: > On Sat, Sep 14, 2013 at 1:15 AM, Paolo Bonzini <pbonzini@redhat.com> wrote: >> Il 06/09/2013 04:04, Arthur Chunqi Li ha scritto: >>> This patch contains the following two changes: >>> 1. Fix the bug in nested preemption timer support. If vmexit L2->L0 >>> with some reasons not emulated by L1, preemption timer value should >>> be save in such exits. >>> 2. Add support of "Save VMX-preemption timer value" VM-Exit controls >>> to nVMX. >>> >>> With this patch, nested VMX preemption timer features are fully >>> supported. >>> >>> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com> >>> --- >>> ChangeLog to v3: >>> Move nested_adjust_preemption_timer to the latest place just before vmenter. >>> Some minor changes. >>> >>> arch/x86/include/uapi/asm/msr-index.h | 1 + >>> arch/x86/kvm/vmx.c | 49 +++++++++++++++++++++++++++++++-- >>> 2 files changed, 48 insertions(+), 2 deletions(-) >>> >>> diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h >>> index bb04650..b93e09a 100644 >>> --- a/arch/x86/include/uapi/asm/msr-index.h >>> +++ b/arch/x86/include/uapi/asm/msr-index.h >>> @@ -536,6 +536,7 @@ >>> >>> /* MSR_IA32_VMX_MISC bits */ >>> #define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29) >>> +#define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE 0x1F >>> /* AMD-V MSRs */ >>> >>> #define MSR_VM_CR 0xc0010114 >>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c >>> index 1f1da43..f364d16 100644 >>> --- a/arch/x86/kvm/vmx.c >>> +++ b/arch/x86/kvm/vmx.c >>> @@ -374,6 +374,8 @@ struct nested_vmx { >>> */ >>> struct page *apic_access_page; >>> u64 msr_ia32_feature_control; >>> + /* Set if vmexit is L2->L1 */ >>> + bool nested_vmx_exit; >>> }; >>> >>> #define POSTED_INTR_ON 0 >>> @@ -2204,7 +2206,17 @@ static __init void nested_vmx_setup_ctls_msrs(void) >>> #ifdef CONFIG_X86_64 >>> VM_EXIT_HOST_ADDR_SPACE_SIZE | >>> #endif >>> - VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT; >>> + VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT | >>> + VM_EXIT_SAVE_VMX_PREEMPTION_TIMER; >>> + if (!(nested_vmx_pinbased_ctls_high & >>> + PIN_BASED_VMX_PREEMPTION_TIMER) || >>> + !(nested_vmx_exit_ctls_high & >>> + VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) { >> >> Align this under the other "!". Also, I prefer to have one long line >> for the whole "!(... & ...) ||" (and likewise below), but I don't know >> if Gleb agrees >> >>> + nested_vmx_exit_ctls_high &= >>> + (~VM_EXIT_SAVE_VMX_PREEMPTION_TIMER); >> >> Please remove parentheses around ~VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, and >> likewise elsewhere in the patch. >> >>> + nested_vmx_pinbased_ctls_high &= >>> + (~PIN_BASED_VMX_PREEMPTION_TIMER); >>> + } >>> nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | >>> VM_EXIT_LOAD_IA32_EFER); >>> >>> @@ -6707,6 +6719,24 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2) >>> *info2 = vmcs_read32(VM_EXIT_INTR_INFO); >>> } >>> >>> +static void nested_adjust_preemption_timer(struct kvm_vcpu *vcpu) >>> +{ >>> + u64 delta_tsc_l1; >>> + u32 preempt_val_l1, preempt_val_l2, preempt_scale; >> >> Should this exit immediately if the preemption timer pin-based control >> is disabled? > Hi Paolo, > How can I get pin-based control here from "struct kvm_vcpu *vcpu"? You can find get_vmcs12(vcpu)->pin_based_vm_exec_control in existing code. Jan
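Combining Jan's pointer with Paolo's earlier question, the early return
could look like the following sketch (get_vmcs12() is the existing
helper in vmx.c; the elided body is the adjustment from the patch):

	static void nested_adjust_preemption_timer(struct kvm_vcpu *vcpu)
	{
		u64 delta_tsc_l1;
		u32 preempt_val_l1, preempt_val_l2, preempt_scale;

		/* Nothing to adjust if L1 did not enable the timer. */
		if (!(get_vmcs12(vcpu)->pin_based_vm_exec_control &
		      PIN_BASED_VMX_PREEMPTION_TIMER))
			return;

		/* ... adjustment as in the posted patch ... */
	}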
On Fri, Sep 13, 2013 at 07:15:11PM +0200, Paolo Bonzini wrote: > Il 06/09/2013 04:04, Arthur Chunqi Li ha scritto: > > This patch contains the following two changes: > > 1. Fix the bug in nested preemption timer support. If vmexit L2->L0 > > with some reasons not emulated by L1, preemption timer value should > > be save in such exits. > > 2. Add support of "Save VMX-preemption timer value" VM-Exit controls > > to nVMX. > > > > With this patch, nested VMX preemption timer features are fully > > supported. > > > > Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com> > > --- > > ChangeLog to v3: > > Move nested_adjust_preemption_timer to the latest place just before vmenter. > > Some minor changes. > > > > arch/x86/include/uapi/asm/msr-index.h | 1 + > > arch/x86/kvm/vmx.c | 49 +++++++++++++++++++++++++++++++-- > > 2 files changed, 48 insertions(+), 2 deletions(-) > > > > diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h > > index bb04650..b93e09a 100644 > > --- a/arch/x86/include/uapi/asm/msr-index.h > > +++ b/arch/x86/include/uapi/asm/msr-index.h > > @@ -536,6 +536,7 @@ > > > > /* MSR_IA32_VMX_MISC bits */ > > #define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29) > > +#define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE 0x1F > > /* AMD-V MSRs */ > > > > #define MSR_VM_CR 0xc0010114 > > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c > > index 1f1da43..f364d16 100644 > > --- a/arch/x86/kvm/vmx.c > > +++ b/arch/x86/kvm/vmx.c > > @@ -374,6 +374,8 @@ struct nested_vmx { > > */ > > struct page *apic_access_page; > > u64 msr_ia32_feature_control; > > + /* Set if vmexit is L2->L1 */ > > + bool nested_vmx_exit; > > }; > > > > #define POSTED_INTR_ON 0 > > @@ -2204,7 +2206,17 @@ static __init void nested_vmx_setup_ctls_msrs(void) > > #ifdef CONFIG_X86_64 > > VM_EXIT_HOST_ADDR_SPACE_SIZE | > > #endif > > - VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT; > > + VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT | > > + VM_EXIT_SAVE_VMX_PREEMPTION_TIMER; > > + if (!(nested_vmx_pinbased_ctls_high & > > + PIN_BASED_VMX_PREEMPTION_TIMER) || > > + !(nested_vmx_exit_ctls_high & > > + VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) { > > Align this under the other "!". Also, I prefer to have one long line > for the whole "!(... & ...) ||" (and likewise below), but I don't know > if Gleb agrees > !(... & ...) || !(... & ...) fits perfectly to 80 chars. > > + nested_vmx_exit_ctls_high &= > > + (~VM_EXIT_SAVE_VMX_PREEMPTION_TIMER); > > Please remove parentheses around ~VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, and > likewise elsewhere in the patch. > and line break too. > > + nested_vmx_pinbased_ctls_high &= > > + (~PIN_BASED_VMX_PREEMPTION_TIMER); > > + } > > nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | > > VM_EXIT_LOAD_IA32_EFER); > > > > @@ -6707,6 +6719,24 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2) > > *info2 = vmcs_read32(VM_EXIT_INTR_INFO); > > } > > > > +static void nested_adjust_preemption_timer(struct kvm_vcpu *vcpu) > > +{ > > + u64 delta_tsc_l1; > > + u32 preempt_val_l1, preempt_val_l2, preempt_scale; > > Should this exit immediately if the preemption timer pin-based control > is disabled? 
>
> > +	preempt_scale = native_read_msr(MSR_IA32_VMX_MISC) &
> > +			MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE;
> > +	preempt_val_l2 = vmcs_read32(VMX_PREEMPTION_TIMER_VALUE);
> > +	delta_tsc_l1 = kvm_x86_ops->read_l1_tsc(vcpu,
> > +			native_read_tsc()) - vcpu->arch.last_guest_tsc;
>
> Please format this like:
>
> 	delta_tsc_l1 =
> 		kvm_x86_ops->read_l1_tsc(vcpu, native_read_tsc())
> 		- vcpu->arch.last_guest_tsc;
>
And call vmx_read_l1_tsc() directly. Actually you can even use
to_vmx(vcpu)->nested.vmcs01_tsc_offset directly here since the function
will be called only when is_guest_mode() == true, but vmx_read_l1_tsc()
may be more clear here and compile should optimize second is_guest_mode()
check anyway.

> > +	preempt_val_l1 = delta_tsc_l1 >> preempt_scale;
> > +	if (preempt_val_l2 <= preempt_val_l1)
> > +		preempt_val_l2 = 0;
> > +	else
> > +		preempt_val_l2 -= preempt_val_l1;
> > +	vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, preempt_val_l2);
>
> Did you test that a value of 0 triggers an immediate exit, rather than
> counting down by 2^32? Perhaps it's safer to limit the value to 1
> instead of 0.
>
> > +}
> > +
> > /*
> >  * The guest has exited. See if we can fix it or if we need userspace
> >  * assistance.
> > @@ -6736,9 +6766,11 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
> > 		vmx->nested.nested_run_pending = 0;
> >
> > 	if (is_guest_mode(vcpu) && nested_vmx_exit_handled(vcpu)) {
> > +		vmx->nested.nested_vmx_exit = true;
>
> I think this assignment should be in nested_vmx_vmexit, since it is
> called from other places as well.
>
> > 		nested_vmx_vmexit(vcpu);
> > 		return 1;
> > 	}
> > +	vmx->nested.nested_vmx_exit = false;
> >
> > 	if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) {
> > 		vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
> > @@ -7132,6 +7164,8 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
> > 	debugctlmsr = get_debugctlmsr();
> >
> > 	vmx->__launched = vmx->loaded_vmcs->launched;
> > +	if (is_guest_mode(vcpu) && !(vmx->nested.nested_vmx_exit))
> > +		nested_adjust_preemption_timer(vcpu);
>
> Please leave the assignment to __launched last, since it's already
> initializing the asm below.
>
> I don't like the is_guest_mode check here... Maybe it's
> micro-optimizing, but I wonder if we already do too many checks in
> vmx_vcpu_run... For example, is_guest_mode could be changed (I think)
> to a check for "vmx->loaded_vmcs == &vmx->vmcs1".
>
Why this will be more efficient that HF_GUEST_MASK check?

> Alternatively, we could change nested_vmx_exit to an enum in struct
> vcpu_vmx (with values for L0->L1, L0->L2, L1->L2) that is initialized in
> vmx_handle_exit. Then we could check directly for L0->L2 and not adjust
> the preemption timer in other cases. In fact, I suspect this enum could
> replace HF_GUEST_MASK altogether. However, this would require some
> other, more complicated, changes to svm.c.
>
> Gleb, what do you think?
>
I do not see why nested_vmx_exit is necessary at all yet. We can detect
all aforementioned cases without.

--
			Gleb.
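For clarity, Gleb's two suggested forms as sketches (vmx_read_l1_tsc()
is the existing helper in vmx.c; the second form relies on
is_guest_mode(vcpu) being true on this path, which it is):

	/* Call the VMX helper directly instead of going through kvm_x86_ops. */
	delta_tsc_l1 = vmx_read_l1_tsc(vcpu, native_read_tsc())
		       - vcpu->arch.last_guest_tsc;

	/* Or use vmcs01's TSC offset directly; valid only in guest mode. */
	delta_tsc_l1 = native_read_tsc()
		       + to_vmx(vcpu)->nested.vmcs01_tsc_offset
		       - vcpu->arch.last_guest_tsc;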
Il 16/09/2013 09:44, Gleb Natapov ha scritto: > On Fri, Sep 13, 2013 at 07:15:11PM +0200, Paolo Bonzini wrote: >> Il 06/09/2013 04:04, Arthur Chunqi Li ha scritto: >>> +static void nested_adjust_preemption_timer(struct kvm_vcpu *vcpu) >>> +{ >>> + u64 delta_tsc_l1; >>> + u32 preempt_val_l1, preempt_val_l2, preempt_scale; >> >> Should this exit immediately if the preemption timer pin-based control >> is disabled? >> >>> + preempt_scale = native_read_msr(MSR_IA32_VMX_MISC) & >>> + MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE; >>> + preempt_val_l2 = vmcs_read32(VMX_PREEMPTION_TIMER_VALUE); >>> + delta_tsc_l1 = kvm_x86_ops->read_l1_tsc(vcpu, >>> + native_read_tsc()) - vcpu->arch.last_guest_tsc; >> >> Please format this like: >> >> delta_tsc_l1 = >> kvm_x86_ops->read_l1_tsc(vcpu, native_read_tsc()) >> - vcpu->arch.last_guest_tsc; >> > And call vmx_read_l1_tsc() directly. Actually you can even use > to_vmx(vcpu)->nested.vmcs01_tsc_offset directly here since the function > will be called only when is_guest_mode() == true, but vmx_read_l1_tsc() > may be more clear here and compile should optimize second is_guest_mode() > check anyway. Right. Though I'm not that concerned about optimizing L1->L2 and L0->L2 entries; I'm more concerned about not slowing down L0->L1 (which includes the non-nested case, of course). >>> + preempt_val_l1 = delta_tsc_l1 >> preempt_scale; >>> + if (preempt_val_l2 <= preempt_val_l1) >>> + preempt_val_l2 = 0; >>> + else >>> + preempt_val_l2 -= preempt_val_l1; >>> + vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, preempt_val_l2); >> >> Did you test that a value of 0 triggers an immediate exit, rather than >> counting down by 2^32? Perhaps it's safer to limit the value to 1 >> instead of 0. >> >>> +} >>> + >>> /* >>> * The guest has exited. See if we can fix it or if we need userspace >>> * assistance. >>> @@ -6736,9 +6766,11 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) >>> vmx->nested.nested_run_pending = 0; >>> >>> if (is_guest_mode(vcpu) && nested_vmx_exit_handled(vcpu)) { >>> + vmx->nested.nested_vmx_exit = true; >> >> I think this assignment should be in nested_vmx_vmexit, since it is >> called from other places as well. >> >>> nested_vmx_vmexit(vcpu); >>> return 1; >>> } >>> + vmx->nested.nested_vmx_exit = false; >>> >>> if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) { >>> vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY; >>> @@ -7132,6 +7164,8 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) >>> debugctlmsr = get_debugctlmsr(); >>> >>> vmx->__launched = vmx->loaded_vmcs->launched; >>> + if (is_guest_mode(vcpu) && !(vmx->nested.nested_vmx_exit)) >>> + nested_adjust_preemption_timer(vcpu); >> >> Please leave the assignment to __launched last, since it's already >> initializing the asm below. >> >> I don't like the is_guest_mode check here... Maybe it's >> micro-optimizing, but I wonder if we already do too many checks in >> vmx_vcpu_run... For example, is_guest_mode could be changed (I think) >> to a check for "vmx->loaded_vmcs == &vmx->vmcs1". >> > Why this will be more efficient that HF_GUEST_MASK check? Because we have already loaded vmx->loaded_vmcs, so it's one memory access less. >> Alternatively, we could change nested_vmx_exit to an enum in struct >> vcpu_vmx (with values for L0->L1, L0->L2, L1->L2) that is initialized in >> vmx_handle_exit. Then we could check directly for L0->L2 and not adjust >> the preemption timer in other cases. In fact, I suspect this enum could >> replace HF_GUEST_MASK altogether. 
>> However, this would require some
>> other, more complicated, changes to svm.c.
>>
>> Gleb, what do you think?
>>
> I do not see why nested_vmx_exit is necessary at all yet. We can detect
> all aforementioned cases without.

How do you distinguish L0->L2 from L1->L2 in vmx_vcpu_run?

Paolo
On Mon, Sep 16, 2013 at 10:58:12AM +0200, Paolo Bonzini wrote:
> >>>	vmx->__launched = vmx->loaded_vmcs->launched;
> >>> +	if (is_guest_mode(vcpu) && !(vmx->nested.nested_vmx_exit))
> >>> +		nested_adjust_preemption_timer(vcpu);
> >>
> >> Please leave the assignment to __launched last, since it's already
> >> initializing the asm below.
> >>
> >> I don't like the is_guest_mode check here... Maybe it's
> >> micro-optimizing, but I wonder if we already do too many checks in
> >> vmx_vcpu_run... For example, is_guest_mode could be changed (I think)
> >> to a check for "vmx->loaded_vmcs == &vmx->vmcs1".
> >>
> > Why this will be more efficient that HF_GUEST_MASK check?
>
> Because we have already loaded vmx->loaded_vmcs, so it's one memory
> access less.
>
But we will have to load vmx->vmcs1 instead :) Looks like very minor
optimization if at all. If we could avoid additional if() at all
somehow, may be mimicking vcpu->requests in vmx to get rid of most ifs
there.

--
			Gleb.
Il 16/09/2013 11:09, Gleb Natapov ha scritto:
> On Mon, Sep 16, 2013 at 10:58:12AM +0200, Paolo Bonzini wrote:
>>>>>	vmx->__launched = vmx->loaded_vmcs->launched;
>>>>> +	if (is_guest_mode(vcpu) && !(vmx->nested.nested_vmx_exit))
>>>>> +		nested_adjust_preemption_timer(vcpu);
>>>>
>>>> Please leave the assignment to __launched last, since it's already
>>>> initializing the asm below.
>>>>
>>>> I don't like the is_guest_mode check here... Maybe it's
>>>> micro-optimizing, but I wonder if we already do too many checks in
>>>> vmx_vcpu_run... For example, is_guest_mode could be changed (I think)
>>>> to a check for "vmx->loaded_vmcs == &vmx->vmcs1".
>>>>
>>> Why this will be more efficient that HF_GUEST_MASK check?
>>
>> Because we have already loaded vmx->loaded_vmcs, so it's one memory
>> access less.
>>
> But we will have to load vmx->vmcs1 instead :)

That's not a memory load, it's an add.

> Looks like very minor
> optimization if at all. If we could avoid additional if() at all
> somehow, may be mimicking vcpu->requests in vmx to get rid of most ifs
> there.

Yes.

Paolo
On Mon, Sep 16, 2013 at 11:49:40AM +0200, Paolo Bonzini wrote:
> Il 16/09/2013 11:09, Gleb Natapov ha scritto:
> > On Mon, Sep 16, 2013 at 10:58:12AM +0200, Paolo Bonzini wrote:
> >>>>>	vmx->__launched = vmx->loaded_vmcs->launched;
> >>>>> +	if (is_guest_mode(vcpu) && !(vmx->nested.nested_vmx_exit))
> >>>>> +		nested_adjust_preemption_timer(vcpu);
> >>>>
> >>>> Please leave the assignment to __launched last, since it's already
> >>>> initializing the asm below.
> >>>>
> >>>> I don't like the is_guest_mode check here... Maybe it's
> >>>> micro-optimizing, but I wonder if we already do too many checks in
> >>>> vmx_vcpu_run... For example, is_guest_mode could be changed (I think)
> >>>> to a check for "vmx->loaded_vmcs == &vmx->vmcs1".
> >>>>
> >>> Why this will be more efficient that HF_GUEST_MASK check?
> >>
> >> Because we have already loaded vmx->loaded_vmcs, so it's one memory
> >> access less.
> >>
> > But we will have to load vmx->vmcs1 instead :)
>
> That's not a memory load, it's an add.
>
You assume that vmx->loaded_vmcs and vmx will be both in registers here,
isn't it too much to assume?

--
			Gleb.
Il 16/09/2013 12:44, Gleb Natapov ha scritto:
> On Mon, Sep 16, 2013 at 11:49:40AM +0200, Paolo Bonzini wrote:
>> Il 16/09/2013 11:09, Gleb Natapov ha scritto:
>>> On Mon, Sep 16, 2013 at 10:58:12AM +0200, Paolo Bonzini wrote:
>>>>>>>	vmx->__launched = vmx->loaded_vmcs->launched;
>>>>>>> +	if (is_guest_mode(vcpu) && !(vmx->nested.nested_vmx_exit))
>>>>>>> +		nested_adjust_preemption_timer(vcpu);
>>>>>>
>>>>>> Please leave the assignment to __launched last, since it's already
>>>>>> initializing the asm below.
>>>>>>
>>>>>> I don't like the is_guest_mode check here... Maybe it's
>>>>>> micro-optimizing, but I wonder if we already do too many checks in
>>>>>> vmx_vcpu_run... For example, is_guest_mode could be changed (I think)
>>>>>> to a check for "vmx->loaded_vmcs == &vmx->vmcs1".
>>>>>>
>>>>> Why this will be more efficient that HF_GUEST_MASK check?
>>>>
>>>> Because we have already loaded vmx->loaded_vmcs, so it's one memory
>>>> access less.
>>>>
>>> But we will have to load vmx->vmcs1 instead :)
>>
>> That's not a memory load, it's an add.
>>
> You assume that vmx->loaded_vmcs and vmx will be both in registers here,
> isn't it too much to assume?

Both of them are needed in this very close line:

    vmx->__launched = vmx->loaded_vmcs->launched;

So it is not a big assumption---even on 32-bits, perhaps. By contrast,
vcpu->hflags is very unlikely to be in a register.

This is of course mostly pointless alone. But if we could shave 100
cycles off a lightweight vmexit, that could give a measurable
improvement on nested virt.

Paolo
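A sketch of the micro-optimization under discussion, combined with
Gleb's nested_run_pending check (field naming assumed from vmx.c's
struct vcpu_vmx, where loaded_vmcs points at either the vmcs01 member
or a vmcs02; this is only an illustration, not an agreed change):

	/*
	 * Sketch: vmx->loaded_vmcs is dereferenced on the next line
	 * anyway, so comparing the pointer against &vmx->vmcs01 can
	 * replace the vcpu->arch.hflags load done by is_guest_mode().
	 * loaded_vmcs points away from vmcs01 exactly when entering L2.
	 */
	if (vmx->loaded_vmcs != &vmx->vmcs01 &&
	    !vmx->nested.nested_run_pending)
		nested_adjust_preemption_timer(vcpu);
	vmx->__launched = vmx->loaded_vmcs->launched;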
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index bb04650..b93e09a 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -536,6 +536,7 @@
 
 /* MSR_IA32_VMX_MISC bits */
 #define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29)
+#define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE   0x1F
 /* AMD-V MSRs */
 
 #define MSR_VM_CR                       0xc0010114
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 1f1da43..f364d16 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -374,6 +374,8 @@ struct nested_vmx {
 	 */
 	struct page *apic_access_page;
 	u64 msr_ia32_feature_control;
+	/* Set if vmexit is L2->L1 */
+	bool nested_vmx_exit;
 };
 
 #define POSTED_INTR_ON 0
@@ -2204,7 +2206,17 @@ static __init void nested_vmx_setup_ctls_msrs(void)
 #ifdef CONFIG_X86_64
 		VM_EXIT_HOST_ADDR_SPACE_SIZE |
 #endif
-		VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT;
+		VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT |
+		VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
+	if (!(nested_vmx_pinbased_ctls_high &
+		PIN_BASED_VMX_PREEMPTION_TIMER) ||
+		!(nested_vmx_exit_ctls_high &
+		VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) {
+		nested_vmx_exit_ctls_high &=
+			(~VM_EXIT_SAVE_VMX_PREEMPTION_TIMER);
+		nested_vmx_pinbased_ctls_high &=
+			(~PIN_BASED_VMX_PREEMPTION_TIMER);
+	}
 	nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
 				      VM_EXIT_LOAD_IA32_EFER);
 
@@ -6707,6 +6719,24 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
 	*info2 = vmcs_read32(VM_EXIT_INTR_INFO);
 }
 
+static void nested_adjust_preemption_timer(struct kvm_vcpu *vcpu)
+{
+	u64 delta_tsc_l1;
+	u32 preempt_val_l1, preempt_val_l2, preempt_scale;
+
+	preempt_scale = native_read_msr(MSR_IA32_VMX_MISC) &
+			MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE;
+	preempt_val_l2 = vmcs_read32(VMX_PREEMPTION_TIMER_VALUE);
+	delta_tsc_l1 = kvm_x86_ops->read_l1_tsc(vcpu,
+			native_read_tsc()) - vcpu->arch.last_guest_tsc;
+	preempt_val_l1 = delta_tsc_l1 >> preempt_scale;
+	if (preempt_val_l2 <= preempt_val_l1)
+		preempt_val_l2 = 0;
+	else
+		preempt_val_l2 -= preempt_val_l1;
+	vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, preempt_val_l2);
+}
+
 /*
  * The guest has exited. See if we can fix it or if we need userspace
  * assistance.
@@ -6736,9 +6766,11 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
 		vmx->nested.nested_run_pending = 0;
 
 	if (is_guest_mode(vcpu) && nested_vmx_exit_handled(vcpu)) {
+		vmx->nested.nested_vmx_exit = true;
 		nested_vmx_vmexit(vcpu);
 		return 1;
 	}
+	vmx->nested.nested_vmx_exit = false;
 
 	if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) {
 		vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
@@ -7132,6 +7164,8 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	debugctlmsr = get_debugctlmsr();
 
 	vmx->__launched = vmx->loaded_vmcs->launched;
+	if (is_guest_mode(vcpu) && !(vmx->nested.nested_vmx_exit))
+		nested_adjust_preemption_timer(vcpu);
 	asm(
 		/* Store host registers */
 		"push %%" _ASM_DX "; push %%" _ASM_BP ";"
@@ -7518,6 +7552,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	u32 exec_control;
+	u32 exit_control;
 
 	vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
 	vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
@@ -7691,7 +7726,10 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 	 * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER
 	 * bits are further modified by vmx_set_efer() below.
 	 */
-	vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl);
+	exit_control = vmcs_config.vmexit_ctrl;
+	if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER)
+		exit_control |= VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
+	vmcs_write32(VM_EXIT_CONTROLS, exit_control);
 
 	/* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are
 	 * emulated by vmx_set_efer(), below.
@@ -8090,6 +8128,13 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 	vmcs12->guest_pending_dbg_exceptions =
 		vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS);
 
+	if ((vmcs12->pin_based_vm_exec_control &
+			PIN_BASED_VMX_PREEMPTION_TIMER) &&
+			(vmcs12->vm_exit_controls &
+			VM_EXIT_SAVE_VMX_PREEMPTION_TIMER))
+		vmcs12->vmx_preemption_timer_value =
+			vmcs_read32(VMX_PREEMPTION_TIMER_VALUE);
+
 	/*
 	 * In some cases (usually, nested EPT), L2 is allowed to change its
 	 * own CR3 without exiting. If it has changed it, we must keep it.
This patch contains the following two changes:
1. Fix the bug in nested preemption timer support. If a vmexit goes
L2->L0 for a reason not emulated by L1, the preemption timer value
should be saved across such exits.
2. Add support for the "Save VMX-preemption timer value" VM-Exit
control to nVMX.

With this patch, nested VMX preemption timer features are fully
supported.

Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
---
ChangeLog to v3:
Move nested_adjust_preemption_timer to the latest place just before
vmenter. Some minor changes.

 arch/x86/include/uapi/asm/msr-index.h |    1 +
 arch/x86/kvm/vmx.c                    |   49 +++++++++++++++++++++++++++++++--
 2 files changed, 48 insertions(+), 2 deletions(-)