Message ID | 1377444390-4609-1-git-send-email-yzt356@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 2013-08-25 17:26, Arthur Chunqi Li wrote: > This patch contains the following two changes: > 1. Fix the bug in nested preemption timer support. If vmexit L2->L0 > with some reasons not emulated by L1, preemption timer value should > be save in such exits. > 2. Add support of "Save VMX-preemption timer value" VM-Exit controls > to nVMX. > > With this patch, nested VMX preemption timer features are fully > supported. > > Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com> > --- > arch/x86/kvm/vmx.c | 49 ++++++++++++++++++++++++++++++++++++++++++++----- > 1 file changed, 44 insertions(+), 5 deletions(-) > > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c > index 57b4e12..6aa320e 100644 > --- a/arch/x86/kvm/vmx.c > +++ b/arch/x86/kvm/vmx.c > @@ -2204,7 +2204,14 @@ static __init void nested_vmx_setup_ctls_msrs(void) > #ifdef CONFIG_X86_64 > VM_EXIT_HOST_ADDR_SPACE_SIZE | > #endif > - VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT; > + VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT | > + VM_EXIT_SAVE_VMX_PREEMPTION_TIMER; > + if (!(nested_vmx_pinbased_ctls_high & PIN_BASED_VMX_PREEMPTION_TIMER)) > + nested_vmx_exit_ctls_high &= > + (~VM_EXIT_SAVE_VMX_PREEMPTION_TIMER); > + if (!(nested_vmx_exit_ctls_high & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) > + nested_vmx_pinbased_ctls_high &= > + (~PIN_BASED_VMX_PREEMPTION_TIMER); > nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | > VM_EXIT_LOAD_IA32_EFER); > > @@ -6706,6 +6713,22 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2) > *info2 = vmcs_read32(VM_EXIT_INTR_INFO); > } > > +static void nested_fix_preempt(struct kvm_vcpu *vcpu) nested_adjust_preemption_timer - just "preempt" can be misleading. > +{ > + u64 delta_guest_tsc; > + u32 preempt_val, preempt_bit, delta_preempt_val; > + > + preempt_bit = native_read_msr(MSR_IA32_VMX_MISC) & 0x1F; This is rather preemption_timer_scale. And if there is no symbolic value for the bitmask, please introduce one. 
> + delta_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu, > + native_read_tsc()) - vcpu->arch.last_guest_tsc; > + delta_preempt_val = delta_guest_tsc >> preempt_bit; > + preempt_val = vmcs_read32(VMX_PREEMPTION_TIMER_VALUE); > + if (preempt_val - delta_preempt_val < 0) > + preempt_val = 0; > + else > + preempt_val -= delta_preempt_val; > + vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, preempt_val); The rest is unfortunately wrong. It has to be split into two parts: Part one, the calculation of L1's TSC value and its storing in nested_vmx, has to be done on vmexit. Part two, reading the current TSC, calculating the time spent in L0 and converting it into L1 TSC time, has to be done right before vmentry of L2. Arthur, please make sure that your test case detects the current breakage of preemption timer emulation properly, both w.r.t. missing save/restore and also regarding missing L0 time compensation, and then check that your KVM patch fixes it based on the unit test results. Jan > +} > /* > * The guest has exited. See if we can fix it or if we need userspace > * assistance. > @@ -6734,9 +6757,12 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) > else > vmx->nested.nested_run_pending = 0; > > - if (is_guest_mode(vcpu) && nested_vmx_exit_handled(vcpu)) { > - nested_vmx_vmexit(vcpu); > - return 1; > + if (is_guest_mode(vcpu)) { > + if (nested_vmx_exit_handled(vcpu)) { > + nested_vmx_vmexit(vcpu); > + return 1; > + } else > + nested_fix_preempt(vcpu); > } > > if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) { > @@ -7517,6 +7543,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) > { > struct vcpu_vmx *vmx = to_vmx(vcpu); > u32 exec_control; > + u32 exit_control; > > vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector); > vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector); > @@ -7690,7 +7717,10 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) > * we should use its exit controls. 
Note that VM_EXIT_LOAD_IA32_EFER > * bits are further modified by vmx_set_efer() below. > */ > - vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl); > + exit_control = vmcs_config.vmexit_ctrl; > + if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) > + exit_control |= VM_EXIT_SAVE_VMX_PREEMPTION_TIMER; > + vmcs_write32(VM_EXIT_CONTROLS, exit_control); > > /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are > * emulated by vmx_set_efer(), below. > @@ -8089,6 +8119,15 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) > vmcs12->guest_pending_dbg_exceptions = > vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS); > > + if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) { > + if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER) > + vmcs12->vmx_preemption_timer_value = > + vmcs_read32(VMX_PREEMPTION_TIMER_VALUE); > + else > + vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, > + vmcs12->vmx_preemption_timer_value); > + } > + > /* > * In some cases (usually, nested EPT), L2 is allowed to change its > * own CR3 without exiting. If it has changed it, we must keep it. >
On Mon, Aug 26, 2013 at 3:23 PM, Jan Kiszka <jan.kiszka@web.de> wrote: > On 2013-08-25 17:26, Arthur Chunqi Li wrote: >> This patch contains the following two changes: >> 1. Fix the bug in nested preemption timer support. If vmexit L2->L0 >> with some reasons not emulated by L1, preemption timer value should >> be save in such exits. >> 2. Add support of "Save VMX-preemption timer value" VM-Exit controls >> to nVMX. >> >> With this patch, nested VMX preemption timer features are fully >> supported. >> >> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com> >> --- >> arch/x86/kvm/vmx.c | 49 ++++++++++++++++++++++++++++++++++++++++++++----- >> 1 file changed, 44 insertions(+), 5 deletions(-) >> >> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c >> index 57b4e12..6aa320e 100644 >> --- a/arch/x86/kvm/vmx.c >> +++ b/arch/x86/kvm/vmx.c >> @@ -2204,7 +2204,14 @@ static __init void nested_vmx_setup_ctls_msrs(void) >> #ifdef CONFIG_X86_64 >> VM_EXIT_HOST_ADDR_SPACE_SIZE | >> #endif >> - VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT; >> + VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT | >> + VM_EXIT_SAVE_VMX_PREEMPTION_TIMER; >> + if (!(nested_vmx_pinbased_ctls_high & PIN_BASED_VMX_PREEMPTION_TIMER)) >> + nested_vmx_exit_ctls_high &= >> + (~VM_EXIT_SAVE_VMX_PREEMPTION_TIMER); >> + if (!(nested_vmx_exit_ctls_high & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) >> + nested_vmx_pinbased_ctls_high &= >> + (~PIN_BASED_VMX_PREEMPTION_TIMER); >> nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | >> VM_EXIT_LOAD_IA32_EFER); >> >> @@ -6706,6 +6713,22 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2) >> *info2 = vmcs_read32(VM_EXIT_INTR_INFO); >> } >> >> +static void nested_fix_preempt(struct kvm_vcpu *vcpu) > > nested_adjust_preemption_timer - just "preempt" can be misleading. 
> >> +{ >> + u64 delta_guest_tsc; >> + u32 preempt_val, preempt_bit, delta_preempt_val; >> + >> + preempt_bit = native_read_msr(MSR_IA32_VMX_MISC) & 0x1F; > > This is rather preemption_timer_scale. And if there is no symbolic value > for the bitmask, please introduce one. > >> + delta_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu, >> + native_read_tsc()) - vcpu->arch.last_guest_tsc; >> + delta_preempt_val = delta_guest_tsc >> preempt_bit; >> + preempt_val = vmcs_read32(VMX_PREEMPTION_TIMER_VALUE); >> + if (preempt_val - delta_preempt_val < 0) >> + preempt_val = 0; >> + else >> + preempt_val -= delta_preempt_val; >> + vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, preempt_val); > > The rest unfortunately wrong. It has to be split into two parts: Part > one, the calculation of L1's TSC value and its storing in nested_vmx, > has to be done on vmexit. Part two, reading the current TSC, calculating > the time spent in L0 and converting it into L1 TSC time, this has to be > done right before vmentry of L2. As we discussed yesterday, L1's TSC value is not saved in nested_vmx, in order to avoid adding code to the hot path of vmexit. Instead, we use vcpu->arch.last_guest_tsc as the value stored on vmexit (which is already done). The value of "part two" is calculated in nested_fix_preempt() above (see the variable delta_guest_tsc, which stores the TSC time consumed in L0). Since vmx_handle_exit is the last function called in the vmexit path, I think it's OK to put "part two" here. > > Arthur, please make sure that your test case detects the current > breakage of preemption timer emulation properly, both /wrt to missing > save/restore and also regarding missing L0 time compensation, and then > check that your KVM patch fixes it based on the unit test results. OK, I will commit a kvm-unit-tests patch to test these changes. Arthur > > Jan > >> +} >> /* >> * The guest has exited. See if we can fix it or if we need userspace >> * assistance. 
>> @@ -6734,9 +6757,12 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) >> else >> vmx->nested.nested_run_pending = 0; >> >> - if (is_guest_mode(vcpu) && nested_vmx_exit_handled(vcpu)) { >> - nested_vmx_vmexit(vcpu); >> - return 1; >> + if (is_guest_mode(vcpu)) { >> + if (nested_vmx_exit_handled(vcpu)) { >> + nested_vmx_vmexit(vcpu); >> + return 1; >> + } else >> + nested_fix_preempt(vcpu); >> } >> >> if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) { >> @@ -7517,6 +7543,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) >> { >> struct vcpu_vmx *vmx = to_vmx(vcpu); >> u32 exec_control; >> + u32 exit_control; >> >> vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector); >> vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector); >> @@ -7690,7 +7717,10 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) >> * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER >> * bits are further modified by vmx_set_efer() below. >> */ >> - vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl); >> + exit_control = vmcs_config.vmexit_ctrl; >> + if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) >> + exit_control |= VM_EXIT_SAVE_VMX_PREEMPTION_TIMER; >> + vmcs_write32(VM_EXIT_CONTROLS, exit_control); >> >> /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are >> * emulated by vmx_set_efer(), below. 
>> @@ -8089,6 +8119,15 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) >> vmcs12->guest_pending_dbg_exceptions = >> vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS); >> >> + if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) { >> + if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER) >> + vmcs12->vmx_preemption_timer_value = >> + vmcs_read32(VMX_PREEMPTION_TIMER_VALUE); >> + else >> + vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, >> + vmcs12->vmx_preemption_timer_value); >> + } >> + >> /* >> * In some cases (usually, nested EPT), L2 is allowed to change its >> * own CR3 without exiting. If it has changed it, we must keep it. >> > >
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 57b4e12..6aa320e 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2204,7 +2204,14 @@ static __init void nested_vmx_setup_ctls_msrs(void) #ifdef CONFIG_X86_64 VM_EXIT_HOST_ADDR_SPACE_SIZE | #endif - VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT; + VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT | + VM_EXIT_SAVE_VMX_PREEMPTION_TIMER; + if (!(nested_vmx_pinbased_ctls_high & PIN_BASED_VMX_PREEMPTION_TIMER)) + nested_vmx_exit_ctls_high &= + (~VM_EXIT_SAVE_VMX_PREEMPTION_TIMER); + if (!(nested_vmx_exit_ctls_high & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) + nested_vmx_pinbased_ctls_high &= + (~PIN_BASED_VMX_PREEMPTION_TIMER); nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | VM_EXIT_LOAD_IA32_EFER); @@ -6706,6 +6713,22 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2) *info2 = vmcs_read32(VM_EXIT_INTR_INFO); } +static void nested_fix_preempt(struct kvm_vcpu *vcpu) +{ + u64 delta_guest_tsc; + u32 preempt_val, preempt_bit, delta_preempt_val; + + preempt_bit = native_read_msr(MSR_IA32_VMX_MISC) & 0x1F; + delta_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu, + native_read_tsc()) - vcpu->arch.last_guest_tsc; + delta_preempt_val = delta_guest_tsc >> preempt_bit; + preempt_val = vmcs_read32(VMX_PREEMPTION_TIMER_VALUE); + if (preempt_val - delta_preempt_val < 0) + preempt_val = 0; + else + preempt_val -= delta_preempt_val; + vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, preempt_val); +} /* * The guest has exited. See if we can fix it or if we need userspace * assistance. 
@@ -6734,9 +6757,12 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) else vmx->nested.nested_run_pending = 0; - if (is_guest_mode(vcpu) && nested_vmx_exit_handled(vcpu)) { - nested_vmx_vmexit(vcpu); - return 1; + if (is_guest_mode(vcpu)) { + if (nested_vmx_exit_handled(vcpu)) { + nested_vmx_vmexit(vcpu); + return 1; + } else + nested_fix_preempt(vcpu); } if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) { @@ -7517,6 +7543,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { struct vcpu_vmx *vmx = to_vmx(vcpu); u32 exec_control; + u32 exit_control; vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector); vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector); @@ -7690,7 +7717,10 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER * bits are further modified by vmx_set_efer() below. */ - vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl); + exit_control = vmcs_config.vmexit_ctrl; + if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) + exit_control |= VM_EXIT_SAVE_VMX_PREEMPTION_TIMER; + vmcs_write32(VM_EXIT_CONTROLS, exit_control); /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are * emulated by vmx_set_efer(), below. @@ -8089,6 +8119,15 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) vmcs12->guest_pending_dbg_exceptions = vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS); + if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) { + if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER) + vmcs12->vmx_preemption_timer_value = + vmcs_read32(VMX_PREEMPTION_TIMER_VALUE); + else + vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, + vmcs12->vmx_preemption_timer_value); + } + /* * In some cases (usually, nested EPT), L2 is allowed to change its * own CR3 without exiting. If it has changed it, we must keep it.
This patch contains the following two changes: 1. Fix the bug in nested preemption timer support. If a vmexit L2->L0 occurs for a reason not emulated by L1, the preemption timer value should be saved on such exits. 2. Add support for the "Save VMX-preemption timer value" VM-Exit control to nVMX. With this patch, nested VMX preemption timer features are fully supported. Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com> --- arch/x86/kvm/vmx.c | 49 ++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 44 insertions(+), 5 deletions(-)