Message ID | 1377369850-18583-1-git-send-email-root@Blade1-01.Blade1-01 (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 2013-08-24 20:44, root wrote: > This patch contains the following two changes: > 1. Fix the bug in nested preemption timer support. If vmexit L2->L0 > with some reasons not emulated by L1, preemption timer value should > be save in such exits. > 2. Add support of "Save VMX-preemption timer value" VM-Exit controls > to nVMX. > > With this patch, nested VMX preemption timer features are fully > supported. > > Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com> > --- > arch/x86/kvm/vmx.c | 30 +++++++++++++++++++++++++----- > 1 file changed, 25 insertions(+), 5 deletions(-) > > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c > index 57b4e12..9579409 100644 > --- a/arch/x86/kvm/vmx.c > +++ b/arch/x86/kvm/vmx.c > @@ -2204,7 +2204,8 @@ static __init void nested_vmx_setup_ctls_msrs(void) > #ifdef CONFIG_X86_64 > VM_EXIT_HOST_ADDR_SPACE_SIZE | > #endif > - VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT; > + VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT | > + VM_EXIT_SAVE_VMX_PREEMPTION_TIMER; > nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | > VM_EXIT_LOAD_IA32_EFER); In the absence of VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, you need to hide PIN_BASED_VMX_PREEMPTION_TIMER from the guest as we cannot emulate its behavior properly in that case. > > @@ -7578,9 +7579,14 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) > (vmcs_config.pin_based_exec_ctrl | > vmcs12->pin_based_vm_exec_control)); > > - if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) > - vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, > - vmcs12->vmx_preemption_timer_value); > + if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) { > + if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER) > + vmcs12->vmx_preemption_timer_value = > + vmcs_read32(VMX_PREEMPTION_TIMER_VALUE); > + else > + vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, > + vmcs12->vmx_preemption_timer_value); > + } This is not correct. 
We still need to set the vmcs to vmx_preemption_timer_value. The difference is that, on exit from L2, vmx_preemption_timer_value has to be updated according to the saved hardware state. The corresponding code is missing in your patch so far. > > /* > * Whether page-faults are trapped is determined by a combination of > @@ -7690,7 +7696,11 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) > * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER > * bits are further modified by vmx_set_efer() below. > */ > - vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl); > + if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) > + vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl | > + VM_EXIT_SAVE_VMX_PREEMPTION_TIMER); > + else > + vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl); Let's prepare the value for VM_EXIT_CONTROLS in a local variable first, then write it to the vmcs. > > /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are > * emulated by vmx_set_efer(), below. > @@ -7912,6 +7922,16 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) > } > > /* > + * If L2 support PIN_BASED_VMX_PREEMPTION_TIMER, L0 must support > + * VM_EXIT_SAVE_VMX_PREEMPTION_TIMER. > + */ > + if ((vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) && > + !(nested_vmx_exit_ctls_high & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) { > + nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); > + return 1; > + } Nope, the guest is free to run the preemption timer without saving on exits. It may have a valid use case for this, e.g. that it will always reprogram it on entry. > + > + /* > * We're finally done with prerequisite checking, and can start with > * the nested entry. > */ > Jan
On Sun, Aug 25, 2013 at 2:44 PM, Jan Kiszka <jan.kiszka@web.de> wrote: > On 2013-08-24 20:44, root wrote: >> This patch contains the following two changes: >> 1. Fix the bug in nested preemption timer support. If vmexit L2->L0 >> with some reasons not emulated by L1, preemption timer value should >> be save in such exits. >> 2. Add support of "Save VMX-preemption timer value" VM-Exit controls >> to nVMX. >> >> With this patch, nested VMX preemption timer features are fully >> supported. >> >> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com> >> --- >> arch/x86/kvm/vmx.c | 30 +++++++++++++++++++++++++----- >> 1 file changed, 25 insertions(+), 5 deletions(-) >> >> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c >> index 57b4e12..9579409 100644 >> --- a/arch/x86/kvm/vmx.c >> +++ b/arch/x86/kvm/vmx.c >> @@ -2204,7 +2204,8 @@ static __init void nested_vmx_setup_ctls_msrs(void) >> #ifdef CONFIG_X86_64 >> VM_EXIT_HOST_ADDR_SPACE_SIZE | >> #endif >> - VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT; >> + VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT | >> + VM_EXIT_SAVE_VMX_PREEMPTION_TIMER; >> nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | >> VM_EXIT_LOAD_IA32_EFER); > > In the absence of VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, you need to hide > PIN_BASED_VMX_PREEMPTION_TIMER from the guest as we cannot emulate its > behavior properly in that case. 
> >> >> @@ -7578,9 +7579,14 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) >> (vmcs_config.pin_based_exec_ctrl | >> vmcs12->pin_based_vm_exec_control)); >> >> - if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) >> - vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, >> - vmcs12->vmx_preemption_timer_value); >> + if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) { >> + if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER) >> + vmcs12->vmx_preemption_timer_value = >> + vmcs_read32(VMX_PREEMPTION_TIMER_VALUE); >> + else >> + vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, >> + vmcs12->vmx_preemption_timer_value); >> + } > > This is not correct. We still need to set the vmcs to > vmx_preemption_timer_value. The difference is that, on exit from L2, > vmx_preemption_timer_value has to be updated according to the saved > hardware state. The corresponding code is missing in your patch so far. > >> >> /* >> * Whether page-faults are trapped is determined by a combination of >> @@ -7690,7 +7696,11 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) >> * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER >> * bits are further modified by vmx_set_efer() below. >> */ >> - vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl); >> + if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) >> + vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl | >> + VM_EXIT_SAVE_VMX_PREEMPTION_TIMER); >> + else >> + vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl); > > Let's prepare the value for VM_EXIT_CONTROLS in a local variable first, > then write it to the vmcs. > >> >> /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are >> * emulated by vmx_set_efer(), below. 
>> @@ -7912,6 +7922,16 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) >> } >> >> /* >> + * If L2 support PIN_BASED_VMX_PREEMPTION_TIMER, L0 must support >> + * VM_EXIT_SAVE_VMX_PREEMPTION_TIMER. >> + */ >> + if ((vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) && >> + !(nested_vmx_exit_ctls_high & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) { >> + nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); >> + return 1; >> + } > > Nope, the guest is free to run the preemption timer without saving on > exits. It may have a valid use case for this, e.g. that it will always > reprogram it on entry. Here "!(nested_vmx_exit_ctls_high & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)" is used to detect if hardware support "save preemption timer" feature, which means if L2 supports pinbased vmx preemption timer, host must support "save preemption timer" feature. Though nested_vmx_exit_ctls_* is used for nested env, but it can also used to reflect the host's feature. Here is what I discuss with you yesterday, and we can also get the feature via "rdmsr" here to avoid the confusion. Arthur > >> + >> + /* >> * We're finally done with prerequisite checking, and can start with >> * the nested entry. >> */ >> > > Jan > -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 2013-08-25 09:24, Arthur Chunqi Li wrote: > On Sun, Aug 25, 2013 at 2:44 PM, Jan Kiszka <jan.kiszka@web.de> wrote: >> On 2013-08-24 20:44, root wrote: >>> This patch contains the following two changes: >>> 1. Fix the bug in nested preemption timer support. If vmexit L2->L0 >>> with some reasons not emulated by L1, preemption timer value should >>> be save in such exits. >>> 2. Add support of "Save VMX-preemption timer value" VM-Exit controls >>> to nVMX. >>> >>> With this patch, nested VMX preemption timer features are fully >>> supported. >>> >>> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com> >>> --- >>> arch/x86/kvm/vmx.c | 30 +++++++++++++++++++++++++----- >>> 1 file changed, 25 insertions(+), 5 deletions(-) >>> >>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c >>> index 57b4e12..9579409 100644 >>> --- a/arch/x86/kvm/vmx.c >>> +++ b/arch/x86/kvm/vmx.c >>> @@ -2204,7 +2204,8 @@ static __init void nested_vmx_setup_ctls_msrs(void) >>> #ifdef CONFIG_X86_64 >>> VM_EXIT_HOST_ADDR_SPACE_SIZE | >>> #endif >>> - VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT; >>> + VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT | >>> + VM_EXIT_SAVE_VMX_PREEMPTION_TIMER; >>> nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | >>> VM_EXIT_LOAD_IA32_EFER); >> >> In the absence of VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, you need to hide >> PIN_BASED_VMX_PREEMPTION_TIMER from the guest as we cannot emulate its >> behavior properly in that case. 
>> >>> >>> @@ -7578,9 +7579,14 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) >>> (vmcs_config.pin_based_exec_ctrl | >>> vmcs12->pin_based_vm_exec_control)); >>> >>> - if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) >>> - vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, >>> - vmcs12->vmx_preemption_timer_value); >>> + if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) { >>> + if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER) >>> + vmcs12->vmx_preemption_timer_value = >>> + vmcs_read32(VMX_PREEMPTION_TIMER_VALUE); >>> + else >>> + vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, >>> + vmcs12->vmx_preemption_timer_value); >>> + } >> >> This is not correct. We still need to set the vmcs to >> vmx_preemption_timer_value. The difference is that, on exit from L2, >> vmx_preemption_timer_value has to be updated according to the saved >> hardware state. The corresponding code is missing in your patch so far. >> >>> >>> /* >>> * Whether page-faults are trapped is determined by a combination of >>> @@ -7690,7 +7696,11 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) >>> * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER >>> * bits are further modified by vmx_set_efer() below. >>> */ >>> - vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl); >>> + if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) >>> + vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl | >>> + VM_EXIT_SAVE_VMX_PREEMPTION_TIMER); >>> + else >>> + vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl); >> >> Let's prepare the value for VM_EXIT_CONTROLS in a local variable first, >> then write it to the vmcs. >> >>> >>> /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are >>> * emulated by vmx_set_efer(), below. 
>>> @@ -7912,6 +7922,16 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) >>> } >>> >>> /* >>> + * If L2 support PIN_BASED_VMX_PREEMPTION_TIMER, L0 must support >>> + * VM_EXIT_SAVE_VMX_PREEMPTION_TIMER. >>> + */ >>> + if ((vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) && >>> + !(nested_vmx_exit_ctls_high & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) { >>> + nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); >>> + return 1; >>> + } >> >> Nope, the guest is free to run the preemption timer without saving on >> exits. It may have a valid use case for this, e.g. that it will always >> reprogram it on entry. > Here "!(nested_vmx_exit_ctls_high & > VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)" is used to detect if hardware > support "save preemption timer" feature, which means if L2 supports > pinbased vmx preemption timer, host must support "save preemption > timer" feature. Sorry, parsed the code incorrectly. > Though nested_vmx_exit_ctls_* is used for nested env, > but it can also used to reflect the host's feature. Here is what I > discuss with you yesterday, and we can also get the feature via > "rdmsr" here to avoid the confusion. Yes. The point is that we will not even expose PIN_BASED_VMX_PREEMPTION_TIMER if VM_EXIT_SAVE_VMX_PREEMPTION_TIMER is missing. If the guest then requests the former, it simply sets an invalid pin-based control value which we already catch and report. So this hunk becomes redundant. Jan
> From: Jan Kiszka <jan.kiszka@web.de> > To: "??? <Arthur Chunqi Li>" <yzt356@gmail.com>, > Cc: kvm@vger.kernel.org, gleb@redhat.com, pbonzini@redhat.com > Date: 25/08/2013 09:44 AM > Subject: Re: [PATCH] KVM: nVMX: Fully support of nested VMX preemption timer > Sent by: kvm-owner@vger.kernel.org > > On 2013-08-24 20:44, root wrote: > > This patch contains the following two changes: > > 1. Fix the bug in nested preemption timer support. If vmexit L2->L0 > > with some reasons not emulated by L1, preemption timer value should > > be save in such exits. > > 2. Add support of "Save VMX-preemption timer value" VM-Exit controls > > to nVMX. > > > > With this patch, nested VMX preemption timer features are fully > > supported. > > > > Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com> > > --- > > > > @@ -7578,9 +7579,14 @@ static void prepare_vmcs02(struct kvm_vcpu > *vcpu, struct vmcs12 *vmcs12) > > (vmcs_config.pin_based_exec_ctrl | > > vmcs12->pin_based_vm_exec_control)); > > > > - if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) > > - vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, > > - vmcs12->vmx_preemption_timer_value); > > + if (vmcs12->pin_based_vm_exec_control & > PIN_BASED_VMX_PREEMPTION_TIMER) { > > + if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER) > > + vmcs12->vmx_preemption_timer_value = > > + vmcs_read32(VMX_PREEMPTION_TIMER_VALUE); > > + else > > + vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, > > + vmcs12->vmx_preemption_timer_value); > > + } > > This is not correct. We still need to set the vmcs to > vmx_preemption_timer_value. The difference is that, on exit from L2, > vmx_preemption_timer_value has to be updated according to the saved > hardware state. The corresponding code is missing in your patch so far. I think something else maybe be missing here: assuming L0 handles exits for L2 without involving L1 (e.g. external interrupts or ept violations), then, we may spend some cycles in L0 handling these exits. 
Note that L1 is not aware of these exits, and from L1's perspective L2 was running on the CPU. That means that we may need to subtract the cycles spent in L0 from the preemption timer, or emulate a preemption timer exit to force a transition to L1 instead of resuming L2. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Sun, Aug 25, 2013 at 3:28 PM, Jan Kiszka <jan.kiszka@web.de> wrote: > On 2013-08-25 09:24, Arthur Chunqi Li wrote: >> On Sun, Aug 25, 2013 at 2:44 PM, Jan Kiszka <jan.kiszka@web.de> wrote: >>> On 2013-08-24 20:44, root wrote: >>>> This patch contains the following two changes: >>>> 1. Fix the bug in nested preemption timer support. If vmexit L2->L0 >>>> with some reasons not emulated by L1, preemption timer value should >>>> be save in such exits. >>>> 2. Add support of "Save VMX-preemption timer value" VM-Exit controls >>>> to nVMX. >>>> >>>> With this patch, nested VMX preemption timer features are fully >>>> supported. >>>> >>>> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com> >>>> --- >>>> arch/x86/kvm/vmx.c | 30 +++++++++++++++++++++++++----- >>>> 1 file changed, 25 insertions(+), 5 deletions(-) >>>> >>>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c >>>> index 57b4e12..9579409 100644 >>>> --- a/arch/x86/kvm/vmx.c >>>> +++ b/arch/x86/kvm/vmx.c >>>> @@ -2204,7 +2204,8 @@ static __init void nested_vmx_setup_ctls_msrs(void) >>>> #ifdef CONFIG_X86_64 >>>> VM_EXIT_HOST_ADDR_SPACE_SIZE | >>>> #endif >>>> - VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT; >>>> + VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT | >>>> + VM_EXIT_SAVE_VMX_PREEMPTION_TIMER; >>>> nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | >>>> VM_EXIT_LOAD_IA32_EFER); >>> >>> In the absence of VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, you need to hide >>> PIN_BASED_VMX_PREEMPTION_TIMER from the guest as we cannot emulate its >>> behavior properly in that case. Besides, we need to test that in the absence of PIN_BASED_VMX_PREEMPTION_TIMER, we need to hide VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, though this should not happen according to Intel SDM. 
>>> >>>> >>>> @@ -7578,9 +7579,14 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) >>>> (vmcs_config.pin_based_exec_ctrl | >>>> vmcs12->pin_based_vm_exec_control)); >>>> >>>> - if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) >>>> - vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, >>>> - vmcs12->vmx_preemption_timer_value); >>>> + if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) { >>>> + if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER) >>>> + vmcs12->vmx_preemption_timer_value = >>>> + vmcs_read32(VMX_PREEMPTION_TIMER_VALUE); >>>> + else >>>> + vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, >>>> + vmcs12->vmx_preemption_timer_value); >>>> + } >>> >>> This is not correct. We still need to set the vmcs to >>> vmx_preemption_timer_value. The difference is that, on exit from L2, >>> vmx_preemption_timer_value has to be updated according to the saved >>> hardware state. The corresponding code is missing in your patch so far. >>> >>>> >>>> /* >>>> * Whether page-faults are trapped is determined by a combination of >>>> @@ -7690,7 +7696,11 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) >>>> * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER >>>> * bits are further modified by vmx_set_efer() below. >>>> */ >>>> - vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl); >>>> + if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) >>>> + vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl | >>>> + VM_EXIT_SAVE_VMX_PREEMPTION_TIMER); >>>> + else >>>> + vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl); >>> >>> Let's prepare the value for VM_EXIT_CONTROLS in a local variable first, >>> then write it to the vmcs. >>> >>>> >>>> /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are >>>> * emulated by vmx_set_efer(), below. 
>>>> @@ -7912,6 +7922,16 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) >>>> } >>>> >>>> /* >>>> + * If L2 support PIN_BASED_VMX_PREEMPTION_TIMER, L0 must support >>>> + * VM_EXIT_SAVE_VMX_PREEMPTION_TIMER. >>>> + */ >>>> + if ((vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) && >>>> + !(nested_vmx_exit_ctls_high & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) { >>>> + nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); >>>> + return 1; >>>> + } >>> >>> Nope, the guest is free to run the preemption timer without saving on >>> exits. It may have a valid use case for this, e.g. that it will always >>> reprogram it on entry. >> Here "!(nested_vmx_exit_ctls_high & >> VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)" is used to detect if hardware >> support "save preemption timer" feature, which means if L2 supports >> pinbased vmx preemption timer, host must support "save preemption >> timer" feature. > > Sorry, parsed the code incorrectly. > >> Though nested_vmx_exit_ctls_* is used for nested env, >> but it can also used to reflect the host's feature. Here is what I >> discuss with you yesterday, and we can also get the feature via >> "rdmsr" here to avoid the confusion. > > Yes. The point is that we will not even expose > PIN_BASED_VMX_PREEMPTION_TIMER if VM_EXIT_SAVE_VMX_PREEMPTION_TIMER is > missing. If the guest then requests the former, it simply sets an > invalid pin-based control value which we already catch and report. So > this hunk becomes redundant. Yep, if we check it when setting nested_vmx_*_ctls_high this hunk is useless. Besides, see comments above. Arthur > > Jan > > -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Sun, Aug 25, 2013 at 3:37 PM, Abel Gordon <ABELG@il.ibm.com> wrote: > > >> From: Jan Kiszka <jan.kiszka@web.de> >> To: "??? <Arthur Chunqi Li>" <yzt356@gmail.com>, >> Cc: kvm@vger.kernel.org, gleb@redhat.com, pbonzini@redhat.com >> Date: 25/08/2013 09:44 AM >> Subject: Re: [PATCH] KVM: nVMX: Fully support of nested VMX preemption > timer >> Sent by: kvm-owner@vger.kernel.org >> >> On 2013-08-24 20:44, root wrote: >> > This patch contains the following two changes: >> > 1. Fix the bug in nested preemption timer support. If vmexit L2->L0 >> > with some reasons not emulated by L1, preemption timer value should >> > be save in such exits. >> > 2. Add support of "Save VMX-preemption timer value" VM-Exit controls >> > to nVMX. >> > >> > With this patch, nested VMX preemption timer features are fully >> > supported. >> > >> > Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com> >> > --- > >> > >> > @@ -7578,9 +7579,14 @@ static void prepare_vmcs02(struct kvm_vcpu >> *vcpu, struct vmcs12 *vmcs12) >> > (vmcs_config.pin_based_exec_ctrl | >> > vmcs12->pin_based_vm_exec_control)); >> > >> > - if (vmcs12->pin_based_vm_exec_control & > PIN_BASED_VMX_PREEMPTION_TIMER) >> > - vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, >> > - vmcs12->vmx_preemption_timer_value); >> > + if (vmcs12->pin_based_vm_exec_control & >> PIN_BASED_VMX_PREEMPTION_TIMER) { >> > + if (vmcs12->vm_exit_controls & > VM_EXIT_SAVE_VMX_PREEMPTION_TIMER) >> > + vmcs12->vmx_preemption_timer_value = >> > + vmcs_read32(VMX_PREEMPTION_TIMER_VALUE); >> > + else >> > + vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, >> > + vmcs12->vmx_preemption_timer_value); >> > + } >> >> This is not correct. We still need to set the vmcs to >> vmx_preemption_timer_value. The difference is that, on exit from L2, >> vmx_preemption_timer_value has to be updated according to the saved >> hardware state. The corresponding code is missing in your patch so far. 
> > I think something else maybe be missing here: assuming L0 handles exits > for L2 without involving L1 (e.g. external interrupts or ept violations), > then, we may spend some cycles in L0 handling these exits. Note L1 is not > aware of these exits and from L1 perspective L2 was running on the CPU. > That means that we may need to reduce these cycles spent at > L0 from the preemtion timer or emulate a preemption timer exit to > force a transition to L1 instead of resuming L2. My solution is setting "save preemption value" feature of L2 if L2 sets "vmx preemption timer" feature, thus external interrupts (or others) will save the exact value in L2's vmcs, and the resume of L2 will load the value in L2's vmcs. Thus cycles of handling these vmexit in L0 will not affect L2's preemption value. Arthur > > > -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 2013-08-25 09:37, Abel Gordon wrote: > > >> From: Jan Kiszka <jan.kiszka@web.de> >> To: "??? <Arthur Chunqi Li>" <yzt356@gmail.com>, >> Cc: kvm@vger.kernel.org, gleb@redhat.com, pbonzini@redhat.com >> Date: 25/08/2013 09:44 AM >> Subject: Re: [PATCH] KVM: nVMX: Fully support of nested VMX preemption > timer >> Sent by: kvm-owner@vger.kernel.org >> >> On 2013-08-24 20:44, root wrote: >>> This patch contains the following two changes: >>> 1. Fix the bug in nested preemption timer support. If vmexit L2->L0 >>> with some reasons not emulated by L1, preemption timer value should >>> be save in such exits. >>> 2. Add support of "Save VMX-preemption timer value" VM-Exit controls >>> to nVMX. >>> >>> With this patch, nested VMX preemption timer features are fully >>> supported. >>> >>> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com> >>> --- > >>> >>> @@ -7578,9 +7579,14 @@ static void prepare_vmcs02(struct kvm_vcpu >> *vcpu, struct vmcs12 *vmcs12) >>> (vmcs_config.pin_based_exec_ctrl | >>> vmcs12->pin_based_vm_exec_control)); >>> >>> - if (vmcs12->pin_based_vm_exec_control & > PIN_BASED_VMX_PREEMPTION_TIMER) >>> - vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, >>> - vmcs12->vmx_preemption_timer_value); >>> + if (vmcs12->pin_based_vm_exec_control & >> PIN_BASED_VMX_PREEMPTION_TIMER) { >>> + if (vmcs12->vm_exit_controls & > VM_EXIT_SAVE_VMX_PREEMPTION_TIMER) >>> + vmcs12->vmx_preemption_timer_value = >>> + vmcs_read32(VMX_PREEMPTION_TIMER_VALUE); >>> + else >>> + vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, >>> + vmcs12->vmx_preemption_timer_value); >>> + } >> >> This is not correct. We still need to set the vmcs to >> vmx_preemption_timer_value. The difference is that, on exit from L2, >> vmx_preemption_timer_value has to be updated according to the saved >> hardware state. The corresponding code is missing in your patch so far. > > I think something else maybe be missing here: assuming L0 handles exits > for L2 without involving L1 (e.g. 
external interrupts or ept violations), > then, we may spend some cycles in L0 handling these exits. Note L1 is not > aware of these exits and from L1 perspective L2 was running on the CPU. > That means that we may need to reduce these cycles spent at > L0 from the preemtion timer or emulate a preemption timer exit to > force a transition to L1 instead of resuming L2. That's precisely what the logic I described should achieve: reload the value we saved on L2 exit on reentry. Jan
On 2013-08-25 09:37, Arthur Chunqi Li wrote: > On Sun, Aug 25, 2013 at 3:28 PM, Jan Kiszka <jan.kiszka@web.de> wrote: >> On 2013-08-25 09:24, Arthur Chunqi Li wrote: >>> On Sun, Aug 25, 2013 at 2:44 PM, Jan Kiszka <jan.kiszka@web.de> wrote: >>>> On 2013-08-24 20:44, root wrote: >>>>> This patch contains the following two changes: >>>>> 1. Fix the bug in nested preemption timer support. If vmexit L2->L0 >>>>> with some reasons not emulated by L1, preemption timer value should >>>>> be save in such exits. >>>>> 2. Add support of "Save VMX-preemption timer value" VM-Exit controls >>>>> to nVMX. >>>>> >>>>> With this patch, nested VMX preemption timer features are fully >>>>> supported. >>>>> >>>>> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com> >>>>> --- >>>>> arch/x86/kvm/vmx.c | 30 +++++++++++++++++++++++++----- >>>>> 1 file changed, 25 insertions(+), 5 deletions(-) >>>>> >>>>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c >>>>> index 57b4e12..9579409 100644 >>>>> --- a/arch/x86/kvm/vmx.c >>>>> +++ b/arch/x86/kvm/vmx.c >>>>> @@ -2204,7 +2204,8 @@ static __init void nested_vmx_setup_ctls_msrs(void) >>>>> #ifdef CONFIG_X86_64 >>>>> VM_EXIT_HOST_ADDR_SPACE_SIZE | >>>>> #endif >>>>> - VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT; >>>>> + VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT | >>>>> + VM_EXIT_SAVE_VMX_PREEMPTION_TIMER; >>>>> nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | >>>>> VM_EXIT_LOAD_IA32_EFER); >>>> >>>> In the absence of VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, you need to hide >>>> PIN_BASED_VMX_PREEMPTION_TIMER from the guest as we cannot emulate its >>>> behavior properly in that case. > Besides, we need to test that in the absence of > PIN_BASED_VMX_PREEMPTION_TIMER, we need to hide > VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, though this should not happen > according to Intel SDM. If the SDM guarantees this for us, we don't need such a safety measure. Otherwise, it should be added, yes. Jan
On Sun, Aug 25, 2013 at 3:44 PM, Jan Kiszka <jan.kiszka@web.de> wrote: > On 2013-08-25 09:37, Arthur Chunqi Li wrote: >> On Sun, Aug 25, 2013 at 3:28 PM, Jan Kiszka <jan.kiszka@web.de> wrote: >>> On 2013-08-25 09:24, Arthur Chunqi Li wrote: >>>> On Sun, Aug 25, 2013 at 2:44 PM, Jan Kiszka <jan.kiszka@web.de> wrote: >>>>> On 2013-08-24 20:44, root wrote: >>>>>> This patch contains the following two changes: >>>>>> 1. Fix the bug in nested preemption timer support. If vmexit L2->L0 >>>>>> with some reasons not emulated by L1, preemption timer value should >>>>>> be save in such exits. >>>>>> 2. Add support of "Save VMX-preemption timer value" VM-Exit controls >>>>>> to nVMX. >>>>>> >>>>>> With this patch, nested VMX preemption timer features are fully >>>>>> supported. >>>>>> >>>>>> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com> >>>>>> --- >>>>>> arch/x86/kvm/vmx.c | 30 +++++++++++++++++++++++++----- >>>>>> 1 file changed, 25 insertions(+), 5 deletions(-) >>>>>> >>>>>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c >>>>>> index 57b4e12..9579409 100644 >>>>>> --- a/arch/x86/kvm/vmx.c >>>>>> +++ b/arch/x86/kvm/vmx.c >>>>>> @@ -2204,7 +2204,8 @@ static __init void nested_vmx_setup_ctls_msrs(void) >>>>>> #ifdef CONFIG_X86_64 >>>>>> VM_EXIT_HOST_ADDR_SPACE_SIZE | >>>>>> #endif >>>>>> - VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT; >>>>>> + VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT | >>>>>> + VM_EXIT_SAVE_VMX_PREEMPTION_TIMER; >>>>>> nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | >>>>>> VM_EXIT_LOAD_IA32_EFER); >>>>> >>>>> In the absence of VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, you need to hide >>>>> PIN_BASED_VMX_PREEMPTION_TIMER from the guest as we cannot emulate its >>>>> behavior properly in that case. >> Besides, we need to test that in the absence of >> PIN_BASED_VMX_PREEMPTION_TIMER, we need to hide >> VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, though this should not happen >> according to Intel SDM. 
> > If the SDM guarantees this for us, we don't need such a safety measure. > Otherwise, it should be added, yes. The SDM has the following description (see 26.2.1.2): If the “activate VMX-preemption timer” VM-execution control is 0, the “save VMX-preemption timer value” VM-exit control must also be 0. It doesn't tell us whether these two flags are consistent when reading them from the related MSRs (IA32_VMX_PINBASED_CTLS and IA32_VMX_EXIT_CTLS). So I think the check is needed here. Arthur > > Jan > > -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
kvm-owner@vger.kernel.org wrote on 25/08/2013 10:43:12 AM: > From: Jan Kiszka <jan.kiszka@web.de> > To: Abel Gordon/Haifa/IBM@IBMIL, > Cc: gleb@redhat.com, kvm@vger.kernel.org, kvm-owner@vger.kernel.org, > pbonzini@redhat.com, "??? <Arthur Chunqi Li>" <yzt356@gmail.com> > Date: 25/08/2013 10:43 AM > Subject: Re: [PATCH] KVM: nVMX: Fully support of nested VMX preemption timer > Sent by: kvm-owner@vger.kernel.org > > On 2013-08-25 09:37, Abel Gordon wrote: > > > > > >> From: Jan Kiszka <jan.kiszka@web.de> > >> To: "??? <Arthur Chunqi Li>" <yzt356@gmail.com>, > >> Cc: kvm@vger.kernel.org, gleb@redhat.com, pbonzini@redhat.com > >> Date: 25/08/2013 09:44 AM > >> Subject: Re: [PATCH] KVM: nVMX: Fully support of nested VMX preemption > > timer > >> Sent by: kvm-owner@vger.kernel.org > >> > >> On 2013-08-24 20:44, root wrote: > >>> This patch contains the following two changes: > >>> 1. Fix the bug in nested preemption timer support. If vmexit L2->L0 > >>> with some reasons not emulated by L1, preemption timer value should > >>> be save in such exits. > >>> 2. Add support of "Save VMX-preemption timer value" VM-Exit controls > >>> to nVMX. > >>> > >>> With this patch, nested VMX preemption timer features are fully > >>> supported. 
> >>> > >>> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com> > >>> --- > > > >>> > >>> @@ -7578,9 +7579,14 @@ static void prepare_vmcs02(struct kvm_vcpu > >> *vcpu, struct vmcs12 *vmcs12) > >>> (vmcs_config.pin_based_exec_ctrl | > >>> vmcs12->pin_based_vm_exec_control)); > >>> > >>> - if (vmcs12->pin_based_vm_exec_control & > > PIN_BASED_VMX_PREEMPTION_TIMER) > >>> - vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, > >>> - vmcs12->vmx_preemption_timer_value); > >>> + if (vmcs12->pin_based_vm_exec_control & > >> PIN_BASED_VMX_PREEMPTION_TIMER) { > >>> + if (vmcs12->vm_exit_controls & > > VM_EXIT_SAVE_VMX_PREEMPTION_TIMER) > >>> + vmcs12->vmx_preemption_timer_value = > >>> + vmcs_read32(VMX_PREEMPTION_TIMER_VALUE); > >>> + else > >>> + vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, > >>> + vmcs12->vmx_preemption_timer_value); > >>> + } > >> > >> This is not correct. We still need to set the vmcs to > >> vmx_preemption_timer_value. The difference is that, on exit from L2, > >> vmx_preemption_timer_value has to be updated according to the saved > >> hardware state. The corresponding code is missing in your patch so far. > > > > I think something else maybe be missing here: assuming L0 handles exits > > for L2 without involving L1 (e.g. external interrupts or ept violations), > > then, we may spend some cycles in L0 handling these exits. Note L1 is not > > aware of these exits and from L1 perspective L2 was running on the CPU. > > That means that we may need to reduce these cycles spent at > > L0 from the preemtion timer or emulate a preemption timer exit to > > force a transition to L1 instead of resuming L2. > > That's precisely what the logic I described should achieve: reload the > value we saved on L2 exit on reentry. But don't you think we should also reduce the cycles spent at L0 from the preemption timer ? 
I mean, if we spent X cycles at L0 handling a L2 exit which was not forwarded to L1, then, before we resume L2, the preemption timer should be: (previous_value_on_exit - X). If (previous_value_on_exit - X) < 0, then we should force ("emulate") a preemption timer exit between L2 and L1. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 2013-08-25 09:50, Abel Gordon wrote: > > > kvm-owner@vger.kernel.org wrote on 25/08/2013 10:43:12 AM: > >> From: Jan Kiszka <jan.kiszka@web.de> >> To: Abel Gordon/Haifa/IBM@IBMIL, >> Cc: gleb@redhat.com, kvm@vger.kernel.org, kvm-owner@vger.kernel.org, >> pbonzini@redhat.com, "??? <Arthur Chunqi Li>" <yzt356@gmail.com> >> Date: 25/08/2013 10:43 AM >> Subject: Re: [PATCH] KVM: nVMX: Fully support of nested VMX preemption > timer >> Sent by: kvm-owner@vger.kernel.org >> >> On 2013-08-25 09:37, Abel Gordon wrote: >>> >>> >>>> From: Jan Kiszka <jan.kiszka@web.de> >>>> To: "??? <Arthur Chunqi Li>" <yzt356@gmail.com>, >>>> Cc: kvm@vger.kernel.org, gleb@redhat.com, pbonzini@redhat.com >>>> Date: 25/08/2013 09:44 AM >>>> Subject: Re: [PATCH] KVM: nVMX: Fully support of nested VMX preemption >>> timer >>>> Sent by: kvm-owner@vger.kernel.org >>>> >>>> On 2013-08-24 20:44, root wrote: >>>>> This patch contains the following two changes: >>>>> 1. Fix the bug in nested preemption timer support. If vmexit L2->L0 >>>>> with some reasons not emulated by L1, preemption timer value should >>>>> be save in such exits. >>>>> 2. Add support of "Save VMX-preemption timer value" VM-Exit controls >>>>> to nVMX. >>>>> >>>>> With this patch, nested VMX preemption timer features are fully >>>>> supported. 
>>>>> >>>>> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com> >>>>> --- >>> >>>>> >>>>> @@ -7578,9 +7579,14 @@ static void prepare_vmcs02(struct kvm_vcpu >>>> *vcpu, struct vmcs12 *vmcs12) >>>>> (vmcs_config.pin_based_exec_ctrl | >>>>> vmcs12->pin_based_vm_exec_control)); >>>>> >>>>> - if (vmcs12->pin_based_vm_exec_control & >>> PIN_BASED_VMX_PREEMPTION_TIMER) >>>>> - vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, >>>>> - vmcs12->vmx_preemption_timer_value); >>>>> + if (vmcs12->pin_based_vm_exec_control & >>>> PIN_BASED_VMX_PREEMPTION_TIMER) { >>>>> + if (vmcs12->vm_exit_controls & >>> VM_EXIT_SAVE_VMX_PREEMPTION_TIMER) >>>>> + vmcs12->vmx_preemption_timer_value = >>>>> + vmcs_read32(VMX_PREEMPTION_TIMER_VALUE); >>>>> + else >>>>> + vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, >>>>> + vmcs12->vmx_preemption_timer_value); >>>>> + } >>>> >>>> This is not correct. We still need to set the vmcs to >>>> vmx_preemption_timer_value. The difference is that, on exit from L2, >>>> vmx_preemption_timer_value has to be updated according to the saved >>>> hardware state. The corresponding code is missing in your patch so > far. >>> >>> I think something else maybe be missing here: assuming L0 handles exits >>> for L2 without involving L1 (e.g. external interrupts or ept > violations), >>> then, we may spend some cycles in L0 handling these exits. Note L1 is > not >>> aware of these exits and from L1 perspective L2 was running on the CPU. >>> That means that we may need to reduce these cycles spent at >>> L0 from the preemtion timer or emulate a preemption timer exit to >>> force a transition to L1 instead of resuming L2. >> >> That's precisely what the logic I described should achieve: reload the >> value we saved on L2 exit on reentry. > > But don't you think we should also reduce the cycles spent at L0 from the > preemption timer ? 
I mean, if we spent X cycles at L0 handling a L2 exit > which was not forwarded to L1, then, before we resume L2, > the preemption timer should be: (previous_value_on_exit - X). > If (previous_value_on_exit - X) < 0, then we should force ("emulate") a > preemption timer exit between L2 and L1. We ask the hardware to save the value of the preemption timer on L2 exit. This value will be exposed to L1 (if it asked for saving as well) and/or be written back to the hardware on L2 reentry (unless L1 had a chance to run and modified it). So the time spent in L0 is implicitly subtracted. Jan PS: You had kvm-owner in CC.
On Sun, Aug 25, 2013 at 3:50 PM, Abel Gordon <ABELG@il.ibm.com> wrote: > > > kvm-owner@vger.kernel.org wrote on 25/08/2013 10:43:12 AM: > >> From: Jan Kiszka <jan.kiszka@web.de> >> To: Abel Gordon/Haifa/IBM@IBMIL, >> Cc: gleb@redhat.com, kvm@vger.kernel.org, kvm-owner@vger.kernel.org, >> pbonzini@redhat.com, "??? <Arthur Chunqi Li>" <yzt356@gmail.com> >> Date: 25/08/2013 10:43 AM >> Subject: Re: [PATCH] KVM: nVMX: Fully support of nested VMX preemption > timer >> Sent by: kvm-owner@vger.kernel.org >> >> On 2013-08-25 09:37, Abel Gordon wrote: >> > >> > >> >> From: Jan Kiszka <jan.kiszka@web.de> >> >> To: "??? <Arthur Chunqi Li>" <yzt356@gmail.com>, >> >> Cc: kvm@vger.kernel.org, gleb@redhat.com, pbonzini@redhat.com >> >> Date: 25/08/2013 09:44 AM >> >> Subject: Re: [PATCH] KVM: nVMX: Fully support of nested VMX preemption >> > timer >> >> Sent by: kvm-owner@vger.kernel.org >> >> >> >> On 2013-08-24 20:44, root wrote: >> >>> This patch contains the following two changes: >> >>> 1. Fix the bug in nested preemption timer support. If vmexit L2->L0 >> >>> with some reasons not emulated by L1, preemption timer value should >> >>> be save in such exits. >> >>> 2. Add support of "Save VMX-preemption timer value" VM-Exit controls >> >>> to nVMX. >> >>> >> >>> With this patch, nested VMX preemption timer features are fully >> >>> supported. 
>> >>> >> >>> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com> >> >>> --- >> > >> >>> >> >>> @@ -7578,9 +7579,14 @@ static void prepare_vmcs02(struct kvm_vcpu >> >> *vcpu, struct vmcs12 *vmcs12) >> >>> (vmcs_config.pin_based_exec_ctrl | >> >>> vmcs12->pin_based_vm_exec_control)); >> >>> >> >>> - if (vmcs12->pin_based_vm_exec_control & >> > PIN_BASED_VMX_PREEMPTION_TIMER) >> >>> - vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, >> >>> - vmcs12->vmx_preemption_timer_value); >> >>> + if (vmcs12->pin_based_vm_exec_control & >> >> PIN_BASED_VMX_PREEMPTION_TIMER) { >> >>> + if (vmcs12->vm_exit_controls & >> > VM_EXIT_SAVE_VMX_PREEMPTION_TIMER) >> >>> + vmcs12->vmx_preemption_timer_value = >> >>> + vmcs_read32(VMX_PREEMPTION_TIMER_VALUE); >> >>> + else >> >>> + vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, >> >>> + vmcs12->vmx_preemption_timer_value); >> >>> + } >> >> >> >> This is not correct. We still need to set the vmcs to >> >> vmx_preemption_timer_value. The difference is that, on exit from L2, >> >> vmx_preemption_timer_value has to be updated according to the saved >> >> hardware state. The corresponding code is missing in your patch so > far. >> > >> > I think something else maybe be missing here: assuming L0 handles exits >> > for L2 without involving L1 (e.g. external interrupts or ept > violations), >> > then, we may spend some cycles in L0 handling these exits. Note L1 is > not >> > aware of these exits and from L1 perspective L2 was running on the CPU. >> > That means that we may need to reduce these cycles spent at >> > L0 from the preemtion timer or emulate a preemption timer exit to >> > force a transition to L1 instead of resuming L2. >> >> That's precisely what the logic I described should achieve: reload the >> value we saved on L2 exit on reentry. > > But don't you think we should also reduce the cycles spent at L0 from the > preemption timer ? 
I mean, if we spent X cycles at L0 handling a L2 exit > which was not forwarded to L1, then, before we resume L2, > the preemption timer should be: (previous_value_on_exit - X). > If (previous_value_on_exit - X) < 0, then we should force ("emulate") a > preemption timer exit between L2 and L1. Sorry, I previously misunderstood your comments. But why should we need to subtract the cycles spent in L0 from the L2 preemption timer value? These cycles are not spent by L2 and should not be charged to L2. Arthur > > -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
kvm-owner@vger.kernel.org wrote on 25/08/2013 10:55:24 AM: > From: Arthur Chunqi Li <yzt356@gmail.com> > To: Abel Gordon/Haifa/IBM@IBMIL, > Cc: Jan Kiszka <jan.kiszka@web.de>, Gleb Natapov <gleb@redhat.com>, > kvm <kvm@vger.kernel.org>, kvm-owner@vger.kernel.org, Paolo Bonzini > <pbonzini@redhat.com> > Date: 25/08/2013 10:55 AM > Subject: Re: [PATCH] KVM: nVMX: Fully support of nested VMX preemption timer > Sent by: kvm-owner@vger.kernel.org > > On Sun, Aug 25, 2013 at 3:50 PM, Abel Gordon <ABELG@il.ibm.com> wrote: > > > > > > kvm-owner@vger.kernel.org wrote on 25/08/2013 10:43:12 AM: > > > >> From: Jan Kiszka <jan.kiszka@web.de> > >> To: Abel Gordon/Haifa/IBM@IBMIL, > >> Cc: gleb@redhat.com, kvm@vger.kernel.org, kvm-owner@vger.kernel.org, > >> pbonzini@redhat.com, "??? <Arthur Chunqi Li>" <yzt356@gmail.com> > >> Date: 25/08/2013 10:43 AM > >> Subject: Re: [PATCH] KVM: nVMX: Fully support of nested VMX preemption > > timer > >> Sent by: kvm-owner@vger.kernel.org > >> > >> On 2013-08-25 09:37, Abel Gordon wrote: > >> > > >> > > >> >> From: Jan Kiszka <jan.kiszka@web.de> > >> >> To: "??? <Arthur Chunqi Li>" <yzt356@gmail.com>, > >> >> Cc: kvm@vger.kernel.org, gleb@redhat.com, pbonzini@redhat.com > >> >> Date: 25/08/2013 09:44 AM > >> >> Subject: Re: [PATCH] KVM: nVMX: Fully support of nested VMX preemption > >> > timer > >> >> Sent by: kvm-owner@vger.kernel.org > >> >> > >> >> On 2013-08-24 20:44, root wrote: > >> >>> This patch contains the following two changes: > >> >>> 1. Fix the bug in nested preemption timer support. If vmexit L2-> L0 > >> >>> with some reasons not emulated by L1, preemption timer value should > >> >>> be save in such exits. > >> >>> 2. Add support of "Save VMX-preemption timer value" VM-Exit controls > >> >>> to nVMX. > >> >>> > >> >>> With this patch, nested VMX preemption timer features are fully > >> >>> supported. 
> >> >>> > >> >>> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com> > >> >>> --- > >> > > >> >>> > >> >>> @@ -7578,9 +7579,14 @@ static void prepare_vmcs02(struct kvm_vcpu > >> >> *vcpu, struct vmcs12 *vmcs12) > >> >>> (vmcs_config.pin_based_exec_ctrl | > >> >>> vmcs12->pin_based_vm_exec_control)); > >> >>> > >> >>> - if (vmcs12->pin_based_vm_exec_control & > >> > PIN_BASED_VMX_PREEMPTION_TIMER) > >> >>> - vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, > >> >>> - vmcs12->vmx_preemption_timer_value); > >> >>> + if (vmcs12->pin_based_vm_exec_control & > >> >> PIN_BASED_VMX_PREEMPTION_TIMER) { > >> >>> + if (vmcs12->vm_exit_controls & > >> > VM_EXIT_SAVE_VMX_PREEMPTION_TIMER) > >> >>> + vmcs12->vmx_preemption_timer_value = > >> >>> + vmcs_read32(VMX_PREEMPTION_TIMER_VALUE); > >> >>> + else > >> >>> + vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, > >> >>> + vmcs12->vmx_preemption_timer_value); > >> >>> + } > >> >> > >> >> This is not correct. We still need to set the vmcs to > >> >> vmx_preemption_timer_value. The difference is that, on exit from L2, > >> >> vmx_preemption_timer_value has to be updated according to the saved > >> >> hardware state. The corresponding code is missing in your patch so > > far. > >> > > >> > I think something else maybe be missing here: assuming L0 handles exits > >> > for L2 without involving L1 (e.g. external interrupts or ept > > violations), > >> > then, we may spend some cycles in L0 handling these exits. Note L1 is > > not > >> > aware of these exits and from L1 perspective L2 was running on the CPU. > >> > That means that we may need to reduce these cycles spent at > >> > L0 from the preemtion timer or emulate a preemption timer exit to > >> > force a transition to L1 instead of resuming L2. > >> > >> That's precisely what the logic I described should achieve: reload the > >> value we saved on L2 exit on reentry. > > > > But don't you think we should also reduce the cycles spent at L0 from the > > preemption timer ? 
I mean, if we spent X cycles at L0 handling a L2 exit > > which was not forwarded to L1, then, before we resume L2, > > the preemption timer should be: (previous_value_on_exit - X). > > If (previous_value_on_exit - X) < 0, then we should force ("emulate") a > > preemption timer exit between L2 and L1. > Sorry, I previously misunderstand your comments. But why should we > need to exclude cycles in L0 from L2 preemption value? These cycles > are not spent by L2 and it should not be on L2. L1 asked the "hardware" (emulated by L0) to run L2 and force an exit after "Y" cycles. Now, in practice, we may spend "X" cycles at L0 handling exits without switching to L1. That means that from L1's perspective L2 was running for all of these X cycles. L1 should assume that the instructions per cycle the CPU executed decreased but the cycles were spent. That's why I believe you should take these X cycles into account. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 2013-08-25 10:04, Abel Gordon wrote: > > > kvm-owner@vger.kernel.org wrote on 25/08/2013 10:55:24 AM: > >> From: Arthur Chunqi Li <yzt356@gmail.com> >> To: Abel Gordon/Haifa/IBM@IBMIL, >> Cc: Jan Kiszka <jan.kiszka@web.de>, Gleb Natapov <gleb@redhat.com>, >> kvm <kvm@vger.kernel.org>, kvm-owner@vger.kernel.org, Paolo Bonzini >> <pbonzini@redhat.com> >> Date: 25/08/2013 10:55 AM >> Subject: Re: [PATCH] KVM: nVMX: Fully support of nested VMX preemption > timer >> Sent by: kvm-owner@vger.kernel.org >> >> On Sun, Aug 25, 2013 at 3:50 PM, Abel Gordon <ABELG@il.ibm.com> wrote: >>> >>> >>> kvm-owner@vger.kernel.org wrote on 25/08/2013 10:43:12 AM: >>> >>>> From: Jan Kiszka <jan.kiszka@web.de> >>>> To: Abel Gordon/Haifa/IBM@IBMIL, >>>> Cc: gleb@redhat.com, kvm@vger.kernel.org, kvm-owner@vger.kernel.org, >>>> pbonzini@redhat.com, "??? <Arthur Chunqi Li>" <yzt356@gmail.com> >>>> Date: 25/08/2013 10:43 AM >>>> Subject: Re: [PATCH] KVM: nVMX: Fully support of nested VMX preemption >>> timer >>>> Sent by: kvm-owner@vger.kernel.org >>>> >>>> On 2013-08-25 09:37, Abel Gordon wrote: >>>>> >>>>> >>>>>> From: Jan Kiszka <jan.kiszka@web.de> >>>>>> To: "??? <Arthur Chunqi Li>" <yzt356@gmail.com>, >>>>>> Cc: kvm@vger.kernel.org, gleb@redhat.com, pbonzini@redhat.com >>>>>> Date: 25/08/2013 09:44 AM >>>>>> Subject: Re: [PATCH] KVM: nVMX: Fully support of nested VMX > preemption >>>>> timer >>>>>> Sent by: kvm-owner@vger.kernel.org >>>>>> >>>>>> On 2013-08-24 20:44, root wrote: >>>>>>> This patch contains the following two changes: >>>>>>> 1. Fix the bug in nested preemption timer support. If vmexit L2-> > L0 >>>>>>> with some reasons not emulated by L1, preemption timer value > should >>>>>>> be save in such exits. >>>>>>> 2. Add support of "Save VMX-preemption timer value" VM-Exit > controls >>>>>>> to nVMX. >>>>>>> >>>>>>> With this patch, nested VMX preemption timer features are fully >>>>>>> supported. 
>>>>>>> >>>>>>> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com> >>>>>>> --- >>>>> >>>>>>> >>>>>>> @@ -7578,9 +7579,14 @@ static void prepare_vmcs02(struct kvm_vcpu >>>>>> *vcpu, struct vmcs12 *vmcs12) >>>>>>> (vmcs_config.pin_based_exec_ctrl | >>>>>>> vmcs12->pin_based_vm_exec_control)); >>>>>>> >>>>>>> - if (vmcs12->pin_based_vm_exec_control & >>>>> PIN_BASED_VMX_PREEMPTION_TIMER) >>>>>>> - vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, >>>>>>> - vmcs12->vmx_preemption_timer_value); >>>>>>> + if (vmcs12->pin_based_vm_exec_control & >>>>>> PIN_BASED_VMX_PREEMPTION_TIMER) { >>>>>>> + if (vmcs12->vm_exit_controls & >>>>> VM_EXIT_SAVE_VMX_PREEMPTION_TIMER) >>>>>>> + vmcs12->vmx_preemption_timer_value = >>>>>>> + vmcs_read32(VMX_PREEMPTION_TIMER_VALUE); >>>>>>> + else >>>>>>> + vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, >>>>>>> + vmcs12->vmx_preemption_timer_value); >>>>>>> + } >>>>>> >>>>>> This is not correct. We still need to set the vmcs to >>>>>> vmx_preemption_timer_value. The difference is that, on exit from > L2, >>>>>> vmx_preemption_timer_value has to be updated according to the saved >>>>>> hardware state. The corresponding code is missing in your patch so >>> far. >>>>> >>>>> I think something else maybe be missing here: assuming L0 handles > exits >>>>> for L2 without involving L1 (e.g. external interrupts or ept >>> violations), >>>>> then, we may spend some cycles in L0 handling these exits. Note L1 > is >>> not >>>>> aware of these exits and from L1 perspective L2 was running on the > CPU. >>>>> That means that we may need to reduce these cycles spent at >>>>> L0 from the preemtion timer or emulate a preemption timer exit to >>>>> force a transition to L1 instead of resuming L2. >>>> >>>> That's precisely what the logic I described should achieve: reload the >>>> value we saved on L2 exit on reentry. >>> >>> But don't you think we should also reduce the cycles spent at L0 from > the >>> preemption timer ? 
I mean, if we spent X cycles at L0 handling a L2 > exit >>> which was not forwarded to L1, then, before we resume L2, >>> the preemption timer should be: (previous_value_on_exit - X). >>> If (previous_value_on_exit - X) < 0, then we should force ("emulate") a >>> preemption timer exit between L2 and L1. >> Sorry, I previously misunderstand your comments. But why should we >> need to exclude cycles in L0 from L2 preemption value? These cycles >> are not spent by L2 and it should not be on L2. > > L1 asked the "hardware" (emulated by L0) to run L2 and force an exit > after "Y" cycles. Now, in practice, we may spend "X" cycles at L0 handling > exits without switching to L1. That means that from L1 perspective L2 > was running all these X cycles. L1 should assume that the instructions per > cycle > the CPU executed decreased but the cycles were spent. That's why I believe > you should take in account these X cycles. > Now I get it. There is likely some truth in this as the reference clock for the preemption timer, the TSC, isn't stopped for L1/L2 while running in L0. And the SDM demands the countdown to be proportional to that clock. Jan
kvm-owner@vger.kernel.org wrote on 25/08/2013 10:54:13 AM: > From: Jan Kiszka <jan.kiszka@web.de> > To: Abel Gordon/Haifa/IBM@IBMIL, > Cc: gleb@redhat.com, kvm <kvm@vger.kernel.org>, pbonzini@redhat.com, > "??? <Arthur Chunqi Li>" <yzt356@gmail.com> > Date: 25/08/2013 10:54 AM > Subject: Re: [PATCH] KVM: nVMX: Fully support of nested VMX preemption timer > Sent by: kvm-owner@vger.kernel.org > > On 2013-08-25 09:50, Abel Gordon wrote: > > > > > > kvm-owner@vger.kernel.org wrote on 25/08/2013 10:43:12 AM: > > > >> From: Jan Kiszka <jan.kiszka@web.de> > >> To: Abel Gordon/Haifa/IBM@IBMIL, > >> Cc: gleb@redhat.com, kvm@vger.kernel.org, kvm-owner@vger.kernel.org, > >> pbonzini@redhat.com, "??? <Arthur Chunqi Li>" <yzt356@gmail.com> > >> Date: 25/08/2013 10:43 AM > >> Subject: Re: [PATCH] KVM: nVMX: Fully support of nested VMX preemption > > timer > >> Sent by: kvm-owner@vger.kernel.org > >> > >> On 2013-08-25 09:37, Abel Gordon wrote: > >>> > >>> > >>>> From: Jan Kiszka <jan.kiszka@web.de> > >>>> To: "??? <Arthur Chunqi Li>" <yzt356@gmail.com>, > >>>> Cc: kvm@vger.kernel.org, gleb@redhat.com, pbonzini@redhat.com > >>>> Date: 25/08/2013 09:44 AM > >>>> Subject: Re: [PATCH] KVM: nVMX: Fully support of nested VMX preemption > >>> timer > >>>> Sent by: kvm-owner@vger.kernel.org > >>>> > >>>> On 2013-08-24 20:44, root wrote: > >>>>> This patch contains the following two changes: > >>>>> 1. Fix the bug in nested preemption timer support. If vmexit L2->L0 > >>>>> with some reasons not emulated by L1, preemption timer value should > >>>>> be save in such exits. > >>>>> 2. Add support of "Save VMX-preemption timer value" VM-Exit controls > >>>>> to nVMX. > >>>>> > >>>>> With this patch, nested VMX preemption timer features are fully > >>>>> supported. 
> >>>>> > >>>>> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com> > >>>>> --- > >>> > >>>>> > >>>>> @@ -7578,9 +7579,14 @@ static void prepare_vmcs02(struct kvm_vcpu > >>>> *vcpu, struct vmcs12 *vmcs12) > >>>>> (vmcs_config.pin_based_exec_ctrl | > >>>>> vmcs12->pin_based_vm_exec_control)); > >>>>> > >>>>> - if (vmcs12->pin_based_vm_exec_control & > >>> PIN_BASED_VMX_PREEMPTION_TIMER) > >>>>> - vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, > >>>>> - vmcs12->vmx_preemption_timer_value); > >>>>> + if (vmcs12->pin_based_vm_exec_control & > >>>> PIN_BASED_VMX_PREEMPTION_TIMER) { > >>>>> + if (vmcs12->vm_exit_controls & > >>> VM_EXIT_SAVE_VMX_PREEMPTION_TIMER) > >>>>> + vmcs12->vmx_preemption_timer_value = > >>>>> + vmcs_read32(VMX_PREEMPTION_TIMER_VALUE); > >>>>> + else > >>>>> + vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, > >>>>> + vmcs12->vmx_preemption_timer_value); > >>>>> + } > >>>> > >>>> This is not correct. We still need to set the vmcs to > >>>> vmx_preemption_timer_value. The difference is that, on exit from L2, > >>>> vmx_preemption_timer_value has to be updated according to the saved > >>>> hardware state. The corresponding code is missing in your patch so > > far. > >>> > >>> I think something else maybe be missing here: assuming L0 handles exits > >>> for L2 without involving L1 (e.g. external interrupts or ept > > violations), > >>> then, we may spend some cycles in L0 handling these exits. Note L1 is > > not > >>> aware of these exits and from L1 perspective L2 was running on the CPU. > >>> That means that we may need to reduce these cycles spent at > >>> L0 from the preemtion timer or emulate a preemption timer exit to > >>> force a transition to L1 instead of resuming L2. > >> > >> That's precisely what the logic I described should achieve: reload the > >> value we saved on L2 exit on reentry. > > > > But don't you think we should also reduce the cycles spent at L0 from the > > preemption timer ? 
I mean, if we spent X cycles at L0 handling a L2 exit > > which was not forwarded to L1, then, before we resume L2, > > the preemption timer should be: (previous_value_on_exit - X). > > If (previous_value_on_exit - X) < 0, then we should force ("emulate") a > > preemption timer exit between L2 and L1. > > We ask the hardware to save the value of the preemption on L2 exit. This > value will be exposed to L1 (if it asked for saving as well) and/or be > written back to the hardware on L2 reenty (unless L1 had a chance to run > and modified it). So the time spent in L0 is implicitly subtracted. I think you are suggesting the following, please correct me if I am wrong. 1) L1 resumes L2 with preemption timer enabled 2) L0 emulates the resume/launch 3) L2 runs for Y cycles until an external interrupt occurs (Y < preemption timer specified by L1) 4) L0 saved the preemption timer (original value - Y) 5) L0 spends X cycles handling the external interrupt 6) L0 resumes L2 with preemption timer = original value - Y Note that in this case "X is ignored". I was suggesting to do the following: 6) If original value - Y - X > 0 then L0 resumes L2 with preemption timer = original value - Y - X else L0 emulates a L2->L1 preemption timer exit (resumes L1) -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 2013-08-25 10:18, Abel Gordon wrote: > > > kvm-owner@vger.kernel.org wrote on 25/08/2013 10:54:13 AM: > >> From: Jan Kiszka <jan.kiszka@web.de> >> To: Abel Gordon/Haifa/IBM@IBMIL, >> Cc: gleb@redhat.com, kvm <kvm@vger.kernel.org>, pbonzini@redhat.com, >> "??? <Arthur Chunqi Li>" <yzt356@gmail.com> >> Date: 25/08/2013 10:54 AM >> Subject: Re: [PATCH] KVM: nVMX: Fully support of nested VMX preemption > timer >> Sent by: kvm-owner@vger.kernel.org >> >> On 2013-08-25 09:50, Abel Gordon wrote: >>> >>> >>> kvm-owner@vger.kernel.org wrote on 25/08/2013 10:43:12 AM: >>> >>>> From: Jan Kiszka <jan.kiszka@web.de> >>>> To: Abel Gordon/Haifa/IBM@IBMIL, >>>> Cc: gleb@redhat.com, kvm@vger.kernel.org, kvm-owner@vger.kernel.org, >>>> pbonzini@redhat.com, "??? <Arthur Chunqi Li>" <yzt356@gmail.com> >>>> Date: 25/08/2013 10:43 AM >>>> Subject: Re: [PATCH] KVM: nVMX: Fully support of nested VMX preemption >>> timer >>>> Sent by: kvm-owner@vger.kernel.org >>>> >>>> On 2013-08-25 09:37, Abel Gordon wrote: >>>>> >>>>> >>>>>> From: Jan Kiszka <jan.kiszka@web.de> >>>>>> To: "??? <Arthur Chunqi Li>" <yzt356@gmail.com>, >>>>>> Cc: kvm@vger.kernel.org, gleb@redhat.com, pbonzini@redhat.com >>>>>> Date: 25/08/2013 09:44 AM >>>>>> Subject: Re: [PATCH] KVM: nVMX: Fully support of nested VMX > preemption >>>>> timer >>>>>> Sent by: kvm-owner@vger.kernel.org >>>>>> >>>>>> On 2013-08-24 20:44, root wrote: >>>>>>> This patch contains the following two changes: >>>>>>> 1. Fix the bug in nested preemption timer support. If vmexit L2->L0 >>>>>>> with some reasons not emulated by L1, preemption timer value should >>>>>>> be save in such exits. >>>>>>> 2. Add support of "Save VMX-preemption timer value" VM-Exit > controls >>>>>>> to nVMX. >>>>>>> >>>>>>> With this patch, nested VMX preemption timer features are fully >>>>>>> supported. 
>>>>>>> >>>>>>> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com> >>>>>>> --- >>>>> >>>>>>> >>>>>>> @@ -7578,9 +7579,14 @@ static void prepare_vmcs02(struct kvm_vcpu >>>>>> *vcpu, struct vmcs12 *vmcs12) >>>>>>> (vmcs_config.pin_based_exec_ctrl | >>>>>>> vmcs12->pin_based_vm_exec_control)); >>>>>>> >>>>>>> - if (vmcs12->pin_based_vm_exec_control & >>>>> PIN_BASED_VMX_PREEMPTION_TIMER) >>>>>>> - vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, >>>>>>> - vmcs12->vmx_preemption_timer_value); >>>>>>> + if (vmcs12->pin_based_vm_exec_control & >>>>>> PIN_BASED_VMX_PREEMPTION_TIMER) { >>>>>>> + if (vmcs12->vm_exit_controls & >>>>> VM_EXIT_SAVE_VMX_PREEMPTION_TIMER) >>>>>>> + vmcs12->vmx_preemption_timer_value = >>>>>>> + vmcs_read32(VMX_PREEMPTION_TIMER_VALUE); >>>>>>> + else >>>>>>> + vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, >>>>>>> + vmcs12->vmx_preemption_timer_value); >>>>>>> + } >>>>>> >>>>>> This is not correct. We still need to set the vmcs to >>>>>> vmx_preemption_timer_value. The difference is that, on exit from L2, >>>>>> vmx_preemption_timer_value has to be updated according to the saved >>>>>> hardware state. The corresponding code is missing in your patch so >>> far. >>>>> >>>>> I think something else maybe be missing here: assuming L0 handles > exits >>>>> for L2 without involving L1 (e.g. external interrupts or ept >>> violations), >>>>> then, we may spend some cycles in L0 handling these exits. Note L1 is >>> not >>>>> aware of these exits and from L1 perspective L2 was running on the > CPU. >>>>> That means that we may need to reduce these cycles spent at >>>>> L0 from the preemtion timer or emulate a preemption timer exit to >>>>> force a transition to L1 instead of resuming L2. >>>> >>>> That's precisely what the logic I described should achieve: reload the >>>> value we saved on L2 exit on reentry. >>> >>> But don't you think we should also reduce the cycles spent at L0 from > the >>> preemption timer ? 
I mean, if we spent X cycles at L0 handling a L2 > exit >>> which was not forwarded to L1, then, before we resume L2, >>> the preemption timer should be: (previous_value_on_exit - X). >>> If (previous_value_on_exit - X) < 0, then we should force ("emulate") a >>> preemption timer exit between L2 and L1. >> >> We ask the hardware to save the value of the preemption on L2 exit. This >> value will be exposed to L1 (if it asked for saving as well) and/or be >> written back to the hardware on L2 reenty (unless L1 had a chance to run >> and modified it). So the time spent in L0 is implicitly subtracted. > > I think you are suggesting the following, please correct me if I am wrong. > 1) L1 resumes L2 with preemption timer enabled > 2) L0 emulates the resume/launch > 3) L2 runs for Y cycles until an external interrupt occurs (Y < preemption > timer specified by L1) > 4) L0 saved the preemption timer (original value - Y) > 5) L0 spends X cycles handling the external interrupt > 6) L0 resumes L2 with preemption timer = original value - Y > > Note that in this case "X is ignored". Yes, but see my other reply. > > I was suggesting to do the following: > 6) If original value - Y - X > 0 then > L0 resumes L2 with preemption timer = original value - Y - X > else > L0 emulates a L2->L1 preemption timer exit (resumes L1) Almost . 6) should be: If exit to L1 occurred after last L2, set X to 0. Then load MAX(original value - Y - X, 0). The hardware will trigger the exit for us. Jan
On 2013-08-25 10:25, Jan Kiszka wrote: > On 2013-08-25 10:18, Abel Gordon wrote: >> >> >> kvm-owner@vger.kernel.org wrote on 25/08/2013 10:54:13 AM: >> >>> From: Jan Kiszka <jan.kiszka@web.de> >>> To: Abel Gordon/Haifa/IBM@IBMIL, >>> Cc: gleb@redhat.com, kvm <kvm@vger.kernel.org>, pbonzini@redhat.com, >>> "??? <Arthur Chunqi Li>" <yzt356@gmail.com> >>> Date: 25/08/2013 10:54 AM >>> Subject: Re: [PATCH] KVM: nVMX: Fully support of nested VMX preemption >> timer >>> Sent by: kvm-owner@vger.kernel.org >>> >>> On 2013-08-25 09:50, Abel Gordon wrote: >>>> >>>> >>>> kvm-owner@vger.kernel.org wrote on 25/08/2013 10:43:12 AM: >>>> >>>>> From: Jan Kiszka <jan.kiszka@web.de> >>>>> To: Abel Gordon/Haifa/IBM@IBMIL, >>>>> Cc: gleb@redhat.com, kvm@vger.kernel.org, kvm-owner@vger.kernel.org, >>>>> pbonzini@redhat.com, "??? <Arthur Chunqi Li>" <yzt356@gmail.com> >>>>> Date: 25/08/2013 10:43 AM >>>>> Subject: Re: [PATCH] KVM: nVMX: Fully support of nested VMX preemption >>>> timer >>>>> Sent by: kvm-owner@vger.kernel.org >>>>> >>>>> On 2013-08-25 09:37, Abel Gordon wrote: >>>>>> >>>>>> >>>>>>> From: Jan Kiszka <jan.kiszka@web.de> >>>>>>> To: "??? <Arthur Chunqi Li>" <yzt356@gmail.com>, >>>>>>> Cc: kvm@vger.kernel.org, gleb@redhat.com, pbonzini@redhat.com >>>>>>> Date: 25/08/2013 09:44 AM >>>>>>> Subject: Re: [PATCH] KVM: nVMX: Fully support of nested VMX >> preemption >>>>>> timer >>>>>>> Sent by: kvm-owner@vger.kernel.org >>>>>>> >>>>>>> On 2013-08-24 20:44, root wrote: >>>>>>>> This patch contains the following two changes: >>>>>>>> 1. Fix the bug in nested preemption timer support. If vmexit L2->L0 >>>>>>>> with some reasons not emulated by L1, preemption timer value should >>>>>>>> be save in such exits. >>>>>>>> 2. Add support of "Save VMX-preemption timer value" VM-Exit >> controls >>>>>>>> to nVMX. >>>>>>>> >>>>>>>> With this patch, nested VMX preemption timer features are fully >>>>>>>> supported. 
>>>>>>>> >>>>>>>> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com> >>>>>>>> --- >>>>>> >>>>>>>> >>>>>>>> @@ -7578,9 +7579,14 @@ static void prepare_vmcs02(struct kvm_vcpu >>>>>>> *vcpu, struct vmcs12 *vmcs12) >>>>>>>> (vmcs_config.pin_based_exec_ctrl | >>>>>>>> vmcs12->pin_based_vm_exec_control)); >>>>>>>> >>>>>>>> - if (vmcs12->pin_based_vm_exec_control & >>>>>> PIN_BASED_VMX_PREEMPTION_TIMER) >>>>>>>> - vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, >>>>>>>> - vmcs12->vmx_preemption_timer_value); >>>>>>>> + if (vmcs12->pin_based_vm_exec_control & >>>>>>> PIN_BASED_VMX_PREEMPTION_TIMER) { >>>>>>>> + if (vmcs12->vm_exit_controls & >>>>>> VM_EXIT_SAVE_VMX_PREEMPTION_TIMER) >>>>>>>> + vmcs12->vmx_preemption_timer_value = >>>>>>>> + vmcs_read32(VMX_PREEMPTION_TIMER_VALUE); >>>>>>>> + else >>>>>>>> + vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, >>>>>>>> + vmcs12->vmx_preemption_timer_value); >>>>>>>> + } >>>>>>> >>>>>>> This is not correct. We still need to set the vmcs to >>>>>>> vmx_preemption_timer_value. The difference is that, on exit from L2, >>>>>>> vmx_preemption_timer_value has to be updated according to the saved >>>>>>> hardware state. The corresponding code is missing in your patch so >>>> far. >>>>>> >>>>>> I think something else maybe be missing here: assuming L0 handles >> exits >>>>>> for L2 without involving L1 (e.g. external interrupts or ept >>>> violations), >>>>>> then, we may spend some cycles in L0 handling these exits. Note L1 is >>>> not >>>>>> aware of these exits and from L1 perspective L2 was running on the >> CPU. >>>>>> That means that we may need to reduce these cycles spent at >>>>>> L0 from the preemtion timer or emulate a preemption timer exit to >>>>>> force a transition to L1 instead of resuming L2. >>>>> >>>>> That's precisely what the logic I described should achieve: reload the >>>>> value we saved on L2 exit on reentry. >>>> >>>> But don't you think we should also reduce the cycles spent at L0 from >> the >>>> preemption timer ? 
I mean, if we spent X cycles at L0 handling a L2 >> exit >>>> which was not forwarded to L1, then, before we resume L2, >>>> the preemption timer should be: (previous_value_on_exit - X). >>>> If (previous_value_on_exit - X) < 0, then we should force ("emulate") a >>>> preemption timer exit between L2 and L1. >>> >>> We ask the hardware to save the value of the preemption on L2 exit. This >>> value will be exposed to L1 (if it asked for saving as well) and/or be >>> written back to the hardware on L2 reenty (unless L1 had a chance to run >>> and modified it). So the time spent in L0 is implicitly subtracted. >> >> I think you are suggesting the following, please correct me if I am wrong. >> 1) L1 resumes L2 with preemption timer enabled >> 2) L0 emulates the resume/launch >> 3) L2 runs for Y cycles until an external interrupt occurs (Y < preemption >> timer specified by L1) >> 4) L0 saved the preemption timer (original value - Y) >> 5) L0 spends X cycles handling the external interrupt >> 6) L0 resumes L2 with preemption timer = original value - Y >> >> Note that in this case "X is ignored". > > Yes, but see my other reply. > >> >> I was suggesting to do the following: >> 6) If original value - Y - X > 0 then >> L0 resumes L2 with preemption timer = original value - Y - X >> else >> L0 emulates a L2->L1 preemption timer exit (resumes L1) > > Almost . 6) should be: > If exit to L1 occurred after last L2, set X to 0. Then load MAX(original > value - Y - X, 0). Hmm, no: If exit to L1 occurred after last L2, load value of vmcs12, else load MAX(original value - Y - X, 0). Jan
Jan Kiszka <jan.kiszka@web.de> wrote on 25/08/2013 11:27:22 AM: > From: Jan Kiszka <jan.kiszka@web.de> > To: Abel Gordon/Haifa/IBM@IBMIL, > Cc: gleb@redhat.com, kvm <kvm@vger.kernel.org>, pbonzini@redhat.com, > "??? <Arthur Chunqi Li>" <yzt356@gmail.com> > Date: 25/08/2013 11:27 AM > Subject: Re: [PATCH] KVM: nVMX: Fully support of nested VMX preemption timer > > On 2013-08-25 10:25, Jan Kiszka wrote: > > On 2013-08-25 10:18, Abel Gordon wrote: > >> > >> > >> kvm-owner@vger.kernel.org wrote on 25/08/2013 10:54:13 AM: > >> > >>> From: Jan Kiszka <jan.kiszka@web.de> > >>> To: Abel Gordon/Haifa/IBM@IBMIL, > >>> Cc: gleb@redhat.com, kvm <kvm@vger.kernel.org>, pbonzini@redhat.com, > >>> "??? <Arthur Chunqi Li>" <yzt356@gmail.com> > >>> Date: 25/08/2013 10:54 AM > >>> Subject: Re: [PATCH] KVM: nVMX: Fully support of nested VMX preemption > >> timer > >>> Sent by: kvm-owner@vger.kernel.org > >>> > >>> On 2013-08-25 09:50, Abel Gordon wrote: > >>>> > >>>> > >>>> kvm-owner@vger.kernel.org wrote on 25/08/2013 10:43:12 AM: > >>>> > >>>>> From: Jan Kiszka <jan.kiszka@web.de> > >>>>> To: Abel Gordon/Haifa/IBM@IBMIL, > >>>>> Cc: gleb@redhat.com, kvm@vger.kernel.org, kvm-owner@vger.kernel.org, > >>>>> pbonzini@redhat.com, "??? <Arthur Chunqi Li>" <yzt356@gmail.com> > >>>>> Date: 25/08/2013 10:43 AM > >>>>> Subject: Re: [PATCH] KVM: nVMX: Fully support of nested VMX preemption > >>>> timer > >>>>> Sent by: kvm-owner@vger.kernel.org > >>>>> > >>>>> On 2013-08-25 09:37, Abel Gordon wrote: > >>>>>> > >>>>>> > >>>>>>> From: Jan Kiszka <jan.kiszka@web.de> > >>>>>>> To: "??? <Arthur Chunqi Li>" <yzt356@gmail.com>, > >>>>>>> Cc: kvm@vger.kernel.org, gleb@redhat.com, pbonzini@redhat.com > >>>>>>> Date: 25/08/2013 09:44 AM > >>>>>>> Subject: Re: [PATCH] KVM: nVMX: Fully support of nested VMX > >> preemption > >>>>>> timer > >>>>>>> Sent by: kvm-owner@vger.kernel.org > >>>>>>> > >>>>>>> On 2013-08-24 20:44, root wrote: > >>>>>>>> This patch contains the following two changes: > >>>>>>>> 1. 
Fix the bug in nested preemption timer support. If vmexit L2->L0 > >>>>>>>> with some reasons not emulated by L1, preemption timer value should > >>>>>>>> be save in such exits. > >>>>>>>> 2. Add support of "Save VMX-preemption timer value" VM-Exit > >> controls > >>>>>>>> to nVMX. > >>>>>>>> > >>>>>>>> With this patch, nested VMX preemption timer features are fully > >>>>>>>> supported. > >>>>>>>> > >>>>>>>> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com> > >>>>>>>> --- > >>>>>> > >>>>>>>> > >>>>>>>> @@ -7578,9 +7579,14 @@ static void prepare_vmcs02(struct kvm_vcpu > >>>>>>> *vcpu, struct vmcs12 *vmcs12) > >>>>>>>> (vmcs_config.pin_based_exec_ctrl | > >>>>>>>> vmcs12->pin_based_vm_exec_control)); > >>>>>>>> > >>>>>>>> - if (vmcs12->pin_based_vm_exec_control & > >>>>>> PIN_BASED_VMX_PREEMPTION_TIMER) > >>>>>>>> - vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, > >>>>>>>> - vmcs12->vmx_preemption_timer_value); > >>>>>>>> + if (vmcs12->pin_based_vm_exec_control & > >>>>>>> PIN_BASED_VMX_PREEMPTION_TIMER) { > >>>>>>>> + if (vmcs12->vm_exit_controls & > >>>>>> VM_EXIT_SAVE_VMX_PREEMPTION_TIMER) > >>>>>>>> + vmcs12->vmx_preemption_timer_value = > >>>>>>>> + vmcs_read32(VMX_PREEMPTION_TIMER_VALUE); > >>>>>>>> + else > >>>>>>>> + vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, > >>>>>>>> + vmcs12->vmx_preemption_timer_value); > >>>>>>>> + } > >>>>>>> > >>>>>>> This is not correct. We still need to set the vmcs to > >>>>>>> vmx_preemption_timer_value. The difference is that, on exit from L2, > >>>>>>> vmx_preemption_timer_value has to be updated according to the saved > >>>>>>> hardware state. The corresponding code is missing in your patch so > >>>> far. > >>>>>> > >>>>>> I think something else maybe be missing here: assuming L0 handles > >> exits > >>>>>> for L2 without involving L1 (e.g. external interrupts or ept > >>>> violations), > >>>>>> then, we may spend some cycles in L0 handling these exits. 
Note L1 is > >>>> not > >>>>>> aware of these exits and from L1 perspective L2 was running on the > >> CPU. > >>>>>> That means that we may need to reduce these cycles spent at > >>>>>> L0 from the preemtion timer or emulate a preemption timer exit to > >>>>>> force a transition to L1 instead of resuming L2. > >>>>> > >>>>> That's precisely what the logic I described should achieve: reload the > >>>>> value we saved on L2 exit on reentry. > >>>> > >>>> But don't you think we should also reduce the cycles spent at L0 from > >> the > >>>> preemption timer ? I mean, if we spent X cycles at L0 handling a L2 > >> exit > >>>> which was not forwarded to L1, then, before we resume L2, > >>>> the preemption timer should be: (previous_value_on_exit - X). > >>>> If (previous_value_on_exit - X) < 0, then we should force ("emulate") a > >>>> preemption timer exit between L2 and L1. > >>> > >>> We ask the hardware to save the value of the preemption on L2 exit. This > >>> value will be exposed to L1 (if it asked for saving as well) and/or be > >>> written back to the hardware on L2 reenty (unless L1 had a chance to run > >>> and modified it). So the time spent in L0 is implicitly subtracted. > >> > >> I think you are suggesting the following, please correct me if I am wrong. > >> 1) L1 resumes L2 with preemption timer enabled > >> 2) L0 emulates the resume/launch > >> 3) L2 runs for Y cycles until an external interrupt occurs (Y < preemption > >> timer specified by L1) > >> 4) L0 saved the preemption timer (original value - Y) > >> 5) L0 spends X cycles handling the external interrupt > >> 6) L0 resumes L2 with preemption timer = original value - Y > >> > >> Note that in this case "X is ignored". > > > > Yes, but see my other reply. I sent my reply before I read yours, sorry. 
Anyway, we are now on the same page ;) > >> I was suggesting to do the following: > >> 6) If original value - Y - X > 0 then > >> L0 resumes L2 with preemption timer = original value - Y - X > >> else > >> L0 emulates a L2->L1 preemption timer exit (resumes L1) > > > > Almost . 6) should be: > > If exit to L1 occurred after last L2, set X to 0. Then load MAX (original > > value - Y - X, 0). > > Hmm, no: > > If exit to L1 occurred after last L2, load value of vmcs12, else load > MAX(original > value - Y - X, 0). Note you are resuming L2 to force an immediate exit. I agree this approach will be easier and cleaner to implement/maintain but it could force one more exit and entry. Anyway, any approach is welcome as long as it considers the cycles spent at L0 ("X") as we previously discussed and agreed. Regards, Abel. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Sun, Aug 25, 2013 at 4:18 PM, Abel Gordon <ABELG@il.ibm.com> wrote: > > > kvm-owner@vger.kernel.org wrote on 25/08/2013 10:54:13 AM: > >> From: Jan Kiszka <jan.kiszka@web.de> >> To: Abel Gordon/Haifa/IBM@IBMIL, >> Cc: gleb@redhat.com, kvm <kvm@vger.kernel.org>, pbonzini@redhat.com, >> "??? <Arthur Chunqi Li>" <yzt356@gmail.com> >> Date: 25/08/2013 10:54 AM >> Subject: Re: [PATCH] KVM: nVMX: Fully support of nested VMX preemption > timer >> Sent by: kvm-owner@vger.kernel.org >> >> On 2013-08-25 09:50, Abel Gordon wrote: >> > >> > >> > kvm-owner@vger.kernel.org wrote on 25/08/2013 10:43:12 AM: >> > >> >> From: Jan Kiszka <jan.kiszka@web.de> >> >> To: Abel Gordon/Haifa/IBM@IBMIL, >> >> Cc: gleb@redhat.com, kvm@vger.kernel.org, kvm-owner@vger.kernel.org, >> >> pbonzini@redhat.com, "??? <Arthur Chunqi Li>" <yzt356@gmail.com> >> >> Date: 25/08/2013 10:43 AM >> >> Subject: Re: [PATCH] KVM: nVMX: Fully support of nested VMX preemption >> > timer >> >> Sent by: kvm-owner@vger.kernel.org >> >> >> >> On 2013-08-25 09:37, Abel Gordon wrote: >> >>> >> >>> >> >>>> From: Jan Kiszka <jan.kiszka@web.de> >> >>>> To: "??? <Arthur Chunqi Li>" <yzt356@gmail.com>, >> >>>> Cc: kvm@vger.kernel.org, gleb@redhat.com, pbonzini@redhat.com >> >>>> Date: 25/08/2013 09:44 AM >> >>>> Subject: Re: [PATCH] KVM: nVMX: Fully support of nested VMX > preemption >> >>> timer >> >>>> Sent by: kvm-owner@vger.kernel.org >> >>>> >> >>>> On 2013-08-24 20:44, root wrote: >> >>>>> This patch contains the following two changes: >> >>>>> 1. Fix the bug in nested preemption timer support. If vmexit L2->L0 >> >>>>> with some reasons not emulated by L1, preemption timer value should >> >>>>> be save in such exits. >> >>>>> 2. Add support of "Save VMX-preemption timer value" VM-Exit > controls >> >>>>> to nVMX. >> >>>>> >> >>>>> With this patch, nested VMX preemption timer features are fully >> >>>>> supported. 
>> >>>>> >> >>>>> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com> >> >>>>> --- >> >>> >> >>>>> >> >>>>> @@ -7578,9 +7579,14 @@ static void prepare_vmcs02(struct kvm_vcpu >> >>>> *vcpu, struct vmcs12 *vmcs12) >> >>>>> (vmcs_config.pin_based_exec_ctrl | >> >>>>> vmcs12->pin_based_vm_exec_control)); >> >>>>> >> >>>>> - if (vmcs12->pin_based_vm_exec_control & >> >>> PIN_BASED_VMX_PREEMPTION_TIMER) >> >>>>> - vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, >> >>>>> - vmcs12->vmx_preemption_timer_value); >> >>>>> + if (vmcs12->pin_based_vm_exec_control & >> >>>> PIN_BASED_VMX_PREEMPTION_TIMER) { >> >>>>> + if (vmcs12->vm_exit_controls & >> >>> VM_EXIT_SAVE_VMX_PREEMPTION_TIMER) >> >>>>> + vmcs12->vmx_preemption_timer_value = >> >>>>> + vmcs_read32(VMX_PREEMPTION_TIMER_VALUE); >> >>>>> + else >> >>>>> + vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, >> >>>>> + vmcs12->vmx_preemption_timer_value); >> >>>>> + } >> >>>> >> >>>> This is not correct. We still need to set the vmcs to >> >>>> vmx_preemption_timer_value. The difference is that, on exit from L2, >> >>>> vmx_preemption_timer_value has to be updated according to the saved >> >>>> hardware state. The corresponding code is missing in your patch so >> > far. >> >>> >> >>> I think something else maybe be missing here: assuming L0 handles > exits >> >>> for L2 without involving L1 (e.g. external interrupts or ept >> > violations), >> >>> then, we may spend some cycles in L0 handling these exits. Note L1 is >> > not >> >>> aware of these exits and from L1 perspective L2 was running on the > CPU. >> >>> That means that we may need to reduce these cycles spent at >> >>> L0 from the preemtion timer or emulate a preemption timer exit to >> >>> force a transition to L1 instead of resuming L2. >> >> >> >> That's precisely what the logic I described should achieve: reload the >> >> value we saved on L2 exit on reentry. >> > >> > But don't you think we should also reduce the cycles spent at L0 from > the >> > preemption timer ? 
I mean, if we spent X cycles at L0 handling a L2 > exit >> > which was not forwarded to L1, then, before we resume L2, >> > the preemption timer should be: (previous_value_on_exit - X). >> > If (previous_value_on_exit - X) < 0, then we should force ("emulate") a >> > preemption timer exit between L2 and L1. >> >> We ask the hardware to save the value of the preemption on L2 exit. This >> value will be exposed to L1 (if it asked for saving as well) and/or be >> written back to the hardware on L2 reenty (unless L1 had a chance to run >> and modified it). So the time spent in L0 is implicitly subtracted. > > I think you are suggesting the following, please correct me if I am wrong. > 1) L1 resumes L2 with preemption timer enabled > 2) L0 emulates the resume/launch > 3) L2 runs for Y cycles until an external interrupt occurs (Y < preemption > timer specified by L1) > 4) L0 saved the preemption timer (original value - Y) > 5) L0 spends X cycles handling the external interrupt > 6) L0 resumes L2 with preemption timer = original value - Y > > Note that in this case "X is ignored". > > I was suggesting to do the following: > 6) If original value - Y - X > 0 then > L0 resumes L2 with preemption timer = original value - Y - X > else > L0 emulates a L2->L1 preemption timer exit (resumes L1) Yes, your description is right. But I'm still thinking about my previous concern: why should we count these X cycles as time that L2 spent? In nested VMX, the external interrupt is not generated by L1; it is triggered from L0 and is intended to cause a periodic exit to L1, so L2 is actually only "accidentally injured". Since these interrupts are not generated by L1 and are not intended to affect L2, these cycles should not be treated as what L2 spent. Although these cycles are "spent" from L1's point of view, they should not be taken into consideration in nested VMX.
For another example, if the vcpu is scheduled out while L0 is handling such interrupts, and the CPU does some other things before this vcpu is scheduled again, the cycles spent executing other processes should definitely not be treated as what L2 spent. Arthur > > > > > -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 2013-08-25 10:41, Arthur Chunqi Li wrote: > On Sun, Aug 25, 2013 at 4:18 PM, Abel Gordon <ABELG@il.ibm.com> wrote: >> >> >> kvm-owner@vger.kernel.org wrote on 25/08/2013 10:54:13 AM: >> >>> From: Jan Kiszka <jan.kiszka@web.de> >>> To: Abel Gordon/Haifa/IBM@IBMIL, >>> Cc: gleb@redhat.com, kvm <kvm@vger.kernel.org>, pbonzini@redhat.com, >>> "??? <Arthur Chunqi Li>" <yzt356@gmail.com> >>> Date: 25/08/2013 10:54 AM >>> Subject: Re: [PATCH] KVM: nVMX: Fully support of nested VMX preemption >> timer >>> Sent by: kvm-owner@vger.kernel.org >>> >>> On 2013-08-25 09:50, Abel Gordon wrote: >>>> >>>> >>>> kvm-owner@vger.kernel.org wrote on 25/08/2013 10:43:12 AM: >>>> >>>>> From: Jan Kiszka <jan.kiszka@web.de> >>>>> To: Abel Gordon/Haifa/IBM@IBMIL, >>>>> Cc: gleb@redhat.com, kvm@vger.kernel.org, kvm-owner@vger.kernel.org, >>>>> pbonzini@redhat.com, "??? <Arthur Chunqi Li>" <yzt356@gmail.com> >>>>> Date: 25/08/2013 10:43 AM >>>>> Subject: Re: [PATCH] KVM: nVMX: Fully support of nested VMX preemption >>>> timer >>>>> Sent by: kvm-owner@vger.kernel.org >>>>> >>>>> On 2013-08-25 09:37, Abel Gordon wrote: >>>>>> >>>>>> >>>>>>> From: Jan Kiszka <jan.kiszka@web.de> >>>>>>> To: "??? <Arthur Chunqi Li>" <yzt356@gmail.com>, >>>>>>> Cc: kvm@vger.kernel.org, gleb@redhat.com, pbonzini@redhat.com >>>>>>> Date: 25/08/2013 09:44 AM >>>>>>> Subject: Re: [PATCH] KVM: nVMX: Fully support of nested VMX >> preemption >>>>>> timer >>>>>>> Sent by: kvm-owner@vger.kernel.org >>>>>>> >>>>>>> On 2013-08-24 20:44, root wrote: >>>>>>>> This patch contains the following two changes: >>>>>>>> 1. Fix the bug in nested preemption timer support. If vmexit L2->L0 >>>>>>>> with some reasons not emulated by L1, preemption timer value should >>>>>>>> be save in such exits. >>>>>>>> 2. Add support of "Save VMX-preemption timer value" VM-Exit >> controls >>>>>>>> to nVMX. >>>>>>>> >>>>>>>> With this patch, nested VMX preemption timer features are fully >>>>>>>> supported. 
>>>>>>>> >>>>>>>> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com> >>>>>>>> --- >>>>>> >>>>>>>> >>>>>>>> @@ -7578,9 +7579,14 @@ static void prepare_vmcs02(struct kvm_vcpu >>>>>>> *vcpu, struct vmcs12 *vmcs12) >>>>>>>> (vmcs_config.pin_based_exec_ctrl | >>>>>>>> vmcs12->pin_based_vm_exec_control)); >>>>>>>> >>>>>>>> - if (vmcs12->pin_based_vm_exec_control & >>>>>> PIN_BASED_VMX_PREEMPTION_TIMER) >>>>>>>> - vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, >>>>>>>> - vmcs12->vmx_preemption_timer_value); >>>>>>>> + if (vmcs12->pin_based_vm_exec_control & >>>>>>> PIN_BASED_VMX_PREEMPTION_TIMER) { >>>>>>>> + if (vmcs12->vm_exit_controls & >>>>>> VM_EXIT_SAVE_VMX_PREEMPTION_TIMER) >>>>>>>> + vmcs12->vmx_preemption_timer_value = >>>>>>>> + vmcs_read32(VMX_PREEMPTION_TIMER_VALUE); >>>>>>>> + else >>>>>>>> + vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, >>>>>>>> + vmcs12->vmx_preemption_timer_value); >>>>>>>> + } >>>>>>> >>>>>>> This is not correct. We still need to set the vmcs to >>>>>>> vmx_preemption_timer_value. The difference is that, on exit from L2, >>>>>>> vmx_preemption_timer_value has to be updated according to the saved >>>>>>> hardware state. The corresponding code is missing in your patch so >>>> far. >>>>>> >>>>>> I think something else maybe be missing here: assuming L0 handles >> exits >>>>>> for L2 without involving L1 (e.g. external interrupts or ept >>>> violations), >>>>>> then, we may spend some cycles in L0 handling these exits. Note L1 is >>>> not >>>>>> aware of these exits and from L1 perspective L2 was running on the >> CPU. >>>>>> That means that we may need to reduce these cycles spent at >>>>>> L0 from the preemtion timer or emulate a preemption timer exit to >>>>>> force a transition to L1 instead of resuming L2. >>>>> >>>>> That's precisely what the logic I described should achieve: reload the >>>>> value we saved on L2 exit on reentry. >>>> >>>> But don't you think we should also reduce the cycles spent at L0 from >> the >>>> preemption timer ? 
I mean, if we spent X cycles at L0 handling a L2 >> exit >>>> which was not forwarded to L1, then, before we resume L2, >>>> the preemption timer should be: (previous_value_on_exit - X). >>>> If (previous_value_on_exit - X) < 0, then we should force ("emulate") a >>>> preemption timer exit between L2 and L1. >>> >>> We ask the hardware to save the value of the preemption on L2 exit. This >>> value will be exposed to L1 (if it asked for saving as well) and/or be >>> written back to the hardware on L2 reenty (unless L1 had a chance to run >>> and modified it). So the time spent in L0 is implicitly subtracted. >> >> I think you are suggesting the following, please correct me if I am wrong. >> 1) L1 resumes L2 with preemption timer enabled >> 2) L0 emulates the resume/launch >> 3) L2 runs for Y cycles until an external interrupt occurs (Y < preemption >> timer specified by L1) >> 4) L0 saved the preemption timer (original value - Y) >> 5) L0 spends X cycles handling the external interrupt >> 6) L0 resumes L2 with preemption timer = original value - Y >> >> Note that in this case "X is ignored". >> >> I was suggesting to do the following: >> 6) If original value - Y - X > 0 then >> L0 resumes L2 with preemption timer = original value - Y - X >> else >> L0 emulates a L2->L1 preemption timer exit (resumes L1) > Yes, your description is right. But I'm also thinking about my > previous consideration, why should we consider such X cycles as what > L2 spent. For nested VMX. external interrupt is not provided by L1, it > is triggered from L0 and want to cause periodically exit to L1, L2 is > "accidentally injure" actually. Since these interrupts are not > generated from L1 and not attend to affect L2, these cycles should not > be treated as what L2 spent. Though these cycles are "spent" in view > of L1, but they should not be taken into consideration in nested VMX. 
> > For another example, if vcpu scheduled out when L0 handing such > interrupts and CPU does some other things then schedule this vcpu > again, these cycles of executing other processes should not be treated > as what L2 spent definitely. Think of your preemption timer test case: There you are indirectly comparing the timer value against the TSC by checking that a preemption timer exit happened after no more than n TSC cycles. But as the TSC that L1 and L2 see continued to tick while in L0, this test could now fail when we leave out the L0 cycles. An alternative would be to hide all L0 TSC cycles from the guest. But that's not the way KVM works, independent of the preemption timer case. BTW, you should use guest_read_tsc() on exit/entry of L2 in order to calculate the time spent in L0. This will ensure that potential tweaks of TSC_OFFSET that L0 might have applied in the meantime will be taken into account. Jan
On Sun, Aug 25, 2013 at 4:53 PM, Jan Kiszka <jan.kiszka@web.de> wrote: > On 2013-08-25 10:41, Arthur Chunqi Li wrote: >> On Sun, Aug 25, 2013 at 4:18 PM, Abel Gordon <ABELG@il.ibm.com> wrote: >>> >>> >>> kvm-owner@vger.kernel.org wrote on 25/08/2013 10:54:13 AM: >>> >>>> From: Jan Kiszka <jan.kiszka@web.de> >>>> To: Abel Gordon/Haifa/IBM@IBMIL, >>>> Cc: gleb@redhat.com, kvm <kvm@vger.kernel.org>, pbonzini@redhat.com, >>>> "??? <Arthur Chunqi Li>" <yzt356@gmail.com> >>>> Date: 25/08/2013 10:54 AM >>>> Subject: Re: [PATCH] KVM: nVMX: Fully support of nested VMX preemption >>> timer >>>> Sent by: kvm-owner@vger.kernel.org >>>> >>>> On 2013-08-25 09:50, Abel Gordon wrote: >>>>> >>>>> >>>>> kvm-owner@vger.kernel.org wrote on 25/08/2013 10:43:12 AM: >>>>> >>>>>> From: Jan Kiszka <jan.kiszka@web.de> >>>>>> To: Abel Gordon/Haifa/IBM@IBMIL, >>>>>> Cc: gleb@redhat.com, kvm@vger.kernel.org, kvm-owner@vger.kernel.org, >>>>>> pbonzini@redhat.com, "??? <Arthur Chunqi Li>" <yzt356@gmail.com> >>>>>> Date: 25/08/2013 10:43 AM >>>>>> Subject: Re: [PATCH] KVM: nVMX: Fully support of nested VMX preemption >>>>> timer >>>>>> Sent by: kvm-owner@vger.kernel.org >>>>>> >>>>>> On 2013-08-25 09:37, Abel Gordon wrote: >>>>>>> >>>>>>> >>>>>>>> From: Jan Kiszka <jan.kiszka@web.de> >>>>>>>> To: "??? <Arthur Chunqi Li>" <yzt356@gmail.com>, >>>>>>>> Cc: kvm@vger.kernel.org, gleb@redhat.com, pbonzini@redhat.com >>>>>>>> Date: 25/08/2013 09:44 AM >>>>>>>> Subject: Re: [PATCH] KVM: nVMX: Fully support of nested VMX >>> preemption >>>>>>> timer >>>>>>>> Sent by: kvm-owner@vger.kernel.org >>>>>>>> >>>>>>>> On 2013-08-24 20:44, root wrote: >>>>>>>>> This patch contains the following two changes: >>>>>>>>> 1. Fix the bug in nested preemption timer support. If vmexit L2->L0 >>>>>>>>> with some reasons not emulated by L1, preemption timer value should >>>>>>>>> be save in such exits. >>>>>>>>> 2. Add support of "Save VMX-preemption timer value" VM-Exit >>> controls >>>>>>>>> to nVMX. 
>>>>>>>>> >>>>>>>>> With this patch, nested VMX preemption timer features are fully >>>>>>>>> supported. >>>>>>>>> >>>>>>>>> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com> >>>>>>>>> --- >>>>>>> >>>>>>>>> >>>>>>>>> @@ -7578,9 +7579,14 @@ static void prepare_vmcs02(struct kvm_vcpu >>>>>>>> *vcpu, struct vmcs12 *vmcs12) >>>>>>>>> (vmcs_config.pin_based_exec_ctrl | >>>>>>>>> vmcs12->pin_based_vm_exec_control)); >>>>>>>>> >>>>>>>>> - if (vmcs12->pin_based_vm_exec_control & >>>>>>> PIN_BASED_VMX_PREEMPTION_TIMER) >>>>>>>>> - vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, >>>>>>>>> - vmcs12->vmx_preemption_timer_value); >>>>>>>>> + if (vmcs12->pin_based_vm_exec_control & >>>>>>>> PIN_BASED_VMX_PREEMPTION_TIMER) { >>>>>>>>> + if (vmcs12->vm_exit_controls & >>>>>>> VM_EXIT_SAVE_VMX_PREEMPTION_TIMER) >>>>>>>>> + vmcs12->vmx_preemption_timer_value = >>>>>>>>> + vmcs_read32(VMX_PREEMPTION_TIMER_VALUE); >>>>>>>>> + else >>>>>>>>> + vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, >>>>>>>>> + vmcs12->vmx_preemption_timer_value); >>>>>>>>> + } >>>>>>>> >>>>>>>> This is not correct. We still need to set the vmcs to >>>>>>>> vmx_preemption_timer_value. The difference is that, on exit from L2, >>>>>>>> vmx_preemption_timer_value has to be updated according to the saved >>>>>>>> hardware state. The corresponding code is missing in your patch so >>>>> far. >>>>>>> >>>>>>> I think something else maybe be missing here: assuming L0 handles >>> exits >>>>>>> for L2 without involving L1 (e.g. external interrupts or ept >>>>> violations), >>>>>>> then, we may spend some cycles in L0 handling these exits. Note L1 is >>>>> not >>>>>>> aware of these exits and from L1 perspective L2 was running on the >>> CPU. >>>>>>> That means that we may need to reduce these cycles spent at >>>>>>> L0 from the preemtion timer or emulate a preemption timer exit to >>>>>>> force a transition to L1 instead of resuming L2. 
>>>>>> >>>>>> That's precisely what the logic I described should achieve: reload the >>>>>> value we saved on L2 exit on reentry. >>>>> >>>>> But don't you think we should also reduce the cycles spent at L0 from >>> the >>>>> preemption timer ? I mean, if we spent X cycles at L0 handling a L2 >>> exit >>>>> which was not forwarded to L1, then, before we resume L2, >>>>> the preemption timer should be: (previous_value_on_exit - X). >>>>> If (previous_value_on_exit - X) < 0, then we should force ("emulate") a >>>>> preemption timer exit between L2 and L1. >>>> >>>> We ask the hardware to save the value of the preemption on L2 exit. This >>>> value will be exposed to L1 (if it asked for saving as well) and/or be >>>> written back to the hardware on L2 reenty (unless L1 had a chance to run >>>> and modified it). So the time spent in L0 is implicitly subtracted. >>> >>> I think you are suggesting the following, please correct me if I am wrong. >>> 1) L1 resumes L2 with preemption timer enabled >>> 2) L0 emulates the resume/launch >>> 3) L2 runs for Y cycles until an external interrupt occurs (Y < preemption >>> timer specified by L1) >>> 4) L0 saved the preemption timer (original value - Y) >>> 5) L0 spends X cycles handling the external interrupt >>> 6) L0 resumes L2 with preemption timer = original value - Y >>> >>> Note that in this case "X is ignored". >>> >>> I was suggesting to do the following: >>> 6) If original value - Y - X > 0 then >>> L0 resumes L2 with preemption timer = original value - Y - X >>> else >>> L0 emulates a L2->L1 preemption timer exit (resumes L1) >> Yes, your description is right. But I'm also thinking about my >> previous consideration, why should we consider such X cycles as what >> L2 spent. For nested VMX. external interrupt is not provided by L1, it >> is triggered from L0 and want to cause periodically exit to L1, L2 is >> "accidentally injure" actually. 
Since these interrupts are not >> generated from L1 and not attend to affect L2, these cycles should not >> be treated as what L2 spent. Though these cycles are "spent" in view >> of L1, but they should not be taken into consideration in nested VMX. >> >> For another example, if vcpu scheduled out when L0 handing such >> interrupts and CPU does some other things then schedule this vcpu >> again, these cycles of executing other processes should not be treated >> as what L2 spent definitely. > > Think of your preemption timer test case: There you are indirectly > comparing the timer value against the TSC by checking the a preemption > timer exit happened after no more than n TSC cycles. But as the TSC L1 > and L2 sees continued to tick while in L0, this test could now fail when > we leave out the L0 cycles. > > An alternative would be to hide all L0 TSC cycles from the guest. But > that's not the way KVM works, independent of the preemption timer case. > > BTW, you should use guest_read_tsc() on exit/entry of L2 in order to > calculate the time spent in L0. This will ensure that potential tweaks > of TSC_OFFSET that L0 might have applied in the meantime will be taken > into account. Well, in this case, these X cycles are actually spent in neither L1 nor L2, but they are treated as if L2 consumed them, which makes it seem as if these cycles are "stolen". Arthur > > Jan > > -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 2013-08-25 11:07, Arthur Chunqi Li wrote: > On Sun, Aug 25, 2013 at 4:53 PM, Jan Kiszka <jan.kiszka@web.de> wrote: >> On 2013-08-25 10:41, Arthur Chunqi Li wrote: >>> On Sun, Aug 25, 2013 at 4:18 PM, Abel Gordon <ABELG@il.ibm.com> wrote: >>>> >>>> >>>> kvm-owner@vger.kernel.org wrote on 25/08/2013 10:54:13 AM: >>>> >>>>> From: Jan Kiszka <jan.kiszka@web.de> >>>>> To: Abel Gordon/Haifa/IBM@IBMIL, >>>>> Cc: gleb@redhat.com, kvm <kvm@vger.kernel.org>, pbonzini@redhat.com, >>>>> "??? <Arthur Chunqi Li>" <yzt356@gmail.com> >>>>> Date: 25/08/2013 10:54 AM >>>>> Subject: Re: [PATCH] KVM: nVMX: Fully support of nested VMX preemption >>>> timer >>>>> Sent by: kvm-owner@vger.kernel.org >>>>> >>>>> On 2013-08-25 09:50, Abel Gordon wrote: >>>>>> >>>>>> >>>>>> kvm-owner@vger.kernel.org wrote on 25/08/2013 10:43:12 AM: >>>>>> >>>>>>> From: Jan Kiszka <jan.kiszka@web.de> >>>>>>> To: Abel Gordon/Haifa/IBM@IBMIL, >>>>>>> Cc: gleb@redhat.com, kvm@vger.kernel.org, kvm-owner@vger.kernel.org, >>>>>>> pbonzini@redhat.com, "??? <Arthur Chunqi Li>" <yzt356@gmail.com> >>>>>>> Date: 25/08/2013 10:43 AM >>>>>>> Subject: Re: [PATCH] KVM: nVMX: Fully support of nested VMX preemption >>>>>> timer >>>>>>> Sent by: kvm-owner@vger.kernel.org >>>>>>> >>>>>>> On 2013-08-25 09:37, Abel Gordon wrote: >>>>>>>> >>>>>>>> >>>>>>>>> From: Jan Kiszka <jan.kiszka@web.de> >>>>>>>>> To: "??? <Arthur Chunqi Li>" <yzt356@gmail.com>, >>>>>>>>> Cc: kvm@vger.kernel.org, gleb@redhat.com, pbonzini@redhat.com >>>>>>>>> Date: 25/08/2013 09:44 AM >>>>>>>>> Subject: Re: [PATCH] KVM: nVMX: Fully support of nested VMX >>>> preemption >>>>>>>> timer >>>>>>>>> Sent by: kvm-owner@vger.kernel.org >>>>>>>>> >>>>>>>>> On 2013-08-24 20:44, root wrote: >>>>>>>>>> This patch contains the following two changes: >>>>>>>>>> 1. Fix the bug in nested preemption timer support. If vmexit L2->L0 >>>>>>>>>> with some reasons not emulated by L1, preemption timer value should >>>>>>>>>> be save in such exits. >>>>>>>>>> 2. 
Add support of "Save VMX-preemption timer value" VM-Exit >>>> controls >>>>>>>>>> to nVMX. >>>>>>>>>> >>>>>>>>>> With this patch, nested VMX preemption timer features are fully >>>>>>>>>> supported. >>>>>>>>>> >>>>>>>>>> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com> >>>>>>>>>> --- >>>>>>>> >>>>>>>>>> >>>>>>>>>> @@ -7578,9 +7579,14 @@ static void prepare_vmcs02(struct kvm_vcpu >>>>>>>>> *vcpu, struct vmcs12 *vmcs12) >>>>>>>>>> (vmcs_config.pin_based_exec_ctrl | >>>>>>>>>> vmcs12->pin_based_vm_exec_control)); >>>>>>>>>> >>>>>>>>>> - if (vmcs12->pin_based_vm_exec_control & >>>>>>>> PIN_BASED_VMX_PREEMPTION_TIMER) >>>>>>>>>> - vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, >>>>>>>>>> - vmcs12->vmx_preemption_timer_value); >>>>>>>>>> + if (vmcs12->pin_based_vm_exec_control & >>>>>>>>> PIN_BASED_VMX_PREEMPTION_TIMER) { >>>>>>>>>> + if (vmcs12->vm_exit_controls & >>>>>>>> VM_EXIT_SAVE_VMX_PREEMPTION_TIMER) >>>>>>>>>> + vmcs12->vmx_preemption_timer_value = >>>>>>>>>> + vmcs_read32(VMX_PREEMPTION_TIMER_VALUE); >>>>>>>>>> + else >>>>>>>>>> + vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, >>>>>>>>>> + vmcs12->vmx_preemption_timer_value); >>>>>>>>>> + } >>>>>>>>> >>>>>>>>> This is not correct. We still need to set the vmcs to >>>>>>>>> vmx_preemption_timer_value. The difference is that, on exit from L2, >>>>>>>>> vmx_preemption_timer_value has to be updated according to the saved >>>>>>>>> hardware state. The corresponding code is missing in your patch so >>>>>> far. >>>>>>>> >>>>>>>> I think something else maybe be missing here: assuming L0 handles >>>> exits >>>>>>>> for L2 without involving L1 (e.g. external interrupts or ept >>>>>> violations), >>>>>>>> then, we may spend some cycles in L0 handling these exits. Note L1 is >>>>>> not >>>>>>>> aware of these exits and from L1 perspective L2 was running on the >>>> CPU. 
>>>>>>>> That means that we may need to reduce these cycles spent at >>>>>>>> L0 from the preemption timer or emulate a preemption timer exit to >>>>>>>> force a transition to L1 instead of resuming L2. >>>>>>> >>>>>>> That's precisely what the logic I described should achieve: reload the >>>>>>> value we saved on L2 exit on reentry. >>>>>> >>>>>> But don't you think we should also reduce the cycles spent at L0 from >>>> the >>>>>> preemption timer? I mean, if we spent X cycles at L0 handling a L2 >>>> exit >>>>>> which was not forwarded to L1, then, before we resume L2, >>>>>> the preemption timer should be: (previous_value_on_exit - X). >>>>>> If (previous_value_on_exit - X) < 0, then we should force ("emulate") a >>>>>> preemption timer exit between L2 and L1. >>>>> >>>>> We ask the hardware to save the value of the preemption timer on L2 exit. This >>>>> value will be exposed to L1 (if it asked for saving as well) and/or be >>>>> written back to the hardware on L2 reentry (unless L1 had a chance to run >>>>> and modified it). So the time spent in L0 is implicitly subtracted. >>>> >>>> I think you are suggesting the following, please correct me if I am wrong. >>>> 1) L1 resumes L2 with preemption timer enabled >>>> 2) L0 emulates the resume/launch >>>> 3) L2 runs for Y cycles until an external interrupt occurs (Y < preemption >>>> timer specified by L1) >>>> 4) L0 saved the preemption timer (original value - Y) >>>> 5) L0 spends X cycles handling the external interrupt >>>> 6) L0 resumes L2 with preemption timer = original value - Y >>>> >>>> Note that in this case "X is ignored". >>>> >>>> I was suggesting to do the following: >>>> 6) If original value - Y - X > 0 then >>>> L0 resumes L2 with preemption timer = original value - Y - X >>>> else >>>> L0 emulates a L2->L1 preemption timer exit (resumes L1) >>> Yes, your description is right. But I'm also thinking about my >>> previous consideration, why should we consider such X cycles as what >>> L2 spent.
For nested VMX, the external interrupt is not provided by L1; it >>> is triggered from L0 and wants to cause a periodic exit to L1, so L2 is >>> actually "accidentally injured". Since these interrupts are not >>> generated from L1 and not intended to affect L2, these cycles should not >>> be treated as what L2 spent. Though these cycles are "spent" in view >>> of L1, they should not be taken into consideration in nested VMX. >>> >>> For another example, if the vcpu is scheduled out when L0 is handling such >>> interrupts and the CPU does some other things then schedules this vcpu >>> again, these cycles of executing other processes should definitely not be treated >>> as what L2 spent. >> >> Think of your preemption timer test case: There you are indirectly >> comparing the timer value against the TSC by checking that a preemption >> timer exit happened after no more than n TSC cycles. But as the TSC L1 >> and L2 see continued to tick while in L0, this test could now fail when >> we leave out the L0 cycles. >> >> An alternative would be to hide all L0 TSC cycles from the guest. But >> that's not the way KVM works, independent of the preemption timer case. >> >> BTW, you should use guest_read_tsc() on exit/entry of L2 in order to >> calculate the time spent in L0. This will ensure that potential tweaks >> of TSC_OFFSET that L0 might have applied in the meantime will be taken >> into account. > Well, in this case, these X cycles are actually not spent in L1 or L2, but > they are treated as if L2 consumed them, which makes it seem like these cycles are > "stolen". Yes, they are stolen by L0 from L2. Jan
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 57b4e12..9579409 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2204,7 +2204,8 @@ static __init void nested_vmx_setup_ctls_msrs(void) #ifdef CONFIG_X86_64 VM_EXIT_HOST_ADDR_SPACE_SIZE | #endif - VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT; + VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT | + VM_EXIT_SAVE_VMX_PREEMPTION_TIMER; nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | VM_EXIT_LOAD_IA32_EFER); @@ -7578,9 +7579,14 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) (vmcs_config.pin_based_exec_ctrl | vmcs12->pin_based_vm_exec_control)); - if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) - vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, - vmcs12->vmx_preemption_timer_value); + if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) { + if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER) + vmcs12->vmx_preemption_timer_value = + vmcs_read32(VMX_PREEMPTION_TIMER_VALUE); + else + vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, + vmcs12->vmx_preemption_timer_value); + } /* * Whether page-faults are trapped is determined by a combination of @@ -7690,7 +7696,11 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER * bits are further modified by vmx_set_efer() below. */ - vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl); + if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) + vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl | + VM_EXIT_SAVE_VMX_PREEMPTION_TIMER); + else + vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl); /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are * emulated by vmx_set_efer(), below. 
@@ -7912,6 +7922,16 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) } /* + * If L2 support PIN_BASED_VMX_PREEMPTION_TIMER, L0 must support + * VM_EXIT_SAVE_VMX_PREEMPTION_TIMER. + */ + if ((vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) && + !(nested_vmx_exit_ctls_high & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) { + nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); + return 1; + } + + /* * We're finally done with prerequisite checking, and can start with * the nested entry. */
This patch contains the following two changes: 1. Fix a bug in nested preemption timer support. If a vmexit L2->L0 occurs for a reason that is not emulated by L1, the preemption timer value should be saved on such exits. 2. Add support for the "Save VMX-preemption timer value" VM-exit control to nVMX. With this patch, nested VMX preemption timer features are fully supported. Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com> --- arch/x86/kvm/vmx.c | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-)