Message ID | 1375366117-9014-2-git-send-email-gleb@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Gleb Natapov wrote on 2013-08-01: > From: Nadav Har'El <nyh@il.ibm.com> > > Recent KVM, since > http://kerneltrap.org/mailarchive/linux-kvm/2010/5/2/6261577 > switch the EFER MSR when EPT is used and the host and guest have different > NX bits. So if we add support for nested EPT (L1 guest using EPT to run L2) > and want to be able to run recent KVM as L1, we need to allow L1 to use this > EFER switching feature. > > To do this EFER switching, KVM uses VM_ENTRY/EXIT_LOAD_IA32_EFER if > available, > and if it isn't, it uses the generic VM_ENTRY/EXIT_MSR_LOAD. This patch adds > support for the former (the latter is still unsupported). > > Nested entry and exit emulation (prepare_vmcs_02 and > load_vmcs12_host_state, > respectively) already handled VM_ENTRY/EXIT_LOAD_IA32_EFER correctly. So > all > that's left to do in this patch is to properly advertise this feature to L1. > > Note that vmcs12's VM_ENTRY/EXIT_LOAD_IA32_EFER are emulated by L0, by > using > vmx_set_efer (which itself sets one of several vmcs02 fields), so we always > support this feature, regardless of whether the host supports it. > > Reviewed-by: Orit Wasserman <owasserm@redhat.com> > Signed-off-by: Nadav Har'El <nyh@il.ibm.com> > Signed-off-by: Jun Nakajima <jun.nakajima@intel.com> > Signed-off-by: Xinhao Xu <xinhao.xu@intel.com> > Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com> > Signed-off-by: Gleb Natapov <gleb@redhat.com> > --- > arch/x86/kvm/vmx.c | 23 ++++++++++++++++------- > 1 file changed, 16 insertions(+), 7 deletions(-) > > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c > index e999dc7..27efa6a 100644 > --- a/arch/x86/kvm/vmx.c > +++ b/arch/x86/kvm/vmx.c > @@ -2198,7 +2198,8 @@ static __init void > nested_vmx_setup_ctls_msrs(void) > #else > nested_vmx_exit_ctls_high = 0; > #endif > - nested_vmx_exit_ctls_high |= > VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR; > + nested_vmx_exit_ctls_high |= > (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | > + VM_EXIT_LOAD_IA32_EFER); > > /* entry controls */ > rdmsr(MSR_IA32_VMX_ENTRY_CTLS, > @@ -2207,8 +2208,8 @@ static __init void > nested_vmx_setup_ctls_msrs(void) > nested_vmx_entry_ctls_low = > VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR; > nested_vmx_entry_ctls_high &= > VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_IA32E_MODE; > - nested_vmx_entry_ctls_high |= > VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR; > - > + nested_vmx_entry_ctls_high |= > (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | > + VM_ENTRY_LOAD_IA32_EFER); Just saw it, we didn't expose bit22 save VMX-preemption timer in vm-exit control but we already allowed guest to set active VMX-preemption timer in pin based vm-execution conrols. This is wrong. > /* cpu-based controls */ > rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, > nested_vmx_procbased_ctls_low, > nested_vmx_procbased_ctls_high); > @@ -7529,10 +7530,18 @@ static void prepare_vmcs02(struct kvm_vcpu > *vcpu, struct vmcs12 *vmcs12) > vcpu->arch.cr0_guest_owned_bits &= ~vmcs12->cr0_guest_host_mask; > vmcs_writel(CR0_GUEST_HOST_MASK, > ~vcpu->arch.cr0_guest_owned_bits); > > - /* Note: IA32_MODE, LOAD_IA32_EFER are modified by vmx_set_efer > below */ > - vmcs_write32(VM_EXIT_CONTROLS, > - vmcs12->vm_exit_controls | vmcs_config.vmexit_ctrl); > - vmcs_write32(VM_ENTRY_CONTROLS, vmcs12->vm_entry_controls | > + /* L2->L1 exit controls are emulated - the hardware exit is to L0 so > + * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER > + * bits are further modified by vmx_set_efer() below. > + */ > + vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl); Should we mentioned that save vmx preemption bit must use host|guest, not just host? > + > + /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE > are > + * emulated by vmx_set_efer(), below. > + */ > + vmcs_write32(VM_ENTRY_CONTROLS, > + (vmcs12->vm_entry_controls & ~VM_ENTRY_LOAD_IA32_EFER & > + ~VM_ENTRY_IA32E_MODE) | > (vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE)); > > if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT) > -- > 1.7.10.4 Best regards, Yang -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 2013-08-02 05:04, Zhang, Yang Z wrote: > Gleb Natapov wrote on 2013-08-01: >> From: Nadav Har'El <nyh@il.ibm.com> >> >> Recent KVM, since >> http://kerneltrap.org/mailarchive/linux-kvm/2010/5/2/6261577 >> switch the EFER MSR when EPT is used and the host and guest have different >> NX bits. So if we add support for nested EPT (L1 guest using EPT to run L2) >> and want to be able to run recent KVM as L1, we need to allow L1 to use this >> EFER switching feature. >> >> To do this EFER switching, KVM uses VM_ENTRY/EXIT_LOAD_IA32_EFER if >> available, >> and if it isn't, it uses the generic VM_ENTRY/EXIT_MSR_LOAD. This patch adds >> support for the former (the latter is still unsupported). >> >> Nested entry and exit emulation (prepare_vmcs_02 and >> load_vmcs12_host_state, >> respectively) already handled VM_ENTRY/EXIT_LOAD_IA32_EFER correctly. So >> all >> that's left to do in this patch is to properly advertise this feature to L1. >> >> Note that vmcs12's VM_ENTRY/EXIT_LOAD_IA32_EFER are emulated by L0, by >> using >> vmx_set_efer (which itself sets one of several vmcs02 fields), so we always >> support this feature, regardless of whether the host supports it. >> >> Reviewed-by: Orit Wasserman <owasserm@redhat.com> >> Signed-off-by: Nadav Har'El <nyh@il.ibm.com> >> Signed-off-by: Jun Nakajima <jun.nakajima@intel.com> >> Signed-off-by: Xinhao Xu <xinhao.xu@intel.com> >> Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com> >> Signed-off-by: Gleb Natapov <gleb@redhat.com> >> --- >> arch/x86/kvm/vmx.c | 23 ++++++++++++++++------- >> 1 file changed, 16 insertions(+), 7 deletions(-) >> >> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c >> index e999dc7..27efa6a 100644 >> --- a/arch/x86/kvm/vmx.c >> +++ b/arch/x86/kvm/vmx.c >> @@ -2198,7 +2198,8 @@ static __init void >> nested_vmx_setup_ctls_msrs(void) >> #else >> nested_vmx_exit_ctls_high = 0; >> #endif >> - nested_vmx_exit_ctls_high |= >> VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR; >> + nested_vmx_exit_ctls_high |= >> (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | >> + VM_EXIT_LOAD_IA32_EFER); >> >> /* entry controls */ >> rdmsr(MSR_IA32_VMX_ENTRY_CTLS, >> @@ -2207,8 +2208,8 @@ static __init void >> nested_vmx_setup_ctls_msrs(void) >> nested_vmx_entry_ctls_low = >> VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR; >> nested_vmx_entry_ctls_high &= >> VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_IA32E_MODE; >> - nested_vmx_entry_ctls_high |= >> VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR; >> - >> + nested_vmx_entry_ctls_high |= >> (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | >> + VM_ENTRY_LOAD_IA32_EFER); > Just saw it, we didn't expose bit22 save VMX-preemption timer in vm-exit control but we already allowed guest to set active VMX-preemption timer in pin based vm-execution conrols. This is wrong. Does the presence of preemption timer support imply that saving its value is also supported? Then we could demand this combination (ie. do not expose preemption timer support at all to L1 if value saving is missing) and build our preemption timer emulation on top. There is more broken /wrt VMX preemption timer, patches are welcome. Arthur will also try to develop test cases for it. But that topic is unrelated to this series. Jan > >> /* cpu-based controls */ >> rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, >> nested_vmx_procbased_ctls_low, >> nested_vmx_procbased_ctls_high); >> @@ -7529,10 +7530,18 @@ static void prepare_vmcs02(struct kvm_vcpu >> *vcpu, struct vmcs12 *vmcs12) >> vcpu->arch.cr0_guest_owned_bits &= ~vmcs12->cr0_guest_host_mask; >> vmcs_writel(CR0_GUEST_HOST_MASK, >> ~vcpu->arch.cr0_guest_owned_bits); >> >> - /* Note: IA32_MODE, LOAD_IA32_EFER are modified by vmx_set_efer >> below */ >> - vmcs_write32(VM_EXIT_CONTROLS, >> - vmcs12->vm_exit_controls | vmcs_config.vmexit_ctrl); >> - vmcs_write32(VM_ENTRY_CONTROLS, vmcs12->vm_entry_controls | >> + /* L2->L1 exit controls are emulated - the hardware exit is to L0 so >> + * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER >> + * bits are further modified by vmx_set_efer() below. >> + */ >> + vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl); > Should we mentioned that save vmx preemption bit must use host|guest, not just host? > >> + >> + /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE >> are >> + * emulated by vmx_set_efer(), below. >> + */ >> + vmcs_write32(VM_ENTRY_CONTROLS, >> + (vmcs12->vm_entry_controls & ~VM_ENTRY_LOAD_IA32_EFER & >> + ~VM_ENTRY_IA32E_MODE) | >> (vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE)); >> >> if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT) >> -- >> 1.7.10.4 > > Best regards, > Yang
Jan Kiszka wrote on 2013-08-02: > On 2013-08-02 05:04, Zhang, Yang Z wrote: >> Gleb Natapov wrote on 2013-08-01: >>> From: Nadav Har'El <nyh@il.ibm.com> >>> >>> Recent KVM, since >>> http://kerneltrap.org/mailarchive/linux-kvm/2010/5/2/6261577 >>> switch the EFER MSR when EPT is used and the host and guest have >>> different NX bits. So if we add support for nested EPT (L1 guest >>> using EPT to run L2) and want to be able to run recent KVM as L1, we >>> need to allow L1 to use this EFER switching feature. >>> >>> To do this EFER switching, KVM uses VM_ENTRY/EXIT_LOAD_IA32_EFER if >>> available, and if it isn't, it uses the generic >>> VM_ENTRY/EXIT_MSR_LOAD. This patch adds support for the former (the >>> latter is still unsupported). >>> >>> Nested entry and exit emulation (prepare_vmcs_02 and >>> load_vmcs12_host_state, >>> respectively) already handled VM_ENTRY/EXIT_LOAD_IA32_EFER correctly. >>> So all that's left to do in this patch is to properly advertise this >>> feature to L1. >>> >>> Note that vmcs12's VM_ENTRY/EXIT_LOAD_IA32_EFER are emulated by L0, >>> by using vmx_set_efer (which itself sets one of several vmcs02 >>> fields), so we always support this feature, regardless of whether >>> the host supports it. >>> >>> Reviewed-by: Orit Wasserman <owasserm@redhat.com> >>> Signed-off-by: Nadav Har'El <nyh@il.ibm.com> >>> Signed-off-by: Jun Nakajima <jun.nakajima@intel.com> >>> Signed-off-by: Xinhao Xu <xinhao.xu@intel.com> >>> Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com> >>> Signed-off-by: Gleb Natapov <gleb@redhat.com> >>> --- >>> arch/x86/kvm/vmx.c | 23 ++++++++++++++++------- >>> 1 file changed, 16 insertions(+), 7 deletions(-) diff --git >>> a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index e999dc7..27efa6a >>> 100644 >>> --- a/arch/x86/kvm/vmx.c >>> +++ b/arch/x86/kvm/vmx.c >>> @@ -2198,7 +2198,8 @@ static __init void >>> nested_vmx_setup_ctls_msrs(void) >>> #else >>> nested_vmx_exit_ctls_high = 0; >>> #endif >>> - nested_vmx_exit_ctls_high |= >>> VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR; >>> + nested_vmx_exit_ctls_high |= >>> (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | >>> + VM_EXIT_LOAD_IA32_EFER); >>> >>> /* entry controls */ >>> rdmsr(MSR_IA32_VMX_ENTRY_CTLS, >>> @@ -2207,8 +2208,8 @@ static __init void >>> nested_vmx_setup_ctls_msrs(void) >>> nested_vmx_entry_ctls_low = VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR; >>> nested_vmx_entry_ctls_high &= VM_ENTRY_LOAD_IA32_PAT | >>> VM_ENTRY_IA32E_MODE; >>> - nested_vmx_entry_ctls_high |= >>> VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR; >>> - >>> + nested_vmx_entry_ctls_high |= >>> (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | >>> + VM_ENTRY_LOAD_IA32_EFER); >> Just saw it, we didn't expose bit22 save VMX-preemption timer in >> vm-exit > control but we already allowed guest to set active VMX-preemption > timer in pin based vm-execution conrols. This is wrong. > > Does the presence of preemption timer support imply that saving its > value is also supported? Then we could demand this combination (ie. do > not expose preemption timer support at all to L1 if value saving is > missing) and build our preemption timer emulation on top. > I don't see we saved the preemption timer value to vmcs12 in prepare_vmcs12(). Will it be saved automatically? > There is more broken /wrt VMX preemption timer, patches are welcome. > Arthur will also try to develop test cases for it. But that topic is > unrelated to this series. > > Jan > >> >>> /* cpu-based controls */ >>> rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, >>> nested_vmx_procbased_ctls_low, >>> nested_vmx_procbased_ctls_high); >>> @@ -7529,10 +7530,18 @@ static void prepare_vmcs02(struct kvm_vcpu >>> *vcpu, struct vmcs12 *vmcs12) >>> vcpu->arch.cr0_guest_owned_bits &= ~vmcs12->cr0_guest_host_mask; >>> vmcs_writel(CR0_GUEST_HOST_MASK, >>> ~vcpu->arch.cr0_guest_owned_bits); >>> >>> - /* Note: IA32_MODE, LOAD_IA32_EFER are modified by vmx_set_efer >>> below */ >>> - vmcs_write32(VM_EXIT_CONTROLS, >>> - vmcs12->vm_exit_controls | vmcs_config.vmexit_ctrl); >>> - vmcs_write32(VM_ENTRY_CONTROLS, vmcs12->vm_entry_controls | >>> + /* L2->L1 exit controls are emulated - the hardware exit is to L0 so >>> + * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER >>> + * bits are further modified by vmx_set_efer() below. >>> + */ >>> + vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl); >> Should we mentioned that save vmx preemption bit must use host|guest, >> not just host? >> >>> + >>> + /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE >>> are >>> + * emulated by vmx_set_efer(), below. >>> + */ >>> + vmcs_write32(VM_ENTRY_CONTROLS, >>> + (vmcs12->vm_entry_controls & ~VM_ENTRY_LOAD_IA32_EFER & >>> + ~VM_ENTRY_IA32E_MODE) | >>> (vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE)); >>> >>> if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT) >>> -- >>> 1.7.10.4 >> >> Best regards, >> Yang > Best regards, Yang
On 2013-08-02 09:27, Zhang, Yang Z wrote: > Jan Kiszka wrote on 2013-08-02: >> On 2013-08-02 05:04, Zhang, Yang Z wrote: >>> Gleb Natapov wrote on 2013-08-01: >>>> From: Nadav Har'El <nyh@il.ibm.com> >>>> >>>> Recent KVM, since >>>> http://kerneltrap.org/mailarchive/linux-kvm/2010/5/2/6261577 >>>> switch the EFER MSR when EPT is used and the host and guest have >>>> different NX bits. So if we add support for nested EPT (L1 guest >>>> using EPT to run L2) and want to be able to run recent KVM as L1, we >>>> need to allow L1 to use this EFER switching feature. >>>> >>>> To do this EFER switching, KVM uses VM_ENTRY/EXIT_LOAD_IA32_EFER if >>>> available, and if it isn't, it uses the generic >>>> VM_ENTRY/EXIT_MSR_LOAD. This patch adds support for the former (the >>>> latter is still unsupported). >>>> >>>> Nested entry and exit emulation (prepare_vmcs_02 and >>>> load_vmcs12_host_state, >>>> respectively) already handled VM_ENTRY/EXIT_LOAD_IA32_EFER correctly. >>>> So all that's left to do in this patch is to properly advertise this >>>> feature to L1. >>>> >>>> Note that vmcs12's VM_ENTRY/EXIT_LOAD_IA32_EFER are emulated by L0, >>>> by using vmx_set_efer (which itself sets one of several vmcs02 >>>> fields), so we always support this feature, regardless of whether >>>> the host supports it. >>>> >>>> Reviewed-by: Orit Wasserman <owasserm@redhat.com> >>>> Signed-off-by: Nadav Har'El <nyh@il.ibm.com> >>>> Signed-off-by: Jun Nakajima <jun.nakajima@intel.com> >>>> Signed-off-by: Xinhao Xu <xinhao.xu@intel.com> >>>> Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com> >>>> Signed-off-by: Gleb Natapov <gleb@redhat.com> >>>> --- >>>> arch/x86/kvm/vmx.c | 23 ++++++++++++++++------- >>>> 1 file changed, 16 insertions(+), 7 deletions(-) diff --git >>>> a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index e999dc7..27efa6a >>>> 100644 >>>> --- a/arch/x86/kvm/vmx.c >>>> +++ b/arch/x86/kvm/vmx.c >>>> @@ -2198,7 +2198,8 @@ static __init void >>>> nested_vmx_setup_ctls_msrs(void) >>>> #else >>>> nested_vmx_exit_ctls_high = 0; >>>> #endif >>>> - nested_vmx_exit_ctls_high |= >>>> VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR; >>>> + nested_vmx_exit_ctls_high |= >>>> (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | >>>> + VM_EXIT_LOAD_IA32_EFER); >>>> >>>> /* entry controls */ >>>> rdmsr(MSR_IA32_VMX_ENTRY_CTLS, >>>> @@ -2207,8 +2208,8 @@ static __init void >>>> nested_vmx_setup_ctls_msrs(void) >>>> nested_vmx_entry_ctls_low = VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR; >>>> nested_vmx_entry_ctls_high &= VM_ENTRY_LOAD_IA32_PAT | >>>> VM_ENTRY_IA32E_MODE; >>>> - nested_vmx_entry_ctls_high |= >>>> VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR; >>>> - >>>> + nested_vmx_entry_ctls_high |= >>>> (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | >>>> + VM_ENTRY_LOAD_IA32_EFER); >>> Just saw it, we didn't expose bit22 save VMX-preemption timer in >>> vm-exit >> control but we already allowed guest to set active VMX-preemption >> timer in pin based vm-execution conrols. This is wrong. >> >> Does the presence of preemption timer support imply that saving its >> value is also supported? Then we could demand this combination (ie. do >> not expose preemption timer support at all to L1 if value saving is >> missing) and build our preemption timer emulation on top. >> > I don't see we saved the preemption timer value to vmcs12 in prepare_vmcs12(). Will it be saved automatically? No. As I said, there is more broken with our preemption timer emulation. Jan > >> There is more broken /wrt VMX preemption timer, patches are welcome. >> Arthur will also try to develop test cases for it. But that topic is >> unrelated to this series. >> >> Jan >> >>> >>>> /* cpu-based controls */ >>>> rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, >>>> nested_vmx_procbased_ctls_low, >>>> nested_vmx_procbased_ctls_high); >>>> @@ -7529,10 +7530,18 @@ static void prepare_vmcs02(struct kvm_vcpu >>>> *vcpu, struct vmcs12 *vmcs12) >>>> vcpu->arch.cr0_guest_owned_bits &= ~vmcs12->cr0_guest_host_mask; >>>> vmcs_writel(CR0_GUEST_HOST_MASK, >>>> ~vcpu->arch.cr0_guest_owned_bits); >>>> >>>> - /* Note: IA32_MODE, LOAD_IA32_EFER are modified by vmx_set_efer >>>> below */ >>>> - vmcs_write32(VM_EXIT_CONTROLS, >>>> - vmcs12->vm_exit_controls | vmcs_config.vmexit_ctrl); >>>> - vmcs_write32(VM_ENTRY_CONTROLS, vmcs12->vm_entry_controls | >>>> + /* L2->L1 exit controls are emulated - the hardware exit is to L0 so >>>> + * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER >>>> + * bits are further modified by vmx_set_efer() below. >>>> + */ >>>> + vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl); >>> Should we mentioned that save vmx preemption bit must use host|guest, >>> not just host? >>> >>>> + >>>> + /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE >>>> are >>>> + * emulated by vmx_set_efer(), below. >>>> + */ >>>> + vmcs_write32(VM_ENTRY_CONTROLS, >>>> + (vmcs12->vm_entry_controls & ~VM_ENTRY_LOAD_IA32_EFER & >>>> + ~VM_ENTRY_IA32E_MODE) | >>>> (vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE)); >>>> >>>> if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT) >>>> -- >>>> 1.7.10.4 >>> >>> Best regards, >>> Yang >> > > > Best regards, > Yang > >
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index e999dc7..27efa6a 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2198,7 +2198,8 @@ static __init void nested_vmx_setup_ctls_msrs(void) #else nested_vmx_exit_ctls_high = 0; #endif - nested_vmx_exit_ctls_high |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR; + nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | + VM_EXIT_LOAD_IA32_EFER); /* entry controls */ rdmsr(MSR_IA32_VMX_ENTRY_CTLS, @@ -2207,8 +2208,8 @@ static __init void nested_vmx_setup_ctls_msrs(void) nested_vmx_entry_ctls_low = VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR; nested_vmx_entry_ctls_high &= VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_IA32E_MODE; - nested_vmx_entry_ctls_high |= VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR; - + nested_vmx_entry_ctls_high |= (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | + VM_ENTRY_LOAD_IA32_EFER); /* cpu-based controls */ rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high); @@ -7529,10 +7530,18 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) vcpu->arch.cr0_guest_owned_bits &= ~vmcs12->cr0_guest_host_mask; vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits); - /* Note: IA32_MODE, LOAD_IA32_EFER are modified by vmx_set_efer below */ - vmcs_write32(VM_EXIT_CONTROLS, - vmcs12->vm_exit_controls | vmcs_config.vmexit_ctrl); - vmcs_write32(VM_ENTRY_CONTROLS, vmcs12->vm_entry_controls | + /* L2->L1 exit controls are emulated - the hardware exit is to L0 so + * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER + * bits are further modified by vmx_set_efer() below. + */ + vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl); + + /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are + * emulated by vmx_set_efer(), below. + */ + vmcs_write32(VM_ENTRY_CONTROLS, + (vmcs12->vm_entry_controls & ~VM_ENTRY_LOAD_IA32_EFER & + ~VM_ENTRY_IA32E_MODE) | (vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE)); if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT)