Message ID | 20170922055315.5573-1-lprosek@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 22/09/2017 07:53, Ladi Prosek wrote: > For nested virt we maintain multiple VMCS that can run on a vCPU. So it is > incorrect to keep vmcs_host_cr3 and vmcs_host_cr4, whose purpose is caching > the value of the rarely changing HOST_CR3 and HOST_CR4 VMCS fields, in > vCPU-wide data structures. > > Hyper-V nested on KVM runs into this consistently for me with PCID enabled. > CR3 is updated with a new value, unlikely(cr3 != vmx->host_state.vmcs_host_cr3) > fires, and the currently loaded VMCS is updated. Then we switch from L2 to > L1 and the next exit reverts CR3 to its old value. > > Fixes: d6e41f1151fe ("x86/mm, KVM: Teach KVM's VMX code that CR3 isn't a constant") > Signed-off-by: Ladi Prosek <lprosek@redhat.com> Thanks, queued with Cc: stable@vger.kernel.org Paolo > --- > arch/x86/kvm/vmx.c | 16 ++++++++-------- > 1 file changed, 8 insertions(+), 8 deletions(-) > > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c > index 0726ca7a1b02..c83d28b0ab05 100644 > --- a/arch/x86/kvm/vmx.c > +++ b/arch/x86/kvm/vmx.c > @@ -200,6 +200,8 @@ struct loaded_vmcs { > int cpu; > bool launched; > bool nmi_known_unmasked; > + unsigned long vmcs_host_cr3; /* May not match real cr3 */ > + unsigned long vmcs_host_cr4; /* May not match real cr4 */ > struct list_head loaded_vmcss_on_cpu_link; > }; > > @@ -600,8 +602,6 @@ struct vcpu_vmx { > int gs_ldt_reload_needed; > int fs_reload_needed; > u64 msr_host_bndcfgs; > - unsigned long vmcs_host_cr3; /* May not match real cr3 */ > - unsigned long vmcs_host_cr4; /* May not match real cr4 */ > } host_state; > struct { > int vm86_active; > @@ -5178,12 +5178,12 @@ static void vmx_set_constant_host_state(struct vcpu_vmx *vmx) > */ > cr3 = __read_cr3(); > vmcs_writel(HOST_CR3, cr3); /* 22.2.3 FIXME: shadow tables */ > - vmx->host_state.vmcs_host_cr3 = cr3; > + vmx->loaded_vmcs->vmcs_host_cr3 = cr3; > > /* Save the most likely value for this task's CR4 in the VMCS. */ > cr4 = cr4_read_shadow(); > vmcs_writel(HOST_CR4, cr4); /* 22.2.3, 22.2.5 */ > - vmx->host_state.vmcs_host_cr4 = cr4; > + vmx->loaded_vmcs->vmcs_host_cr4 = cr4; > > vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS); /* 22.2.4 */ > #ifdef CONFIG_X86_64 > @@ -9274,15 +9274,15 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) > vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]); > > cr3 = __get_current_cr3_fast(); > - if (unlikely(cr3 != vmx->host_state.vmcs_host_cr3)) { > + if (unlikely(cr3 != vmx->loaded_vmcs->vmcs_host_cr3)) { > vmcs_writel(HOST_CR3, cr3); > - vmx->host_state.vmcs_host_cr3 = cr3; > + vmx->loaded_vmcs->vmcs_host_cr3 = cr3; > } > > cr4 = cr4_read_shadow(); > - if (unlikely(cr4 != vmx->host_state.vmcs_host_cr4)) { > + if (unlikely(cr4 != vmx->loaded_vmcs->vmcs_host_cr4)) { > vmcs_writel(HOST_CR4, cr4); > - vmx->host_state.vmcs_host_cr4 = cr4; > + vmx->loaded_vmcs->vmcs_host_cr4 = cr4; > } > > /* When single-stepping over STI and MOV SS, we must clear the >
2017-09-22 13:53 GMT+08:00 Ladi Prosek <lprosek@redhat.com>: > For nested virt we maintain multiple VMCS that can run on a vCPU. So it is > incorrect to keep vmcs_host_cr3 and vmcs_host_cr4, whose purpose is caching > the value of the rarely changing HOST_CR3 and HOST_CR4 VMCS fields, in > vCPU-wide data structures. > > Hyper-V nested on KVM runs into this consistently for me with PCID enabled. > CR3 is updated with a new value, unlikely(cr3 != vmx->host_state.vmcs_host_cr3) > fires, and the currently loaded VMCS is updated. Then we switch from L2 to > L1 and the next exit reverts CR3 to its old value. Could you explain why CR3 is updated when PCID is enabled? In addition, the host cr3/cr4 of both vmcs01 and vmcs02 should be from the L0, so why there is a difference? Regards, Wanpeng Li > > Fixes: d6e41f1151fe ("x86/mm, KVM: Teach KVM's VMX code that CR3 isn't a constant") > Signed-off-by: Ladi Prosek <lprosek@redhat.com> > --- > arch/x86/kvm/vmx.c | 16 ++++++++-------- > 1 file changed, 8 insertions(+), 8 deletions(-) > > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c > index 0726ca7a1b02..c83d28b0ab05 100644 > --- a/arch/x86/kvm/vmx.c > +++ b/arch/x86/kvm/vmx.c > @@ -200,6 +200,8 @@ struct loaded_vmcs { > int cpu; > bool launched; > bool nmi_known_unmasked; > + unsigned long vmcs_host_cr3; /* May not match real cr3 */ > + unsigned long vmcs_host_cr4; /* May not match real cr4 */ > struct list_head loaded_vmcss_on_cpu_link; > }; > > @@ -600,8 +602,6 @@ struct vcpu_vmx { > int gs_ldt_reload_needed; > int fs_reload_needed; > u64 msr_host_bndcfgs; > - unsigned long vmcs_host_cr3; /* May not match real cr3 */ > - unsigned long vmcs_host_cr4; /* May not match real cr4 */ > } host_state; > struct { > int vm86_active; > @@ -5178,12 +5178,12 @@ static void vmx_set_constant_host_state(struct vcpu_vmx *vmx) > */ > cr3 = __read_cr3(); > vmcs_writel(HOST_CR3, cr3); /* 22.2.3 FIXME: shadow tables */ > - vmx->host_state.vmcs_host_cr3 = cr3; > + vmx->loaded_vmcs->vmcs_host_cr3 = cr3; > > /* Save the most likely value for this task's CR4 in the VMCS. */ > cr4 = cr4_read_shadow(); > vmcs_writel(HOST_CR4, cr4); /* 22.2.3, 22.2.5 */ > - vmx->host_state.vmcs_host_cr4 = cr4; > + vmx->loaded_vmcs->vmcs_host_cr4 = cr4; > > vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS); /* 22.2.4 */ > #ifdef CONFIG_X86_64 > @@ -9274,15 +9274,15 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) > vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]); > > cr3 = __get_current_cr3_fast(); > - if (unlikely(cr3 != vmx->host_state.vmcs_host_cr3)) { > + if (unlikely(cr3 != vmx->loaded_vmcs->vmcs_host_cr3)) { > vmcs_writel(HOST_CR3, cr3); > - vmx->host_state.vmcs_host_cr3 = cr3; > + vmx->loaded_vmcs->vmcs_host_cr3 = cr3; > } > > cr4 = cr4_read_shadow(); > - if (unlikely(cr4 != vmx->host_state.vmcs_host_cr4)) { > + if (unlikely(cr4 != vmx->loaded_vmcs->vmcs_host_cr4)) { > vmcs_writel(HOST_CR4, cr4); > - vmx->host_state.vmcs_host_cr4 = cr4; > + vmx->loaded_vmcs->vmcs_host_cr4 = cr4; > } > > /* When single-stepping over STI and MOV SS, we must clear the > -- > 2.13.5 >
--Andy > On Sep 24, 2017, at 5:55 PM, Wanpeng Li <kernellwp@gmail.com> wrote: > > 2017-09-22 13:53 GMT+08:00 Ladi Prosek <lprosek@redhat.com>: >> For nested virt we maintain multiple VMCS that can run on a vCPU. So it is >> incorrect to keep vmcs_host_cr3 and vmcs_host_cr4, whose purpose is caching >> the value of the rarely changing HOST_CR3 and HOST_CR4 VMCS fields, in >> vCPU-wide data structures. >> >> Hyper-V nested on KVM runs into this consistently for me with PCID enabled. >> CR3 is updated with a new value, unlikely(cr3 != vmx->host_state.vmcs_host_cr3) >> fires, and the currently loaded VMCS is updated. Then we switch from L2 to >> L1 and the next exit reverts CR3 to its old value. > > Could you explain why CR3 is updated when PCID is enabled? An mm doesn't have a fixed ASID in our implementation. When we schedule out, we can come back with a different ASID. When a task is migrated, its ASID on the new CPU is completely unrelated to its ASID on the old CPU. > In > addition, the host cr3/cr4 of both vmcs01 and vmcs02 should be from > the L0, so why there is a difference? > It's caching the value in the VMCS, not the L0 value. > Regards, > Wanpeng Li > >> >> Fixes: d6e41f1151fe ("x86/mm, KVM: Teach KVM's VMX code that CR3 isn't a constant") >> Signed-off-by: Ladi Prosek <lprosek@redhat.com> >> --- >> arch/x86/kvm/vmx.c | 16 ++++++++-------- >> 1 file changed, 8 insertions(+), 8 deletions(-) >> >> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c >> index 0726ca7a1b02..c83d28b0ab05 100644 >> --- a/arch/x86/kvm/vmx.c >> +++ b/arch/x86/kvm/vmx.c >> @@ -200,6 +200,8 @@ struct loaded_vmcs { >> int cpu; >> bool launched; >> bool nmi_known_unmasked; >> + unsigned long vmcs_host_cr3; /* May not match real cr3 */ >> + unsigned long vmcs_host_cr4; /* May not match real cr4 */ >> struct list_head loaded_vmcss_on_cpu_link; >> }; >> >> @@ -600,8 +602,6 @@ struct vcpu_vmx { >> int gs_ldt_reload_needed; >> int fs_reload_needed; >> u64 msr_host_bndcfgs; >> - unsigned long vmcs_host_cr3; /* May not match real cr3 */ >> - unsigned long vmcs_host_cr4; /* May not match real cr4 */ >> } host_state; >> struct { >> int vm86_active; >> @@ -5178,12 +5178,12 @@ static void vmx_set_constant_host_state(struct vcpu_vmx *vmx) >> */ >> cr3 = __read_cr3(); >> vmcs_writel(HOST_CR3, cr3); /* 22.2.3 FIXME: shadow tables */ >> - vmx->host_state.vmcs_host_cr3 = cr3; >> + vmx->loaded_vmcs->vmcs_host_cr3 = cr3; >> >> /* Save the most likely value for this task's CR4 in the VMCS. */ >> cr4 = cr4_read_shadow(); >> vmcs_writel(HOST_CR4, cr4); /* 22.2.3, 22.2.5 */ >> - vmx->host_state.vmcs_host_cr4 = cr4; >> + vmx->loaded_vmcs->vmcs_host_cr4 = cr4; >> >> vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS); /* 22.2.4 */ >> #ifdef CONFIG_X86_64 >> @@ -9274,15 +9274,15 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) >> vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]); >> >> cr3 = __get_current_cr3_fast(); >> - if (unlikely(cr3 != vmx->host_state.vmcs_host_cr3)) { >> + if (unlikely(cr3 != vmx->loaded_vmcs->vmcs_host_cr3)) { >> vmcs_writel(HOST_CR3, cr3); >> - vmx->host_state.vmcs_host_cr3 = cr3; >> + vmx->loaded_vmcs->vmcs_host_cr3 = cr3; >> } >> >> cr4 = cr4_read_shadow(); >> - if (unlikely(cr4 != vmx->host_state.vmcs_host_cr4)) { >> + if (unlikely(cr4 != vmx->loaded_vmcs->vmcs_host_cr4)) { >> vmcs_writel(HOST_CR4, cr4); >> - vmx->host_state.vmcs_host_cr4 = cr4; >> + vmx->loaded_vmcs->vmcs_host_cr4 = cr4; >> } >> >> /* When single-stepping over STI and MOV SS, we must clear the >> -- >> 2.13.5 >>
2017-09-25 11:46 GMT+08:00 Andy Lutomirski <luto@amacapital.net>: > > > > --Andy >> On Sep 24, 2017, at 5:55 PM, Wanpeng Li <kernellwp@gmail.com> wrote: >> >> 2017-09-22 13:53 GMT+08:00 Ladi Prosek <lprosek@redhat.com>: >>> For nested virt we maintain multiple VMCS that can run on a vCPU. So it is >>> incorrect to keep vmcs_host_cr3 and vmcs_host_cr4, whose purpose is caching >>> the value of the rarely changing HOST_CR3 and HOST_CR4 VMCS fields, in >>> vCPU-wide data structures. >>> >>> Hyper-V nested on KVM runs into this consistently for me with PCID enabled. >>> CR3 is updated with a new value, unlikely(cr3 != vmx->host_state.vmcs_host_cr3) >>> fires, and the currently loaded VMCS is updated. Then we switch from L2 to >>> L1 and the next exit reverts CR3 to its old value. >> >> Could you explain why CR3 is updated when PCID is enabled? > > An mm doesn't have a fixed ASID in our implementation. When we schedule out, we can come back with a different ASID. When a task is migrated, its ASID on the new CPU is completely unrelated to its ASID on the old CPU. Thanks for the information. > >> In >> addition, the host cr3/cr4 of both vmcs01 and vmcs02 should be from >> the L0, so why there is a difference? >> > > It's caching the value in the VMCS, not the L0 value. I think it is true if the caching value is guest state, however, the caching values for CR3/CR4 here in the VMCS02/VMCS01 are both from the value on L0, the host state. Regards, Wanpeng Li > >> Regards, >> Wanpeng Li >> >>> >>> Fixes: d6e41f1151fe ("x86/mm, KVM: Teach KVM's VMX code that CR3 isn't a constant") >>> Signed-off-by: Ladi Prosek <lprosek@redhat.com> >>> --- >>> arch/x86/kvm/vmx.c | 16 ++++++++-------- >>> 1 file changed, 8 insertions(+), 8 deletions(-) >>> >>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c >>> index 0726ca7a1b02..c83d28b0ab05 100644 >>> --- a/arch/x86/kvm/vmx.c >>> +++ b/arch/x86/kvm/vmx.c >>> @@ -200,6 +200,8 @@ struct loaded_vmcs { >>> int cpu; >>> bool launched; >>> bool nmi_known_unmasked; >>> + unsigned long vmcs_host_cr3; /* May not match real cr3 */ >>> + unsigned long vmcs_host_cr4; /* May not match real cr4 */ >>> struct list_head loaded_vmcss_on_cpu_link; >>> }; >>> >>> @@ -600,8 +602,6 @@ struct vcpu_vmx { >>> int gs_ldt_reload_needed; >>> int fs_reload_needed; >>> u64 msr_host_bndcfgs; >>> - unsigned long vmcs_host_cr3; /* May not match real cr3 */ >>> - unsigned long vmcs_host_cr4; /* May not match real cr4 */ >>> } host_state; >>> struct { >>> int vm86_active; >>> @@ -5178,12 +5178,12 @@ static void vmx_set_constant_host_state(struct vcpu_vmx *vmx) >>> */ >>> cr3 = __read_cr3(); >>> vmcs_writel(HOST_CR3, cr3); /* 22.2.3 FIXME: shadow tables */ >>> - vmx->host_state.vmcs_host_cr3 = cr3; >>> + vmx->loaded_vmcs->vmcs_host_cr3 = cr3; >>> >>> /* Save the most likely value for this task's CR4 in the VMCS. */ >>> cr4 = cr4_read_shadow(); >>> vmcs_writel(HOST_CR4, cr4); /* 22.2.3, 22.2.5 */ >>> - vmx->host_state.vmcs_host_cr4 = cr4; >>> + vmx->loaded_vmcs->vmcs_host_cr4 = cr4; >>> >>> vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS); /* 22.2.4 */ >>> #ifdef CONFIG_X86_64 >>> @@ -9274,15 +9274,15 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) >>> vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]); >>> >>> cr3 = __get_current_cr3_fast(); >>> - if (unlikely(cr3 != vmx->host_state.vmcs_host_cr3)) { >>> + if (unlikely(cr3 != vmx->loaded_vmcs->vmcs_host_cr3)) { >>> vmcs_writel(HOST_CR3, cr3); >>> - vmx->host_state.vmcs_host_cr3 = cr3; >>> + vmx->loaded_vmcs->vmcs_host_cr3 = cr3; >>> } >>> >>> cr4 = cr4_read_shadow(); >>> - if (unlikely(cr4 != vmx->host_state.vmcs_host_cr4)) { >>> + if (unlikely(cr4 != vmx->loaded_vmcs->vmcs_host_cr4)) { >>> vmcs_writel(HOST_CR4, cr4); >>> - vmx->host_state.vmcs_host_cr4 = cr4; >>> + vmx->loaded_vmcs->vmcs_host_cr4 = cr4; >>> } >>> >>> /* When single-stepping over STI and MOV SS, we must clear the >>> -- >>> 2.13.5 >>>
2017-09-25 18:59 GMT+08:00 Wanpeng Li <kernellwp@gmail.com>: > 2017-09-25 11:46 GMT+08:00 Andy Lutomirski <luto@amacapital.net>: >> >> >> >> --Andy >>> On Sep 24, 2017, at 5:55 PM, Wanpeng Li <kernellwp@gmail.com> wrote: >>> >>> 2017-09-22 13:53 GMT+08:00 Ladi Prosek <lprosek@redhat.com>: >>>> For nested virt we maintain multiple VMCS that can run on a vCPU. So it is >>>> incorrect to keep vmcs_host_cr3 and vmcs_host_cr4, whose purpose is caching >>>> the value of the rarely changing HOST_CR3 and HOST_CR4 VMCS fields, in >>>> vCPU-wide data structures. >>>> >>>> Hyper-V nested on KVM runs into this consistently for me with PCID enabled. >>>> CR3 is updated with a new value, unlikely(cr3 != vmx->host_state.vmcs_host_cr3) >>>> fires, and the currently loaded VMCS is updated. Then we switch from L2 to >>>> L1 and the next exit reverts CR3 to its old value. >>> >>> Could you explain why CR3 is updated when PCID is enabled? >> >> An mm doesn't have a fixed ASID in our implementation. When we schedule out, we can come back with a different ASID. When a task is migrated, its ASID on the new CPU is completely unrelated to its ASID on the old CPU. > > Thanks for the information. > >> >>> In >>> addition, the host cr3/cr4 of both vmcs01 and vmcs02 should be from >>> the L0, so why there is a difference? >>> >> >> It's caching the value in the VMCS, not the L0 value. > > I think it is true if the caching value is guest state, however, the > caching values for CR3/CR4 here in the VMCS02/VMCS01 are both from the > value on L0, the host state. I misunderstand it, you are right. Regards, Wanpeng Li > > Regards, > Wanpeng Li > >> >>> Regards, >>> Wanpeng Li >>> >>>> >>>> Fixes: d6e41f1151fe ("x86/mm, KVM: Teach KVM's VMX code that CR3 isn't a constant") >>>> Signed-off-by: Ladi Prosek <lprosek@redhat.com> >>>> --- >>>> arch/x86/kvm/vmx.c | 16 ++++++++-------- >>>> 1 file changed, 8 insertions(+), 8 deletions(-) >>>> >>>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c >>>> index 0726ca7a1b02..c83d28b0ab05 100644 >>>> --- a/arch/x86/kvm/vmx.c >>>> +++ b/arch/x86/kvm/vmx.c >>>> @@ -200,6 +200,8 @@ struct loaded_vmcs { >>>> int cpu; >>>> bool launched; >>>> bool nmi_known_unmasked; >>>> + unsigned long vmcs_host_cr3; /* May not match real cr3 */ >>>> + unsigned long vmcs_host_cr4; /* May not match real cr4 */ >>>> struct list_head loaded_vmcss_on_cpu_link; >>>> }; >>>> >>>> @@ -600,8 +602,6 @@ struct vcpu_vmx { >>>> int gs_ldt_reload_needed; >>>> int fs_reload_needed; >>>> u64 msr_host_bndcfgs; >>>> - unsigned long vmcs_host_cr3; /* May not match real cr3 */ >>>> - unsigned long vmcs_host_cr4; /* May not match real cr4 */ >>>> } host_state; >>>> struct { >>>> int vm86_active; >>>> @@ -5178,12 +5178,12 @@ static void vmx_set_constant_host_state(struct vcpu_vmx *vmx) >>>> */ >>>> cr3 = __read_cr3(); >>>> vmcs_writel(HOST_CR3, cr3); /* 22.2.3 FIXME: shadow tables */ >>>> - vmx->host_state.vmcs_host_cr3 = cr3; >>>> + vmx->loaded_vmcs->vmcs_host_cr3 = cr3; >>>> >>>> /* Save the most likely value for this task's CR4 in the VMCS. */ >>>> cr4 = cr4_read_shadow(); >>>> vmcs_writel(HOST_CR4, cr4); /* 22.2.3, 22.2.5 */ >>>> - vmx->host_state.vmcs_host_cr4 = cr4; >>>> + vmx->loaded_vmcs->vmcs_host_cr4 = cr4; >>>> >>>> vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS); /* 22.2.4 */ >>>> #ifdef CONFIG_X86_64 >>>> @@ -9274,15 +9274,15 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) >>>> vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]); >>>> >>>> cr3 = __get_current_cr3_fast(); >>>> - if (unlikely(cr3 != vmx->host_state.vmcs_host_cr3)) { >>>> + if (unlikely(cr3 != vmx->loaded_vmcs->vmcs_host_cr3)) { >>>> vmcs_writel(HOST_CR3, cr3); >>>> - vmx->host_state.vmcs_host_cr3 = cr3; >>>> + vmx->loaded_vmcs->vmcs_host_cr3 = cr3; >>>> } >>>> >>>> cr4 = cr4_read_shadow(); >>>> - if (unlikely(cr4 != vmx->host_state.vmcs_host_cr4)) { >>>> + if (unlikely(cr4 != vmx->loaded_vmcs->vmcs_host_cr4)) { >>>> vmcs_writel(HOST_CR4, cr4); >>>> - vmx->host_state.vmcs_host_cr4 = cr4; >>>> + vmx->loaded_vmcs->vmcs_host_cr4 = cr4; >>>> } >>>> >>>> /* When single-stepping over STI and MOV SS, we must clear the >>>> -- >>>> 2.13.5 >>>>
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 0726ca7a1b02..c83d28b0ab05 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -200,6 +200,8 @@ struct loaded_vmcs { int cpu; bool launched; bool nmi_known_unmasked; + unsigned long vmcs_host_cr3; /* May not match real cr3 */ + unsigned long vmcs_host_cr4; /* May not match real cr4 */ struct list_head loaded_vmcss_on_cpu_link; }; @@ -600,8 +602,6 @@ struct vcpu_vmx { int gs_ldt_reload_needed; int fs_reload_needed; u64 msr_host_bndcfgs; - unsigned long vmcs_host_cr3; /* May not match real cr3 */ - unsigned long vmcs_host_cr4; /* May not match real cr4 */ } host_state; struct { int vm86_active; @@ -5178,12 +5178,12 @@ static void vmx_set_constant_host_state(struct vcpu_vmx *vmx) */ cr3 = __read_cr3(); vmcs_writel(HOST_CR3, cr3); /* 22.2.3 FIXME: shadow tables */ - vmx->host_state.vmcs_host_cr3 = cr3; + vmx->loaded_vmcs->vmcs_host_cr3 = cr3; /* Save the most likely value for this task's CR4 in the VMCS. */ cr4 = cr4_read_shadow(); vmcs_writel(HOST_CR4, cr4); /* 22.2.3, 22.2.5 */ - vmx->host_state.vmcs_host_cr4 = cr4; + vmx->loaded_vmcs->vmcs_host_cr4 = cr4; vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS); /* 22.2.4 */ #ifdef CONFIG_X86_64 @@ -9274,15 +9274,15 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]); cr3 = __get_current_cr3_fast(); - if (unlikely(cr3 != vmx->host_state.vmcs_host_cr3)) { + if (unlikely(cr3 != vmx->loaded_vmcs->vmcs_host_cr3)) { vmcs_writel(HOST_CR3, cr3); - vmx->host_state.vmcs_host_cr3 = cr3; + vmx->loaded_vmcs->vmcs_host_cr3 = cr3; } cr4 = cr4_read_shadow(); - if (unlikely(cr4 != vmx->host_state.vmcs_host_cr4)) { + if (unlikely(cr4 != vmx->loaded_vmcs->vmcs_host_cr4)) { vmcs_writel(HOST_CR4, cr4); - vmx->host_state.vmcs_host_cr4 = cr4; + vmx->loaded_vmcs->vmcs_host_cr4 = cr4; } /* When single-stepping over STI and MOV SS, we must clear the
For nested virt we maintain multiple VMCS that can run on a vCPU. So it is incorrect to keep vmcs_host_cr3 and vmcs_host_cr4, whose purpose is caching the value of the rarely changing HOST_CR3 and HOST_CR4 VMCS fields, in vCPU-wide data structures. Hyper-V nested on KVM runs into this consistently for me with PCID enabled. CR3 is updated with a new value, unlikely(cr3 != vmx->host_state.vmcs_host_cr3) fires, and the currently loaded VMCS is updated. Then we switch from L2 to L1 and the next exit reverts CR3 to its old value. Fixes: d6e41f1151fe ("x86/mm, KVM: Teach KVM's VMX code that CR3 isn't a constant") Signed-off-by: Ladi Prosek <lprosek@redhat.com> --- arch/x86/kvm/vmx.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-)