Message ID | 8e2aae32-917c-8035-1aef-8b47c321e42b@suse.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | XSA-292 follow-up | expand |
> -----Original Message----- > From: Xen-devel <xen-devel-bounces@lists.xenproject.org> On Behalf Of Jan Beulich > Sent: 17 September 2019 07:15 > To: xen-devel@lists.xenproject.org > Cc: Kevin Tian <kevin.tian@intel.com>; Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>; Wei Liu > <wl@xen.org>; Paul Durrant <paul@xen.org>; George Dunlap <George.Dunlap@citrix.com>; Andrew Cooper > <Andrew.Cooper3@citrix.com>; Jun Nakajima <jun.nakajima@intel.com>; Boris Ostrovsky > <boris.ostrovsky@oracle.com>; Roger Pau Monne <roger.pau@citrix.com> > Subject: [Xen-devel] [PATCH v2 4/9] x86/HVM: move NOFLUSH handling out of hvm_set_cr3() > > The bit is meaningful only for MOV-to-CR3 insns, not anywhere else, in > particular not when loading nested guest state. > > Signed-off-by: Jan Beulich <jbeulich@suse.com> > > --- a/xen/arch/x86/hvm/emulate.c > +++ b/xen/arch/x86/hvm/emulate.c > @@ -2080,6 +2080,8 @@ static int hvmemul_write_cr( > HVMTRACE_LONG_2D(CR_WRITE, reg, TRC_PAR_LONG(val)); > switch ( reg ) > { > + bool noflush; > + I said this before... I think the tighter case-scope is better. Cosmetically it may look a little odd, but surely it gives the compiler a better chance to optimize? 
Paul > case 0: > rc = hvm_set_cr0(val, true); > break; > @@ -2090,7 +2092,10 @@ static int hvmemul_write_cr( > break; > > case 3: > - rc = hvm_set_cr3(val, true); > + noflush = hvm_pcid_enabled(current) && (val & X86_CR3_NOFLUSH); > + if ( noflush ) > + val &= ~X86_CR3_NOFLUSH; > + rc = hvm_set_cr3(val, noflush, true); > break; > > case 4: > --- a/xen/arch/x86/hvm/hvm.c > +++ b/xen/arch/x86/hvm/hvm.c > @@ -2059,12 +2059,17 @@ int hvm_mov_to_cr(unsigned int cr, unsig > > switch ( cr ) > { > + bool noflush; > + > case 0: > rc = hvm_set_cr0(val, true); > break; > > case 3: > - rc = hvm_set_cr3(val, true); > + noflush = hvm_pcid_enabled(curr) && (val & X86_CR3_NOFLUSH); > + if ( noflush ) > + val &= ~X86_CR3_NOFLUSH; > + rc = hvm_set_cr3(val, noflush, true); > break; > > case 4: > @@ -2282,12 +2287,11 @@ int hvm_set_cr0(unsigned long value, boo > return X86EMUL_OKAY; > } > > -int hvm_set_cr3(unsigned long value, bool may_defer) > +int hvm_set_cr3(unsigned long value, bool noflush, bool may_defer) > { > struct vcpu *v = current; > struct page_info *page; > unsigned long old = v->arch.hvm.guest_cr[3]; > - bool noflush = false; > > if ( may_defer && unlikely(v->domain->arch.monitor.write_ctrlreg_enabled & > monitor_ctrlreg_bitmask(VM_EVENT_X86_CR3)) ) > @@ -2299,17 +2303,12 @@ int hvm_set_cr3(unsigned long value, boo > /* The actual write will occur in hvm_do_resume(), if permitted. */ > v->arch.vm_event->write_data.do_write.cr3 = 1; > v->arch.vm_event->write_data.cr3 = value; > + v->arch.vm_event->write_data.cr3_noflush = noflush; > > return X86EMUL_OKAY; > } > } > > - if ( hvm_pcid_enabled(v) ) /* Clear the noflush bit. 
*/ > - { > - noflush = value & X86_CR3_NOFLUSH; > - value &= ~X86_CR3_NOFLUSH; > - } > - > if ( hvm_paging_enabled(v) && !paging_mode_hap(v->domain) && > (value != v->arch.hvm.guest_cr[3]) ) > { > @@ -3004,7 +3003,7 @@ void hvm_task_switch( > if ( task_switch_load_seg(x86_seg_ldtr, tss.ldt, new_cpl, 0) ) > goto out; > > - rc = hvm_set_cr3(tss.cr3, true); > + rc = hvm_set_cr3(tss.cr3, false, true); > if ( rc == X86EMUL_EXCEPTION ) > hvm_inject_hw_exception(TRAP_gp_fault, 0); > if ( rc != X86EMUL_OKAY ) > --- a/xen/arch/x86/hvm/svm/nestedsvm.c > +++ b/xen/arch/x86/hvm/svm/nestedsvm.c > @@ -324,7 +324,7 @@ static int nsvm_vcpu_hostrestore(struct > v->arch.guest_table = pagetable_null(); > /* hvm_set_cr3() below sets v->arch.hvm.guest_cr[3] for us. */ > } > - rc = hvm_set_cr3(n1vmcb->_cr3, true); > + rc = hvm_set_cr3(n1vmcb->_cr3, false, true); > if ( rc == X86EMUL_EXCEPTION ) > hvm_inject_hw_exception(TRAP_gp_fault, 0); > if (rc != X86EMUL_OKAY) > @@ -584,7 +584,7 @@ static int nsvm_vmcb_prepare4vmrun(struc > nestedsvm_vmcb_set_nestedp2m(v, ns_vmcb, n2vmcb); > > /* hvm_set_cr3() below sets v->arch.hvm.guest_cr[3] for us. */ > - rc = hvm_set_cr3(ns_vmcb->_cr3, true); > + rc = hvm_set_cr3(ns_vmcb->_cr3, false, true); > if ( rc == X86EMUL_EXCEPTION ) > hvm_inject_hw_exception(TRAP_gp_fault, 0); > if (rc != X86EMUL_OKAY) > @@ -598,7 +598,7 @@ static int nsvm_vmcb_prepare4vmrun(struc > * we assume it intercepts page faults. > */ > /* hvm_set_cr3() below sets v->arch.hvm.guest_cr[3] for us. 
*/ > - rc = hvm_set_cr3(ns_vmcb->_cr3, true); > + rc = hvm_set_cr3(ns_vmcb->_cr3, false, true); > if ( rc == X86EMUL_EXCEPTION ) > hvm_inject_hw_exception(TRAP_gp_fault, 0); > if (rc != X86EMUL_OKAY) > --- a/xen/arch/x86/hvm/vm_event.c > +++ b/xen/arch/x86/hvm/vm_event.c > @@ -110,7 +110,7 @@ void hvm_vm_event_do_resume(struct vcpu > > if ( unlikely(w->do_write.cr3) ) > { > - if ( hvm_set_cr3(w->cr3, false) == X86EMUL_EXCEPTION ) > + if ( hvm_set_cr3(w->cr3, w->cr3_noflush, false) == X86EMUL_EXCEPTION ) > hvm_inject_hw_exception(TRAP_gp_fault, 0); > > w->do_write.cr3 = 0; > --- a/xen/arch/x86/hvm/vmx/vvmx.c > +++ b/xen/arch/x86/hvm/vmx/vvmx.c > @@ -1032,7 +1032,7 @@ static void load_shadow_guest_state(stru > if ( rc == X86EMUL_EXCEPTION ) > hvm_inject_hw_exception(TRAP_gp_fault, 0); > > - rc = hvm_set_cr3(get_vvmcs(v, GUEST_CR3), true); > + rc = hvm_set_cr3(get_vvmcs(v, GUEST_CR3), false, true); > if ( rc == X86EMUL_EXCEPTION ) > hvm_inject_hw_exception(TRAP_gp_fault, 0); > > @@ -1246,7 +1246,7 @@ static void load_vvmcs_host_state(struct > if ( rc == X86EMUL_EXCEPTION ) > hvm_inject_hw_exception(TRAP_gp_fault, 0); > > - rc = hvm_set_cr3(get_vvmcs(v, HOST_CR3), true); > + rc = hvm_set_cr3(get_vvmcs(v, HOST_CR3), false, true); > if ( rc == X86EMUL_EXCEPTION ) > hvm_inject_hw_exception(TRAP_gp_fault, 0); > > --- a/xen/include/asm-x86/domain.h > +++ b/xen/include/asm-x86/domain.h > @@ -274,6 +274,8 @@ struct monitor_write_data { > unsigned int cr4 : 1; > } do_write; > > + bool cr3_noflush; > + > uint32_t msr; > uint64_t value; > uint64_t cr0; > --- a/xen/include/asm-x86/hvm/support.h > +++ b/xen/include/asm-x86/hvm/support.h > @@ -135,7 +135,7 @@ void hvm_shadow_handle_cd(struct vcpu *v > */ > int hvm_set_efer(uint64_t value); > int hvm_set_cr0(unsigned long value, bool may_defer); > -int hvm_set_cr3(unsigned long value, bool may_defer); > +int hvm_set_cr3(unsigned long value, bool noflush, bool may_defer); > int hvm_set_cr4(unsigned long value, bool may_defer); > int 
hvm_descriptor_access_intercept(uint64_t exit_info, > uint64_t vmx_exit_qualification, > > > _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xenproject.org > https://lists.xenproject.org/mailman/listinfo/xen-devel
On 17.09.2019 14:45, Paul Durrant wrote: >> From: Xen-devel <xen-devel-bounces@lists.xenproject.org> On Behalf Of Jan Beulich >> Sent: 17 September 2019 07:15 >> >> --- a/xen/arch/x86/hvm/emulate.c >> +++ b/xen/arch/x86/hvm/emulate.c >> @@ -2080,6 +2080,8 @@ static int hvmemul_write_cr( >> HVMTRACE_LONG_2D(CR_WRITE, reg, TRC_PAR_LONG(val)); >> switch ( reg ) >> { >> + bool noflush; >> + > > I said this before... I think the tighter case-scope is better. I recall you saying so, but I don't recall you actually making this a requirement to get your ack. > Cosmetically it may look a little odd, but surely it gives the > compiler a better chance to optimize? I don't think so - they're pretty good to limit the life range of variables (at least in not overly hairy functions) these days. The more narrow scopes we often ask for are more for the reader to easily know what the intended usage range of a variable is. Jan
> -----Original Message----- > From: Jan Beulich <jbeulich@suse.com> > Sent: 17 September 2019 14:03 > To: Paul Durrant <Paul.Durrant@citrix.com> > Cc: xen-devel@lists.xenproject.org; Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>; Andrew > Cooper <Andrew.Cooper3@citrix.com>; George Dunlap <George.Dunlap@citrix.com>; Roger Pau Monne > <roger.pau@citrix.com>; Jun Nakajima <jun.nakajima@intel.com>; Kevin Tian <kevin.tian@intel.com>; > Boris Ostrovsky <boris.ostrovsky@oracle.com>; Paul Durrant <paul@xen.org>; Wei Liu <wl@xen.org> > Subject: Re: [Xen-devel] [PATCH v2 4/9] x86/HVM: move NOFLUSH handling out of hvm_set_cr3() > > On 17.09.2019 14:45, Paul Durrant wrote: > >> From: Xen-devel <xen-devel-bounces@lists.xenproject.org> On Behalf Of Jan Beulich > >> Sent: 17 September 2019 07:15 > >> > >> --- a/xen/arch/x86/hvm/emulate.c > >> +++ b/xen/arch/x86/hvm/emulate.c > >> @@ -2080,6 +2080,8 @@ static int hvmemul_write_cr( > >> HVMTRACE_LONG_2D(CR_WRITE, reg, TRC_PAR_LONG(val)); > >> switch ( reg ) > >> { > >> + bool noflush; > >> + > > > > I said this before... I think the tighter case-scope is better. > > I recall you saying so, but I don't recall you actually making this a > requirement to get your ack. > > > Cosmetically it may look a little odd, but surely it gives the > > compiler a better chance to optimize? > > I don't think so - they're pretty good to limit the life range of > variables (at least in not overly hairy functions) these days. The > more narrow scopes we often ask for are more for the reader to > easily know what the intended usage range of a variable is. Ok. I'm not going to insist, but I would still prefer case-scope here. Reviewed-by: Paul Durrant <paul@xen.org> > > Jan
On 17/09/2019 07:15, Jan Beulich wrote: > The bit is meaningful only for MOV-to-CR3 insns, not anywhere else, in > particular not when loading nested guest state. I've found a footnote for "26.3.1.1 Checks on Guest Control Registers, Debug Registers, and MSRs" stating: "Bit 63 of the CR3 field in the guest-state area must be 0. This is true even though, If CR4.PCIDE = 1, bit 63 of the source operand to MOV to CR3 is used to determine whether cached translation information is invalidated." This is fairly clear that attempting to set the NOFLUSH bit in the VMCS is going to cause problems, and I seem to recall some vmentry failures reported in the past on the subject. What it isn't so clear on is whether we will ever find a VMCS with the NOFLUSH bit set in guest state. Sadly, the vmexit side of the above comment (which was for vmentry) simply says "The contents of ... CR3, ... are saved into the corresponding fields." Any way, I'm in principle ok with the change, except for... > --- a/xen/arch/x86/hvm/hvm.c > +++ b/xen/arch/x86/hvm/hvm.c > @@ -2282,12 +2287,11 @@ int hvm_set_cr0(unsigned long value, boo > return X86EMUL_OKAY; > } > > -int hvm_set_cr3(unsigned long value, bool may_defer) > +int hvm_set_cr3(unsigned long value, bool noflush, bool may_defer) > { > struct vcpu *v = current; > struct page_info *page; > unsigned long old = v->arch.hvm.guest_cr[3]; > - bool noflush = false; > > if ( may_defer && unlikely(v->domain->arch.monitor.write_ctrlreg_enabled & > monitor_ctrlreg_bitmask(VM_EVENT_X86_CR3)) ) > @@ -2299,17 +2303,12 @@ int hvm_set_cr3(unsigned long value, boo > /* The actual write will occur in hvm_do_resume(), if permitted. */ > v->arch.vm_event->write_data.do_write.cr3 = 1; > v->arch.vm_event->write_data.cr3 = value; > + v->arch.vm_event->write_data.cr3_noflush = noflush; ... this, which I'm pretty sure breaks the reporting of noflush mov to cr3's. 
The semantics of the VMI hook is "the guest tried to write this value to cr3", which should include the noflush bit in its architectural position. I suspect it also breaks a reply with the noflush bit set, because I don't see any way that is fed back from the introspecting agent. CC'ing the Introspection maintainers for their view. ~Andrew
On 17.09.2019 21:31, Andrew Cooper wrote: > On 17/09/2019 07:15, Jan Beulich wrote: >> --- a/xen/arch/x86/hvm/hvm.c >> +++ b/xen/arch/x86/hvm/hvm.c >> @@ -2282,12 +2287,11 @@ int hvm_set_cr0(unsigned long value, boo >> return X86EMUL_OKAY; >> } >> >> -int hvm_set_cr3(unsigned long value, bool may_defer) >> +int hvm_set_cr3(unsigned long value, bool noflush, bool may_defer) >> { >> struct vcpu *v = current; >> struct page_info *page; >> unsigned long old = v->arch.hvm.guest_cr[3]; >> - bool noflush = false; >> >> if ( may_defer && unlikely(v->domain->arch.monitor.write_ctrlreg_enabled & >> monitor_ctrlreg_bitmask(VM_EVENT_X86_CR3)) ) >> @@ -2299,17 +2303,12 @@ int hvm_set_cr3(unsigned long value, boo >> /* The actual write will occur in hvm_do_resume(), if permitted. */ >> v->arch.vm_event->write_data.do_write.cr3 = 1; >> v->arch.vm_event->write_data.cr3 = value; >> + v->arch.vm_event->write_data.cr3_noflush = noflush; > > ... this, which I'm pretty sure breaks the reporting of noflush mov to > cr3's. The semantics of the VMI hook is "the guest tried to write this > value to cr3", which should include the noflush bit in its architectural > position. I suspect it also breaks a reply with the noflush bit set, > because I don't see any way that is fed back from the introspecting agent. hvm_monitor_cr() zaps the bit off the reported value. I wonder whether the patch here should include removing this zapping, as being redundant now. I don't think though that the patch alters behavior in this regard. > CC'ing the Introspection maintainers for their view. I'll wait for their feedback anyway, before making any possible change to the patch. Jan
On Wed, Sep 18, 2019 at 3:29 AM Jan Beulich <jbeulich@suse.com> wrote: > > On 17.09.2019 21:31, Andrew Cooper wrote: > > On 17/09/2019 07:15, Jan Beulich wrote: > >> --- a/xen/arch/x86/hvm/hvm.c > >> +++ b/xen/arch/x86/hvm/hvm.c > >> @@ -2282,12 +2287,11 @@ int hvm_set_cr0(unsigned long value, boo > >> return X86EMUL_OKAY; > >> } > >> > >> -int hvm_set_cr3(unsigned long value, bool may_defer) > >> +int hvm_set_cr3(unsigned long value, bool noflush, bool may_defer) > >> { > >> struct vcpu *v = current; > >> struct page_info *page; > >> unsigned long old = v->arch.hvm.guest_cr[3]; > >> - bool noflush = false; > >> > >> if ( may_defer && unlikely(v->domain->arch.monitor.write_ctrlreg_enabled & > >> monitor_ctrlreg_bitmask(VM_EVENT_X86_CR3)) ) > >> @@ -2299,17 +2303,12 @@ int hvm_set_cr3(unsigned long value, boo > >> /* The actual write will occur in hvm_do_resume(), if permitted. */ > >> v->arch.vm_event->write_data.do_write.cr3 = 1; > >> v->arch.vm_event->write_data.cr3 = value; > >> + v->arch.vm_event->write_data.cr3_noflush = noflush; > > > > ... this, which I'm pretty sure breaks the reporting of noflush mov to > > cr3's. The semantics of the VMI hook is "the guest tried to write this > > value to cr3", which should include the noflush bit in its architectural > > position. I suspect it also breaks a reply with the noflush bit set, > > because I don't see any way that is fed back from the introspecting agent. > > hvm_monitor_cr() zaps the bit off the reported value. I wonder > whether the patch here should include removing this zapping, as > being redundant now. I don't think though that the patch alters > behavior in this regard. > > > CC'ing the Introspection maintainers for their view. > > I'll wait for their feedback anyway, before making any possible > change to the patch. > I'm not aware of any use-case of the CR3 introspection that needs to know whether the noflush bit was set or not. 
Having it zapped before the event is sent out simplifies handling of CR3 events because we don't have to constantly reply with a different cr3 value from the monitor agent with that bit masked. So unless that changes and we come up with a usecase that needs it I wouldn't change how things are right now. Tamas
--- a/xen/arch/x86/hvm/emulate.c +++ b/xen/arch/x86/hvm/emulate.c @@ -2080,6 +2080,8 @@ static int hvmemul_write_cr( HVMTRACE_LONG_2D(CR_WRITE, reg, TRC_PAR_LONG(val)); switch ( reg ) { + bool noflush; + case 0: rc = hvm_set_cr0(val, true); break; @@ -2090,7 +2092,10 @@ static int hvmemul_write_cr( break; case 3: - rc = hvm_set_cr3(val, true); + noflush = hvm_pcid_enabled(current) && (val & X86_CR3_NOFLUSH); + if ( noflush ) + val &= ~X86_CR3_NOFLUSH; + rc = hvm_set_cr3(val, noflush, true); break; case 4: --- a/xen/arch/x86/hvm/hvm.c +++ b/xen/arch/x86/hvm/hvm.c @@ -2059,12 +2059,17 @@ int hvm_mov_to_cr(unsigned int cr, unsig switch ( cr ) { + bool noflush; + case 0: rc = hvm_set_cr0(val, true); break; case 3: - rc = hvm_set_cr3(val, true); + noflush = hvm_pcid_enabled(curr) && (val & X86_CR3_NOFLUSH); + if ( noflush ) + val &= ~X86_CR3_NOFLUSH; + rc = hvm_set_cr3(val, noflush, true); break; case 4: @@ -2282,12 +2287,11 @@ int hvm_set_cr0(unsigned long value, boo return X86EMUL_OKAY; } -int hvm_set_cr3(unsigned long value, bool may_defer) +int hvm_set_cr3(unsigned long value, bool noflush, bool may_defer) { struct vcpu *v = current; struct page_info *page; unsigned long old = v->arch.hvm.guest_cr[3]; - bool noflush = false; if ( may_defer && unlikely(v->domain->arch.monitor.write_ctrlreg_enabled & monitor_ctrlreg_bitmask(VM_EVENT_X86_CR3)) ) @@ -2299,17 +2303,12 @@ int hvm_set_cr3(unsigned long value, boo /* The actual write will occur in hvm_do_resume(), if permitted. */ v->arch.vm_event->write_data.do_write.cr3 = 1; v->arch.vm_event->write_data.cr3 = value; + v->arch.vm_event->write_data.cr3_noflush = noflush; return X86EMUL_OKAY; } } - if ( hvm_pcid_enabled(v) ) /* Clear the noflush bit. 
*/ - { - noflush = value & X86_CR3_NOFLUSH; - value &= ~X86_CR3_NOFLUSH; - } - if ( hvm_paging_enabled(v) && !paging_mode_hap(v->domain) && (value != v->arch.hvm.guest_cr[3]) ) { @@ -3004,7 +3003,7 @@ void hvm_task_switch( if ( task_switch_load_seg(x86_seg_ldtr, tss.ldt, new_cpl, 0) ) goto out; - rc = hvm_set_cr3(tss.cr3, true); + rc = hvm_set_cr3(tss.cr3, false, true); if ( rc == X86EMUL_EXCEPTION ) hvm_inject_hw_exception(TRAP_gp_fault, 0); if ( rc != X86EMUL_OKAY ) --- a/xen/arch/x86/hvm/svm/nestedsvm.c +++ b/xen/arch/x86/hvm/svm/nestedsvm.c @@ -324,7 +324,7 @@ static int nsvm_vcpu_hostrestore(struct v->arch.guest_table = pagetable_null(); /* hvm_set_cr3() below sets v->arch.hvm.guest_cr[3] for us. */ } - rc = hvm_set_cr3(n1vmcb->_cr3, true); + rc = hvm_set_cr3(n1vmcb->_cr3, false, true); if ( rc == X86EMUL_EXCEPTION ) hvm_inject_hw_exception(TRAP_gp_fault, 0); if (rc != X86EMUL_OKAY) @@ -584,7 +584,7 @@ static int nsvm_vmcb_prepare4vmrun(struc nestedsvm_vmcb_set_nestedp2m(v, ns_vmcb, n2vmcb); /* hvm_set_cr3() below sets v->arch.hvm.guest_cr[3] for us. */ - rc = hvm_set_cr3(ns_vmcb->_cr3, true); + rc = hvm_set_cr3(ns_vmcb->_cr3, false, true); if ( rc == X86EMUL_EXCEPTION ) hvm_inject_hw_exception(TRAP_gp_fault, 0); if (rc != X86EMUL_OKAY) @@ -598,7 +598,7 @@ static int nsvm_vmcb_prepare4vmrun(struc * we assume it intercepts page faults. */ /* hvm_set_cr3() below sets v->arch.hvm.guest_cr[3] for us. 
*/ - rc = hvm_set_cr3(ns_vmcb->_cr3, true); + rc = hvm_set_cr3(ns_vmcb->_cr3, false, true); if ( rc == X86EMUL_EXCEPTION ) hvm_inject_hw_exception(TRAP_gp_fault, 0); if (rc != X86EMUL_OKAY) --- a/xen/arch/x86/hvm/vm_event.c +++ b/xen/arch/x86/hvm/vm_event.c @@ -110,7 +110,7 @@ void hvm_vm_event_do_resume(struct vcpu if ( unlikely(w->do_write.cr3) ) { - if ( hvm_set_cr3(w->cr3, false) == X86EMUL_EXCEPTION ) + if ( hvm_set_cr3(w->cr3, w->cr3_noflush, false) == X86EMUL_EXCEPTION ) hvm_inject_hw_exception(TRAP_gp_fault, 0); w->do_write.cr3 = 0; --- a/xen/arch/x86/hvm/vmx/vvmx.c +++ b/xen/arch/x86/hvm/vmx/vvmx.c @@ -1032,7 +1032,7 @@ static void load_shadow_guest_state(stru if ( rc == X86EMUL_EXCEPTION ) hvm_inject_hw_exception(TRAP_gp_fault, 0); - rc = hvm_set_cr3(get_vvmcs(v, GUEST_CR3), true); + rc = hvm_set_cr3(get_vvmcs(v, GUEST_CR3), false, true); if ( rc == X86EMUL_EXCEPTION ) hvm_inject_hw_exception(TRAP_gp_fault, 0); @@ -1246,7 +1246,7 @@ static void load_vvmcs_host_state(struct if ( rc == X86EMUL_EXCEPTION ) hvm_inject_hw_exception(TRAP_gp_fault, 0); - rc = hvm_set_cr3(get_vvmcs(v, HOST_CR3), true); + rc = hvm_set_cr3(get_vvmcs(v, HOST_CR3), false, true); if ( rc == X86EMUL_EXCEPTION ) hvm_inject_hw_exception(TRAP_gp_fault, 0); --- a/xen/include/asm-x86/domain.h +++ b/xen/include/asm-x86/domain.h @@ -274,6 +274,8 @@ struct monitor_write_data { unsigned int cr4 : 1; } do_write; + bool cr3_noflush; + uint32_t msr; uint64_t value; uint64_t cr0; --- a/xen/include/asm-x86/hvm/support.h +++ b/xen/include/asm-x86/hvm/support.h @@ -135,7 +135,7 @@ void hvm_shadow_handle_cd(struct vcpu *v */ int hvm_set_efer(uint64_t value); int hvm_set_cr0(unsigned long value, bool may_defer); -int hvm_set_cr3(unsigned long value, bool may_defer); +int hvm_set_cr3(unsigned long value, bool noflush, bool may_defer); int hvm_set_cr4(unsigned long value, bool may_defer); int hvm_descriptor_access_intercept(uint64_t exit_info, uint64_t vmx_exit_qualification,
The bit is meaningful only for MOV-to-CR3 insns, not anywhere else, in particular not when loading nested guest state. Signed-off-by: Jan Beulich <jbeulich@suse.com>