Message ID | 5CCAE08D020000780022B307@prv1-mh.provo.novell.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | XSA-292 follow-up | expand |
> -----Original Message----- > From: Jan Beulich [mailto:JBeulich@suse.com] > Sent: 02 May 2019 13:20 > To: xen-devel <xen-devel@lists.xenproject.org> > Cc: Andrew Cooper <Andrew.Cooper3@citrix.com>; Paul Durrant <Paul.Durrant@citrix.com>; Roger Pau Monne > <roger.pau@citrix.com>; Wei Liu <wei.liu2@citrix.com>; George Dunlap <George.Dunlap@citrix.com> > Subject: [PATCH 4/9] x86/HVM: move NOFLUSH handling out of hvm_set_cr3() > > The bit is meaningful only for MOV-to-CR3 insns, not anywhere else, in > particular not when loading nested guest state. > > Signed-off-by: Jan Beulich <jbeulich@suse.com> > > --- a/xen/arch/x86/hvm/emulate.c > +++ b/xen/arch/x86/hvm/emulate.c > @@ -2072,6 +2072,8 @@ static int hvmemul_write_cr( > HVMTRACE_LONG_2D(CR_WRITE, reg, TRC_PAR_LONG(val)); > switch ( reg ) > { > + bool noflush; > + Why introduce 'noflush' with this scope when it could be limited to 'case 3:', although... > case 0: > rc = hvm_set_cr0(val, true); > break; > @@ -2082,7 +2084,10 @@ static int hvmemul_write_cr( > break; > > case 3: > - rc = hvm_set_cr3(val, true); > + noflush = hvm_pcid_enabled(current) && (val & X86_CR3_NOFLUSH); > + if ( noflush ) > + val &= ~X86_CR3_NOFLUSH; ... can't you just code this as: if ( hvm_pcid_enabled(current) ) val &= ~X86_CR3_NOFLUSH; ? 
Paul > + rc = hvm_set_cr3(val, noflush, true); > break; > > case 4: > --- a/xen/arch/x86/hvm/hvm.c > +++ b/xen/arch/x86/hvm/hvm.c > @@ -2053,12 +2053,17 @@ int hvm_mov_to_cr(unsigned int cr, unsig > > switch ( cr ) > { > + bool noflush; > + > case 0: > rc = hvm_set_cr0(val, true); > break; > > case 3: > - rc = hvm_set_cr3(val, true); > + noflush = hvm_pcid_enabled(curr) && (val & X86_CR3_NOFLUSH); > + if ( noflush ) > + val &= ~X86_CR3_NOFLUSH; > + rc = hvm_set_cr3(val, noflush, true); > break; > > case 4: > @@ -2276,12 +2281,11 @@ int hvm_set_cr0(unsigned long value, boo > return X86EMUL_OKAY; > } > > -int hvm_set_cr3(unsigned long value, bool may_defer) > +int hvm_set_cr3(unsigned long value, bool noflush, bool may_defer) > { > struct vcpu *v = current; > struct page_info *page; > unsigned long old = v->arch.hvm.guest_cr[3]; > - bool noflush = false; > > if ( may_defer && unlikely(v->domain->arch.monitor.write_ctrlreg_enabled & > monitor_ctrlreg_bitmask(VM_EVENT_X86_CR3)) ) > @@ -2293,17 +2297,12 @@ int hvm_set_cr3(unsigned long value, boo > /* The actual write will occur in hvm_do_resume(), if permitted. */ > v->arch.vm_event->write_data.do_write.cr3 = 1; > v->arch.vm_event->write_data.cr3 = value; > + v->arch.vm_event->write_data.cr3_noflush = noflush; > > return X86EMUL_OKAY; > } > } > > - if ( hvm_pcid_enabled(v) ) /* Clear the noflush bit. 
*/ > - { > - noflush = value & X86_CR3_NOFLUSH; > - value &= ~X86_CR3_NOFLUSH; > - } > - > if ( hvm_paging_enabled(v) && !paging_mode_hap(v->domain) && > (value != v->arch.hvm.guest_cr[3]) ) > { > @@ -2998,7 +2997,7 @@ void hvm_task_switch( > if ( task_switch_load_seg(x86_seg_ldtr, tss.ldt, new_cpl, 0) ) > goto out; > > - rc = hvm_set_cr3(tss.cr3, true); > + rc = hvm_set_cr3(tss.cr3, false, true); > if ( rc == X86EMUL_EXCEPTION ) > hvm_inject_hw_exception(TRAP_gp_fault, 0); > if ( rc != X86EMUL_OKAY ) > --- a/xen/arch/x86/hvm/svm/nestedsvm.c > +++ b/xen/arch/x86/hvm/svm/nestedsvm.c > @@ -324,7 +324,7 @@ static int nsvm_vcpu_hostrestore(struct > v->arch.guest_table = pagetable_null(); > /* hvm_set_cr3() below sets v->arch.hvm.guest_cr[3] for us. */ > } > - rc = hvm_set_cr3(n1vmcb->_cr3, true); > + rc = hvm_set_cr3(n1vmcb->_cr3, false, true); > if ( rc == X86EMUL_EXCEPTION ) > hvm_inject_hw_exception(TRAP_gp_fault, 0); > if (rc != X86EMUL_OKAY) > @@ -584,7 +584,7 @@ static int nsvm_vmcb_prepare4vmrun(struc > nestedsvm_vmcb_set_nestedp2m(v, ns_vmcb, n2vmcb); > > /* hvm_set_cr3() below sets v->arch.hvm.guest_cr[3] for us. */ > - rc = hvm_set_cr3(ns_vmcb->_cr3, true); > + rc = hvm_set_cr3(ns_vmcb->_cr3, false, true); > if ( rc == X86EMUL_EXCEPTION ) > hvm_inject_hw_exception(TRAP_gp_fault, 0); > if (rc != X86EMUL_OKAY) > @@ -598,7 +598,7 @@ static int nsvm_vmcb_prepare4vmrun(struc > * we assume it intercepts page faults. > */ > /* hvm_set_cr3() below sets v->arch.hvm.guest_cr[3] for us. 
*/ > - rc = hvm_set_cr3(ns_vmcb->_cr3, true); > + rc = hvm_set_cr3(ns_vmcb->_cr3, false, true); > if ( rc == X86EMUL_EXCEPTION ) > hvm_inject_hw_exception(TRAP_gp_fault, 0); > if (rc != X86EMUL_OKAY) > --- a/xen/arch/x86/hvm/vm_event.c > +++ b/xen/arch/x86/hvm/vm_event.c > @@ -110,7 +110,7 @@ void hvm_vm_event_do_resume(struct vcpu > > if ( unlikely(w->do_write.cr3) ) > { > - if ( hvm_set_cr3(w->cr3, false) == X86EMUL_EXCEPTION ) > + if ( hvm_set_cr3(w->cr3, w->cr3_noflush, false) == X86EMUL_EXCEPTION ) > hvm_inject_hw_exception(TRAP_gp_fault, 0); > > w->do_write.cr3 = 0; > --- a/xen/arch/x86/hvm/vmx/vvmx.c > +++ b/xen/arch/x86/hvm/vmx/vvmx.c > @@ -1028,7 +1028,7 @@ static void load_shadow_guest_state(stru > if ( rc == X86EMUL_EXCEPTION ) > hvm_inject_hw_exception(TRAP_gp_fault, 0); > > - rc = hvm_set_cr3(get_vvmcs(v, GUEST_CR3), true); > + rc = hvm_set_cr3(get_vvmcs(v, GUEST_CR3), false, true); > if ( rc == X86EMUL_EXCEPTION ) > hvm_inject_hw_exception(TRAP_gp_fault, 0); > > @@ -1242,7 +1242,7 @@ static void load_vvmcs_host_state(struct > if ( rc == X86EMUL_EXCEPTION ) > hvm_inject_hw_exception(TRAP_gp_fault, 0); > > - rc = hvm_set_cr3(get_vvmcs(v, HOST_CR3), true); > + rc = hvm_set_cr3(get_vvmcs(v, HOST_CR3), false, true); > if ( rc == X86EMUL_EXCEPTION ) > hvm_inject_hw_exception(TRAP_gp_fault, 0); > > --- a/xen/include/asm-x86/domain.h > +++ b/xen/include/asm-x86/domain.h > @@ -275,6 +275,8 @@ struct monitor_write_data { > unsigned int cr4 : 1; > } do_write; > > + bool cr3_noflush; > + > uint32_t msr; > uint64_t value; > uint64_t cr0; > --- a/xen/include/asm-x86/hvm/support.h > +++ b/xen/include/asm-x86/hvm/support.h > @@ -135,7 +135,7 @@ void hvm_shadow_handle_cd(struct vcpu *v > */ > int hvm_set_efer(uint64_t value); > int hvm_set_cr0(unsigned long value, bool may_defer); > -int hvm_set_cr3(unsigned long value, bool may_defer); > +int hvm_set_cr3(unsigned long value, bool noflush, bool may_defer); > int hvm_set_cr4(unsigned long value, bool may_defer); > int 
hvm_descriptor_access_intercept(uint64_t exit_info, > uint64_t vmx_exit_qualification, > >
>>> On 02.05.19 at 15:07, <Paul.Durrant@citrix.com> wrote: >> From: Jan Beulich [mailto:JBeulich@suse.com] >> Sent: 02 May 2019 13:20 >> >> --- a/xen/arch/x86/hvm/emulate.c >> +++ b/xen/arch/x86/hvm/emulate.c >> @@ -2072,6 +2072,8 @@ static int hvmemul_write_cr( >> HVMTRACE_LONG_2D(CR_WRITE, reg, TRC_PAR_LONG(val)); >> switch ( reg ) >> { >> + bool noflush; >> + > > Why introduce 'noflush' with this scope when it could be limited to 'case > 3:', although... Because this would entail introducing another set of braces, and I pretty much dislike these case-block braces: They either don't properly indent (as we do commonly), or they needlessly increase indentation of the enclosed block. Hence my general preference of switch-scope local variables. >> @@ -2082,7 +2084,10 @@ static int hvmemul_write_cr( >> break; >> >> case 3: >> - rc = hvm_set_cr3(val, true); >> + noflush = hvm_pcid_enabled(current) && (val & X86_CR3_NOFLUSH); >> + if ( noflush ) >> + val &= ~X86_CR3_NOFLUSH; > > ... can't you just code this as: > > if ( hvm_pcid_enabled(current) ) > val &= ~X86_CR3_NOFLUSH; > > ? Because of ... >> + rc = hvm_set_cr3(val, noflush, true); ... this further use of "noflush" (alongside the adjusted "val"). Jan
> -----Original Message----- > From: Jan Beulich [mailto:JBeulich@suse.com] > Sent: 02 May 2019 14:23 > To: Paul Durrant <Paul.Durrant@citrix.com> > Cc: Andrew Cooper <Andrew.Cooper3@citrix.com>; George Dunlap <George.Dunlap@citrix.com>; Roger Pau > Monne <roger.pau@citrix.com>; Wei Liu <wei.liu2@citrix.com>; xen-devel <xen- > devel@lists.xenproject.org> > Subject: RE: [PATCH 4/9] x86/HVM: move NOFLUSH handling out of hvm_set_cr3() > > >>> On 02.05.19 at 15:07, <Paul.Durrant@citrix.com> wrote: > >> From: Jan Beulich [mailto:JBeulich@suse.com] > >> Sent: 02 May 2019 13:20 > >> > >> --- a/xen/arch/x86/hvm/emulate.c > >> +++ b/xen/arch/x86/hvm/emulate.c > >> @@ -2072,6 +2072,8 @@ static int hvmemul_write_cr( > >> HVMTRACE_LONG_2D(CR_WRITE, reg, TRC_PAR_LONG(val)); > >> switch ( reg ) > >> { > >> + bool noflush; > >> + > > > > Why introduce 'noflush' with this scope when it could be limited to 'case > > 3:', although... > > Because this would entail introducing another set of braces, and > I pretty much dislike these case-block braces: They either don't > properly indent (as we do commonly), or they needlessly increase > indentation of the enclosed block. Hence my general preference > of switch-scope local variables. > > >> @@ -2082,7 +2084,10 @@ static int hvmemul_write_cr( > >> break; > >> > >> case 3: > >> - rc = hvm_set_cr3(val, true); > >> + noflush = hvm_pcid_enabled(current) && (val & X86_CR3_NOFLUSH); > >> + if ( noflush ) > >> + val &= ~X86_CR3_NOFLUSH; > > > > ... can't you just code this as: > > > > if ( hvm_pcid_enabled(current) ) > > val &= ~X86_CR3_NOFLUSH; > > > > ? > > Because of ... > > >> + rc = hvm_set_cr3(val, noflush, true); > > ... this further use of "noflush" (alongside the adjusted "val"). > Ah, missed that... I'd still go for the tighter scope though, but then I don't mind the extra braces. Paul > Jan >
--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -2072,6 +2072,8 @@ static int hvmemul_write_cr(
     HVMTRACE_LONG_2D(CR_WRITE, reg, TRC_PAR_LONG(val));
     switch ( reg )
     {
+        bool noflush;
+
     case 0:
         rc = hvm_set_cr0(val, true);
         break;
@@ -2082,7 +2084,10 @@ static int hvmemul_write_cr(
         break;
 
     case 3:
-        rc = hvm_set_cr3(val, true);
+        noflush = hvm_pcid_enabled(current) && (val & X86_CR3_NOFLUSH);
+        if ( noflush )
+            val &= ~X86_CR3_NOFLUSH;
+        rc = hvm_set_cr3(val, noflush, true);
         break;
 
     case 4:
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -2053,12 +2053,17 @@ int hvm_mov_to_cr(unsigned int cr, unsig
 
     switch ( cr )
     {
+        bool noflush;
+
     case 0:
         rc = hvm_set_cr0(val, true);
         break;
 
     case 3:
-        rc = hvm_set_cr3(val, true);
+        noflush = hvm_pcid_enabled(curr) && (val & X86_CR3_NOFLUSH);
+        if ( noflush )
+            val &= ~X86_CR3_NOFLUSH;
+        rc = hvm_set_cr3(val, noflush, true);
         break;
 
     case 4:
@@ -2276,12 +2281,11 @@ int hvm_set_cr0(unsigned long value, boo
     return X86EMUL_OKAY;
 }
 
-int hvm_set_cr3(unsigned long value, bool may_defer)
+int hvm_set_cr3(unsigned long value, bool noflush, bool may_defer)
 {
     struct vcpu *v = current;
     struct page_info *page;
     unsigned long old = v->arch.hvm.guest_cr[3];
-    bool noflush = false;
 
     if ( may_defer && unlikely(v->domain->arch.monitor.write_ctrlreg_enabled &
                                monitor_ctrlreg_bitmask(VM_EVENT_X86_CR3)) )
@@ -2293,17 +2297,12 @@ int hvm_set_cr3(unsigned long value, boo
             /* The actual write will occur in hvm_do_resume(), if permitted. */
             v->arch.vm_event->write_data.do_write.cr3 = 1;
             v->arch.vm_event->write_data.cr3 = value;
+            v->arch.vm_event->write_data.cr3_noflush = noflush;
 
             return X86EMUL_OKAY;
         }
     }
 
-    if ( hvm_pcid_enabled(v) ) /* Clear the noflush bit. */
-    {
-        noflush = value & X86_CR3_NOFLUSH;
-        value &= ~X86_CR3_NOFLUSH;
-    }
-
     if ( hvm_paging_enabled(v) && !paging_mode_hap(v->domain) &&
          (value != v->arch.hvm.guest_cr[3]) )
     {
@@ -2998,7 +2997,7 @@ void hvm_task_switch(
     if ( task_switch_load_seg(x86_seg_ldtr, tss.ldt, new_cpl, 0) )
         goto out;
 
-    rc = hvm_set_cr3(tss.cr3, true);
+    rc = hvm_set_cr3(tss.cr3, false, true);
     if ( rc == X86EMUL_EXCEPTION )
         hvm_inject_hw_exception(TRAP_gp_fault, 0);
     if ( rc != X86EMUL_OKAY )
--- a/xen/arch/x86/hvm/svm/nestedsvm.c
+++ b/xen/arch/x86/hvm/svm/nestedsvm.c
@@ -324,7 +324,7 @@ static int nsvm_vcpu_hostrestore(struct
         v->arch.guest_table = pagetable_null();
         /* hvm_set_cr3() below sets v->arch.hvm.guest_cr[3] for us. */
     }
-    rc = hvm_set_cr3(n1vmcb->_cr3, true);
+    rc = hvm_set_cr3(n1vmcb->_cr3, false, true);
     if ( rc == X86EMUL_EXCEPTION )
         hvm_inject_hw_exception(TRAP_gp_fault, 0);
     if (rc != X86EMUL_OKAY)
@@ -584,7 +584,7 @@ static int nsvm_vmcb_prepare4vmrun(struc
         nestedsvm_vmcb_set_nestedp2m(v, ns_vmcb, n2vmcb);
 
         /* hvm_set_cr3() below sets v->arch.hvm.guest_cr[3] for us. */
-        rc = hvm_set_cr3(ns_vmcb->_cr3, true);
+        rc = hvm_set_cr3(ns_vmcb->_cr3, false, true);
         if ( rc == X86EMUL_EXCEPTION )
             hvm_inject_hw_exception(TRAP_gp_fault, 0);
         if (rc != X86EMUL_OKAY)
@@ -598,7 +598,7 @@ static int nsvm_vmcb_prepare4vmrun(struc
          * we assume it intercepts page faults.
          */
         /* hvm_set_cr3() below sets v->arch.hvm.guest_cr[3] for us. */
-        rc = hvm_set_cr3(ns_vmcb->_cr3, true);
+        rc = hvm_set_cr3(ns_vmcb->_cr3, false, true);
         if ( rc == X86EMUL_EXCEPTION )
             hvm_inject_hw_exception(TRAP_gp_fault, 0);
         if (rc != X86EMUL_OKAY)
--- a/xen/arch/x86/hvm/vm_event.c
+++ b/xen/arch/x86/hvm/vm_event.c
@@ -110,7 +110,7 @@ void hvm_vm_event_do_resume(struct vcpu
 
     if ( unlikely(w->do_write.cr3) )
     {
-        if ( hvm_set_cr3(w->cr3, false) == X86EMUL_EXCEPTION )
+        if ( hvm_set_cr3(w->cr3, w->cr3_noflush, false) == X86EMUL_EXCEPTION )
             hvm_inject_hw_exception(TRAP_gp_fault, 0);
 
         w->do_write.cr3 = 0;
--- a/xen/arch/x86/hvm/vmx/vvmx.c
+++ b/xen/arch/x86/hvm/vmx/vvmx.c
@@ -1028,7 +1028,7 @@ static void load_shadow_guest_state(stru
     if ( rc == X86EMUL_EXCEPTION )
         hvm_inject_hw_exception(TRAP_gp_fault, 0);
 
-    rc = hvm_set_cr3(get_vvmcs(v, GUEST_CR3), true);
+    rc = hvm_set_cr3(get_vvmcs(v, GUEST_CR3), false, true);
     if ( rc == X86EMUL_EXCEPTION )
         hvm_inject_hw_exception(TRAP_gp_fault, 0);
 
@@ -1242,7 +1242,7 @@ static void load_vvmcs_host_state(struct
     if ( rc == X86EMUL_EXCEPTION )
         hvm_inject_hw_exception(TRAP_gp_fault, 0);
 
-    rc = hvm_set_cr3(get_vvmcs(v, HOST_CR3), true);
+    rc = hvm_set_cr3(get_vvmcs(v, HOST_CR3), false, true);
     if ( rc == X86EMUL_EXCEPTION )
         hvm_inject_hw_exception(TRAP_gp_fault, 0);
 
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -275,6 +275,8 @@ struct monitor_write_data {
         unsigned int cr4 : 1;
     } do_write;
 
+    bool cr3_noflush;
+
     uint32_t msr;
     uint64_t value;
     uint64_t cr0;
--- a/xen/include/asm-x86/hvm/support.h
+++ b/xen/include/asm-x86/hvm/support.h
@@ -135,7 +135,7 @@ void hvm_shadow_handle_cd(struct vcpu *v
  */
 int hvm_set_efer(uint64_t value);
 int hvm_set_cr0(unsigned long value, bool may_defer);
-int hvm_set_cr3(unsigned long value, bool may_defer);
+int hvm_set_cr3(unsigned long value, bool noflush, bool may_defer);
 int hvm_set_cr4(unsigned long value, bool may_defer);
 int hvm_descriptor_access_intercept(uint64_t exit_info,
                                     uint64_t vmx_exit_qualification,
The bit is meaningful only for MOV-to-CR3 insns, not anywhere else, in particular not when loading nested guest state. Signed-off-by: Jan Beulich <jbeulich@suse.com>