diff mbox series

[4/9] x86/HVM: move NOFLUSH handling out of hvm_set_cr3()

Message ID 5CCAE08D020000780022B307@prv1-mh.provo.novell.com (mailing list archive)
State New, archived
Headers show
Series XSA-292 follow-up | expand

Commit Message

Jan Beulich May 2, 2019, 12:20 p.m. UTC
The X86_CR3_NOFLUSH bit is meaningful only for MOV-to-CR3 instructions, not
anywhere else, in particular not when loading nested guest state.

Signed-off-by: Jan Beulich <jbeulich@suse.com>

Comments

Paul Durrant May 2, 2019, 1:07 p.m. UTC | #1
> -----Original Message-----
> From: Jan Beulich [mailto:JBeulich@suse.com]
> Sent: 02 May 2019 13:20
> To: xen-devel <xen-devel@lists.xenproject.org>
> Cc: Andrew Cooper <Andrew.Cooper3@citrix.com>; Paul Durrant <Paul.Durrant@citrix.com>; Roger Pau Monne
> <roger.pau@citrix.com>; Wei Liu <wei.liu2@citrix.com>; George Dunlap <George.Dunlap@citrix.com>
> Subject: [PATCH 4/9] x86/HVM: move NOFLUSH handling out of hvm_set_cr3()
> 
> The bit is meaningful only for MOV-to-CR3 insns, not anywhere else, in
> particular not when loading nested guest state.
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> 
> --- a/xen/arch/x86/hvm/emulate.c
> +++ b/xen/arch/x86/hvm/emulate.c
> @@ -2072,6 +2072,8 @@ static int hvmemul_write_cr(
>      HVMTRACE_LONG_2D(CR_WRITE, reg, TRC_PAR_LONG(val));
>      switch ( reg )
>      {
> +        bool noflush;
> +

Why introduce 'noflush' with this scope when it could be limited to 'case 3:', although...

>      case 0:
>          rc = hvm_set_cr0(val, true);
>          break;
> @@ -2082,7 +2084,10 @@ static int hvmemul_write_cr(
>          break;
> 
>      case 3:
> -        rc = hvm_set_cr3(val, true);
> +        noflush = hvm_pcid_enabled(current) && (val & X86_CR3_NOFLUSH);
> +        if ( noflush )
> +            val &= ~X86_CR3_NOFLUSH;

... can't you just code this as:

if ( hvm_pcid_enabled(current) )
    val &= ~X86_CR3_NOFLUSH;

?

  Paul

> +        rc = hvm_set_cr3(val, noflush, true);
>          break;
> 
>      case 4:
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -2053,12 +2053,17 @@ int hvm_mov_to_cr(unsigned int cr, unsig
> 
>      switch ( cr )
>      {
> +        bool noflush;
> +
>      case 0:
>          rc = hvm_set_cr0(val, true);
>          break;
> 
>      case 3:
> -        rc = hvm_set_cr3(val, true);
> +        noflush = hvm_pcid_enabled(curr) && (val & X86_CR3_NOFLUSH);
> +        if ( noflush )
> +            val &= ~X86_CR3_NOFLUSH;
> +        rc = hvm_set_cr3(val, noflush, true);
>          break;
> 
>      case 4:
> @@ -2276,12 +2281,11 @@ int hvm_set_cr0(unsigned long value, boo
>      return X86EMUL_OKAY;
>  }
> 
> -int hvm_set_cr3(unsigned long value, bool may_defer)
> +int hvm_set_cr3(unsigned long value, bool noflush, bool may_defer)
>  {
>      struct vcpu *v = current;
>      struct page_info *page;
>      unsigned long old = v->arch.hvm.guest_cr[3];
> -    bool noflush = false;
> 
>      if ( may_defer && unlikely(v->domain->arch.monitor.write_ctrlreg_enabled &
>                                 monitor_ctrlreg_bitmask(VM_EVENT_X86_CR3)) )
> @@ -2293,17 +2297,12 @@ int hvm_set_cr3(unsigned long value, boo
>              /* The actual write will occur in hvm_do_resume(), if permitted. */
>              v->arch.vm_event->write_data.do_write.cr3 = 1;
>              v->arch.vm_event->write_data.cr3 = value;
> +            v->arch.vm_event->write_data.cr3_noflush = noflush;
> 
>              return X86EMUL_OKAY;
>          }
>      }
> 
> -    if ( hvm_pcid_enabled(v) ) /* Clear the noflush bit. */
> -    {
> -        noflush = value & X86_CR3_NOFLUSH;
> -        value &= ~X86_CR3_NOFLUSH;
> -    }
> -
>      if ( hvm_paging_enabled(v) && !paging_mode_hap(v->domain) &&
>           (value != v->arch.hvm.guest_cr[3]) )
>      {
> @@ -2998,7 +2997,7 @@ void hvm_task_switch(
>      if ( task_switch_load_seg(x86_seg_ldtr, tss.ldt, new_cpl, 0) )
>          goto out;
> 
> -    rc = hvm_set_cr3(tss.cr3, true);
> +    rc = hvm_set_cr3(tss.cr3, false, true);
>      if ( rc == X86EMUL_EXCEPTION )
>          hvm_inject_hw_exception(TRAP_gp_fault, 0);
>      if ( rc != X86EMUL_OKAY )
> --- a/xen/arch/x86/hvm/svm/nestedsvm.c
> +++ b/xen/arch/x86/hvm/svm/nestedsvm.c
> @@ -324,7 +324,7 @@ static int nsvm_vcpu_hostrestore(struct
>          v->arch.guest_table = pagetable_null();
>          /* hvm_set_cr3() below sets v->arch.hvm.guest_cr[3] for us. */
>      }
> -    rc = hvm_set_cr3(n1vmcb->_cr3, true);
> +    rc = hvm_set_cr3(n1vmcb->_cr3, false, true);
>      if ( rc == X86EMUL_EXCEPTION )
>          hvm_inject_hw_exception(TRAP_gp_fault, 0);
>      if (rc != X86EMUL_OKAY)
> @@ -584,7 +584,7 @@ static int nsvm_vmcb_prepare4vmrun(struc
>          nestedsvm_vmcb_set_nestedp2m(v, ns_vmcb, n2vmcb);
> 
>          /* hvm_set_cr3() below sets v->arch.hvm.guest_cr[3] for us. */
> -        rc = hvm_set_cr3(ns_vmcb->_cr3, true);
> +        rc = hvm_set_cr3(ns_vmcb->_cr3, false, true);
>          if ( rc == X86EMUL_EXCEPTION )
>              hvm_inject_hw_exception(TRAP_gp_fault, 0);
>          if (rc != X86EMUL_OKAY)
> @@ -598,7 +598,7 @@ static int nsvm_vmcb_prepare4vmrun(struc
>           * we assume it intercepts page faults.
>           */
>          /* hvm_set_cr3() below sets v->arch.hvm.guest_cr[3] for us. */
> -        rc = hvm_set_cr3(ns_vmcb->_cr3, true);
> +        rc = hvm_set_cr3(ns_vmcb->_cr3, false, true);
>          if ( rc == X86EMUL_EXCEPTION )
>              hvm_inject_hw_exception(TRAP_gp_fault, 0);
>          if (rc != X86EMUL_OKAY)
> --- a/xen/arch/x86/hvm/vm_event.c
> +++ b/xen/arch/x86/hvm/vm_event.c
> @@ -110,7 +110,7 @@ void hvm_vm_event_do_resume(struct vcpu
> 
>      if ( unlikely(w->do_write.cr3) )
>      {
> -        if ( hvm_set_cr3(w->cr3, false) == X86EMUL_EXCEPTION )
> +        if ( hvm_set_cr3(w->cr3, w->cr3_noflush, false) == X86EMUL_EXCEPTION )
>              hvm_inject_hw_exception(TRAP_gp_fault, 0);
> 
>          w->do_write.cr3 = 0;
> --- a/xen/arch/x86/hvm/vmx/vvmx.c
> +++ b/xen/arch/x86/hvm/vmx/vvmx.c
> @@ -1028,7 +1028,7 @@ static void load_shadow_guest_state(stru
>      if ( rc == X86EMUL_EXCEPTION )
>          hvm_inject_hw_exception(TRAP_gp_fault, 0);
> 
> -    rc = hvm_set_cr3(get_vvmcs(v, GUEST_CR3), true);
> +    rc = hvm_set_cr3(get_vvmcs(v, GUEST_CR3), false, true);
>      if ( rc == X86EMUL_EXCEPTION )
>          hvm_inject_hw_exception(TRAP_gp_fault, 0);
> 
> @@ -1242,7 +1242,7 @@ static void load_vvmcs_host_state(struct
>      if ( rc == X86EMUL_EXCEPTION )
>          hvm_inject_hw_exception(TRAP_gp_fault, 0);
> 
> -    rc = hvm_set_cr3(get_vvmcs(v, HOST_CR3), true);
> +    rc = hvm_set_cr3(get_vvmcs(v, HOST_CR3), false, true);
>      if ( rc == X86EMUL_EXCEPTION )
>          hvm_inject_hw_exception(TRAP_gp_fault, 0);
> 
> --- a/xen/include/asm-x86/domain.h
> +++ b/xen/include/asm-x86/domain.h
> @@ -275,6 +275,8 @@ struct monitor_write_data {
>          unsigned int cr4 : 1;
>      } do_write;
> 
> +    bool cr3_noflush;
> +
>      uint32_t msr;
>      uint64_t value;
>      uint64_t cr0;
> --- a/xen/include/asm-x86/hvm/support.h
> +++ b/xen/include/asm-x86/hvm/support.h
> @@ -135,7 +135,7 @@ void hvm_shadow_handle_cd(struct vcpu *v
>   */
>  int hvm_set_efer(uint64_t value);
>  int hvm_set_cr0(unsigned long value, bool may_defer);
> -int hvm_set_cr3(unsigned long value, bool may_defer);
> +int hvm_set_cr3(unsigned long value, bool noflush, bool may_defer);
>  int hvm_set_cr4(unsigned long value, bool may_defer);
>  int hvm_descriptor_access_intercept(uint64_t exit_info,
>                                      uint64_t vmx_exit_qualification,
> 
>
Jan Beulich May 2, 2019, 1:23 p.m. UTC | #2
>>> On 02.05.19 at 15:07, <Paul.Durrant@citrix.com> wrote:
>> From: Jan Beulich [mailto:JBeulich@suse.com]
>> Sent: 02 May 2019 13:20
>> 
>> --- a/xen/arch/x86/hvm/emulate.c
>> +++ b/xen/arch/x86/hvm/emulate.c
>> @@ -2072,6 +2072,8 @@ static int hvmemul_write_cr(
>>      HVMTRACE_LONG_2D(CR_WRITE, reg, TRC_PAR_LONG(val));
>>      switch ( reg )
>>      {
>> +        bool noflush;
>> +
> 
> Why introduce 'noflush' with this scope when it could be limited to
> 'case 3:', although...

Because this would entail introducing another set of braces, and
I pretty much dislike these case-block braces: They either don't
properly indent (as we do commonly), or they needlessly increase
indentation of the enclosed block. Hence my general preference
of switch-scope local variables.

>> @@ -2082,7 +2084,10 @@ static int hvmemul_write_cr(
>>          break;
>> 
>>      case 3:
>> -        rc = hvm_set_cr3(val, true);
>> +        noflush = hvm_pcid_enabled(current) && (val & X86_CR3_NOFLUSH);
>> +        if ( noflush )
>> +            val &= ~X86_CR3_NOFLUSH;
> 
> ... can't you just code this as:
> 
> if ( hvm_pcid_enabled(current) )
>     val &= ~X86_CR3_NOFLUSH;
> 
> ?

Because of ...

>> +        rc = hvm_set_cr3(val, noflush, true);

... this further use of "noflush" (alongside the adjusted "val").

Jan
Paul Durrant May 2, 2019, 1:25 p.m. UTC | #3
> -----Original Message-----
> From: Jan Beulich [mailto:JBeulich@suse.com]
> Sent: 02 May 2019 14:23
> To: Paul Durrant <Paul.Durrant@citrix.com>
> Cc: Andrew Cooper <Andrew.Cooper3@citrix.com>; George Dunlap <George.Dunlap@citrix.com>; Roger Pau
> Monne <roger.pau@citrix.com>; Wei Liu <wei.liu2@citrix.com>; xen-devel
> <xen-devel@lists.xenproject.org>
> Subject: RE: [PATCH 4/9] x86/HVM: move NOFLUSH handling out of hvm_set_cr3()
> 
> >>> On 02.05.19 at 15:07, <Paul.Durrant@citrix.com> wrote:
> >> From: Jan Beulich [mailto:JBeulich@suse.com]
> >> Sent: 02 May 2019 13:20
> >>
> >> --- a/xen/arch/x86/hvm/emulate.c
> >> +++ b/xen/arch/x86/hvm/emulate.c
> >> @@ -2072,6 +2072,8 @@ static int hvmemul_write_cr(
> >>      HVMTRACE_LONG_2D(CR_WRITE, reg, TRC_PAR_LONG(val));
> >>      switch ( reg )
> >>      {
> >> +        bool noflush;
> >> +
> >
> > Why introduce 'noflush' with this scope when it could be limited to
> > 'case 3:', although...
> 
> Because this would entail introducing another set of braces, and
> I pretty much dislike these case-block braces: They either don't
> properly indent (as we do commonly), or they needlessly increase
> indentation of the enclosed block. Hence my general preference
> of switch-scope local variables.
> 
> >> @@ -2082,7 +2084,10 @@ static int hvmemul_write_cr(
> >>          break;
> >>
> >>      case 3:
> >> -        rc = hvm_set_cr3(val, true);
> >> +        noflush = hvm_pcid_enabled(current) && (val & X86_CR3_NOFLUSH);
> >> +        if ( noflush )
> >> +            val &= ~X86_CR3_NOFLUSH;
> >
> > ... can't you just code this as:
> >
> > if ( hvm_pcid_enabled(current) )
> >     val &= ~X86_CR3_NOFLUSH;
> >
> > ?
> 
> Because of ...
> 
> >> +        rc = hvm_set_cr3(val, noflush, true);
> 
> ... this further use of "noflush" (alongside the adjusted "val").
> 

Ah, missed that... I'd still go for the tighter scope though, but then I don't mind the extra braces.

  Paul

> Jan
>
diff mbox series

Patch

--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -2072,6 +2072,8 @@  static int hvmemul_write_cr(
     HVMTRACE_LONG_2D(CR_WRITE, reg, TRC_PAR_LONG(val));
     switch ( reg )
     {
+        bool noflush;
+
     case 0:
         rc = hvm_set_cr0(val, true);
         break;
@@ -2082,7 +2084,10 @@  static int hvmemul_write_cr(
         break;
 
     case 3:
-        rc = hvm_set_cr3(val, true);
+        noflush = hvm_pcid_enabled(current) && (val & X86_CR3_NOFLUSH);
+        if ( noflush )
+            val &= ~X86_CR3_NOFLUSH;
+        rc = hvm_set_cr3(val, noflush, true);
         break;
 
     case 4:
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -2053,12 +2053,17 @@  int hvm_mov_to_cr(unsigned int cr, unsig
 
     switch ( cr )
     {
+        bool noflush;
+
     case 0:
         rc = hvm_set_cr0(val, true);
         break;
 
     case 3:
-        rc = hvm_set_cr3(val, true);
+        noflush = hvm_pcid_enabled(curr) && (val & X86_CR3_NOFLUSH);
+        if ( noflush )
+            val &= ~X86_CR3_NOFLUSH;
+        rc = hvm_set_cr3(val, noflush, true);
         break;
 
     case 4:
@@ -2276,12 +2281,11 @@  int hvm_set_cr0(unsigned long value, boo
     return X86EMUL_OKAY;
 }
 
-int hvm_set_cr3(unsigned long value, bool may_defer)
+int hvm_set_cr3(unsigned long value, bool noflush, bool may_defer)
 {
     struct vcpu *v = current;
     struct page_info *page;
     unsigned long old = v->arch.hvm.guest_cr[3];
-    bool noflush = false;
 
     if ( may_defer && unlikely(v->domain->arch.monitor.write_ctrlreg_enabled &
                                monitor_ctrlreg_bitmask(VM_EVENT_X86_CR3)) )
@@ -2293,17 +2297,12 @@  int hvm_set_cr3(unsigned long value, boo
             /* The actual write will occur in hvm_do_resume(), if permitted. */
             v->arch.vm_event->write_data.do_write.cr3 = 1;
             v->arch.vm_event->write_data.cr3 = value;
+            v->arch.vm_event->write_data.cr3_noflush = noflush;
 
             return X86EMUL_OKAY;
         }
     }
 
-    if ( hvm_pcid_enabled(v) ) /* Clear the noflush bit. */
-    {
-        noflush = value & X86_CR3_NOFLUSH;
-        value &= ~X86_CR3_NOFLUSH;
-    }
-
     if ( hvm_paging_enabled(v) && !paging_mode_hap(v->domain) &&
          (value != v->arch.hvm.guest_cr[3]) )
     {
@@ -2998,7 +2997,7 @@  void hvm_task_switch(
     if ( task_switch_load_seg(x86_seg_ldtr, tss.ldt, new_cpl, 0) )
         goto out;
 
-    rc = hvm_set_cr3(tss.cr3, true);
+    rc = hvm_set_cr3(tss.cr3, false, true);
     if ( rc == X86EMUL_EXCEPTION )
         hvm_inject_hw_exception(TRAP_gp_fault, 0);
     if ( rc != X86EMUL_OKAY )
--- a/xen/arch/x86/hvm/svm/nestedsvm.c
+++ b/xen/arch/x86/hvm/svm/nestedsvm.c
@@ -324,7 +324,7 @@  static int nsvm_vcpu_hostrestore(struct
         v->arch.guest_table = pagetable_null();
         /* hvm_set_cr3() below sets v->arch.hvm.guest_cr[3] for us. */
     }
-    rc = hvm_set_cr3(n1vmcb->_cr3, true);
+    rc = hvm_set_cr3(n1vmcb->_cr3, false, true);
     if ( rc == X86EMUL_EXCEPTION )
         hvm_inject_hw_exception(TRAP_gp_fault, 0);
     if (rc != X86EMUL_OKAY)
@@ -584,7 +584,7 @@  static int nsvm_vmcb_prepare4vmrun(struc
         nestedsvm_vmcb_set_nestedp2m(v, ns_vmcb, n2vmcb);
 
         /* hvm_set_cr3() below sets v->arch.hvm.guest_cr[3] for us. */
-        rc = hvm_set_cr3(ns_vmcb->_cr3, true);
+        rc = hvm_set_cr3(ns_vmcb->_cr3, false, true);
         if ( rc == X86EMUL_EXCEPTION )
             hvm_inject_hw_exception(TRAP_gp_fault, 0);
         if (rc != X86EMUL_OKAY)
@@ -598,7 +598,7 @@  static int nsvm_vmcb_prepare4vmrun(struc
          * we assume it intercepts page faults.
          */
         /* hvm_set_cr3() below sets v->arch.hvm.guest_cr[3] for us. */
-        rc = hvm_set_cr3(ns_vmcb->_cr3, true);
+        rc = hvm_set_cr3(ns_vmcb->_cr3, false, true);
         if ( rc == X86EMUL_EXCEPTION )
             hvm_inject_hw_exception(TRAP_gp_fault, 0);
         if (rc != X86EMUL_OKAY)
--- a/xen/arch/x86/hvm/vm_event.c
+++ b/xen/arch/x86/hvm/vm_event.c
@@ -110,7 +110,7 @@  void hvm_vm_event_do_resume(struct vcpu
 
     if ( unlikely(w->do_write.cr3) )
     {
-        if ( hvm_set_cr3(w->cr3, false) == X86EMUL_EXCEPTION )
+        if ( hvm_set_cr3(w->cr3, w->cr3_noflush, false) == X86EMUL_EXCEPTION )
             hvm_inject_hw_exception(TRAP_gp_fault, 0);
 
         w->do_write.cr3 = 0;
--- a/xen/arch/x86/hvm/vmx/vvmx.c
+++ b/xen/arch/x86/hvm/vmx/vvmx.c
@@ -1028,7 +1028,7 @@  static void load_shadow_guest_state(stru
     if ( rc == X86EMUL_EXCEPTION )
         hvm_inject_hw_exception(TRAP_gp_fault, 0);
 
-    rc = hvm_set_cr3(get_vvmcs(v, GUEST_CR3), true);
+    rc = hvm_set_cr3(get_vvmcs(v, GUEST_CR3), false, true);
     if ( rc == X86EMUL_EXCEPTION )
         hvm_inject_hw_exception(TRAP_gp_fault, 0);
 
@@ -1242,7 +1242,7 @@  static void load_vvmcs_host_state(struct
     if ( rc == X86EMUL_EXCEPTION )
         hvm_inject_hw_exception(TRAP_gp_fault, 0);
 
-    rc = hvm_set_cr3(get_vvmcs(v, HOST_CR3), true);
+    rc = hvm_set_cr3(get_vvmcs(v, HOST_CR3), false, true);
     if ( rc == X86EMUL_EXCEPTION )
         hvm_inject_hw_exception(TRAP_gp_fault, 0);
 
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -275,6 +275,8 @@  struct monitor_write_data {
         unsigned int cr4 : 1;
     } do_write;
 
+    bool cr3_noflush;
+
     uint32_t msr;
     uint64_t value;
     uint64_t cr0;
--- a/xen/include/asm-x86/hvm/support.h
+++ b/xen/include/asm-x86/hvm/support.h
@@ -135,7 +135,7 @@  void hvm_shadow_handle_cd(struct vcpu *v
  */
 int hvm_set_efer(uint64_t value);
 int hvm_set_cr0(unsigned long value, bool may_defer);
-int hvm_set_cr3(unsigned long value, bool may_defer);
+int hvm_set_cr3(unsigned long value, bool noflush, bool may_defer);
 int hvm_set_cr4(unsigned long value, bool may_defer);
 int hvm_descriptor_access_intercept(uint64_t exit_info,
                                     uint64_t vmx_exit_qualification,