
[v2,2/3] x86emul: correct handling of FPU insns faulting on memory write

Message ID 58C9254F02000078001433B0@prv-mh.provo.novell.com (mailing list archive)
State New, archived

Commit Message

Jan Beulich March 15, 2017, 10:28 a.m. UTC
When an FPU instruction with a memory destination fails during the
memory write, it should not affect FPU register state. Due to the way
we emulate FPU (and SIMD) instructions, we can only guarantee this by
- backing out changes to the FPU register state in such a case or
- doing a descriptor read and/or page walk up front, perhaps with the
  stubs accessing the actual memory location then.
The latter would require a significant change in how the emulator does
its guest memory accessing, so for now the former variant is being
chosen.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
---
v2: Re-base.
---
Note that the state save overhead (unless state hadn't been loaded at
all before, which should only be possible if a guest is fiddling with
the instruction stream under emulation) is taken for every FPU insn
hitting the emulator. We could reduce this to just the insns writing to
memory, but that would involve quite a few further changes, and in the
resulting code even more paths would need to match up with one another.
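
As a rough, self-contained sketch of the chosen variant (every name
below is a simplified stand-in rather than an actual Xen interface),
the latch-and-back-out sequence amounts to:

#include <stdbool.h>

/* Simplified model: a copy of the register file is latched when the
 * FPU is acquired, and the live state is restored from it only if the
 * subsequent memory write fails. */
struct fpu_state { unsigned char bytes[512]; };

static struct fpu_state live;     /* state the emulated insn updates */
static struct fpu_state latched;  /* snapshot taken up front */

static void model_get_fpu(void)
{
    latched = live;               /* analogue of save_fpu_enable() */
}

static void model_put_fpu(bool write_failed)
{
    if ( write_failed )
        live = latched;           /* back out the stub's updates */
}

static bool emulate_fpu_store(bool (*write)(const void *, unsigned int))
{
    bool ok;

    model_get_fpu();
    /* ... instruction stub runs here and may update 'live' ... */
    ok = write(live.bytes, sizeof(live.bytes));
    model_put_fpu(!ok);

    return ok;
}

In the actual patch the restore is deferred rather than immediate:
hvmemul_put_fpu() clears ->fpu_dirtied and sets CR0.TS, so the latched
state gets reloaded on the guest's next FPU use instead of right away.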

--- a/tools/fuzz/x86_instruction_emulator/x86-insn-emulator-fuzzer.c
+++ b/tools/fuzz/x86_instruction_emulator/x86-insn-emulator-fuzzer.c
@@ -433,6 +433,7 @@ static struct x86_emulate_ops fuzz_emulo
     SET(wbinvd),
     SET(invlpg),
     .get_fpu    = emul_test_get_fpu,
+    .put_fpu    = emul_test_put_fpu,
     .cpuid      = emul_test_cpuid,
 };
 #undef SET
--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -293,6 +293,7 @@ static struct x86_emulate_ops emulops =
     .read_cr    = emul_test_read_cr,
     .read_msr   = read_msr,
     .get_fpu    = emul_test_get_fpu,
+    .put_fpu    = emul_test_put_fpu,
 };
 
 int main(int argc, char **argv)
--- a/tools/tests/x86_emulator/x86_emulate.c
+++ b/tools/tests/x86_emulator/x86_emulate.c
@@ -138,4 +138,11 @@ int emul_test_get_fpu(
     return X86EMUL_OKAY;
 }
 
+void emul_test_put_fpu(
+    struct x86_emulate_ctxt *ctxt,
+    enum x86_emulate_fpu_type backout)
+{
+    /* TBD */
+}
+
 #include "x86_emulate/x86_emulate.c"
--- a/tools/tests/x86_emulator/x86_emulate.h
+++ b/tools/tests/x86_emulator/x86_emulate.h
@@ -178,3 +178,7 @@ int emul_test_get_fpu(
     void *exception_callback_arg,
     enum x86_emulate_fpu_type type,
     struct x86_emulate_ctxt *ctxt);
+
+void emul_test_put_fpu(
+    struct x86_emulate_ctxt *ctxt,
+    enum x86_emulate_fpu_type backout);
--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -15,6 +15,7 @@
 #include <xen/paging.h>
 #include <xen/trace.h>
 #include <asm/event.h>
+#include <asm/i387.h>
 #include <asm/xstate.h>
 #include <asm/hvm/emulate.h>
 #include <asm/hvm/hvm.h>
@@ -1619,6 +1620,35 @@ static int hvmemul_get_fpu(
 
     if ( !curr->fpu_dirtied )
         hvm_funcs.fpu_dirty_intercept();
+    else if ( type == X86EMUL_FPU_fpu )
+    {
+        const typeof(curr->arch.xsave_area->fpu_sse) *fpu_ctxt =
+            curr->arch.fpu_ctxt;
+
+        /*
+         * Latch current register state so that we can back out changes
+         * if needed (namely when a memory write fails after register state
+         * has already been updated).
+         * NB: We don't really need the "enable" part of the called function
+         * (->fpu_dirtied set implies CR0.TS clear), but the additional
+         * overhead should be low enough to not warrant introduction of yet
+         * another slightly different function. However, we need to undo the
+         * ->fpu_dirtied clearing the function does as well as the possible
+         * masking of all exceptions by FNSTENV.
+         */
+        save_fpu_enable();
+        curr->fpu_dirtied = true;
+        if ( (fpu_ctxt->fcw & 0x3f) != 0x3f )
+        {
+            uint16_t fcw;
+
+            asm ( "fnstcw %0" : "=m" (fcw) );
+            if ( (fcw & 0x3f) == 0x3f )
+                asm ( "fldcw %0" :: "m" (fpu_ctxt->fcw) );
+            else
+                ASSERT(fcw == fpu_ctxt->fcw);
+        }
+    }
 
     curr->arch.hvm_vcpu.fpu_exception_callback = exception_callback;
     curr->arch.hvm_vcpu.fpu_exception_callback_arg = exception_callback_arg;
@@ -1627,10 +1657,24 @@ static int hvmemul_get_fpu(
 }
 
 static void hvmemul_put_fpu(
-    struct x86_emulate_ctxt *ctxt)
+    struct x86_emulate_ctxt *ctxt,
+    enum x86_emulate_fpu_type backout)
 {
     struct vcpu *curr = current;
+
     curr->arch.hvm_vcpu.fpu_exception_callback = NULL;
+
+    if ( backout == X86EMUL_FPU_fpu )
+    {
+        /*
+         * To back out changes to the register file simply adjust state such
+         * that upon next FPU insn use by the guest we'll reload the state
+         * saved (or freshly loaded) by hvmemul_get_fpu().
+         */
+        curr->fpu_dirtied = false;
+        stts();
+        hvm_funcs.fpu_leave(curr);
+    }
 }
 
 static int hvmemul_invlpg(
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -2268,6 +2268,7 @@ static struct hvm_function_table __initd
     .update_guest_cr      = svm_update_guest_cr,
     .update_guest_efer    = svm_update_guest_efer,
     .update_guest_vendor  = svm_update_guest_vendor,
+    .fpu_leave            = svm_fpu_leave,
     .set_guest_pat        = svm_set_guest_pat,
     .get_guest_pat        = svm_get_guest_pat,
     .set_tsc_offset       = svm_set_tsc_offset,
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -2128,6 +2128,7 @@ static struct hvm_function_table __initd
     .update_guest_cr      = vmx_update_guest_cr,
     .update_guest_efer    = vmx_update_guest_efer,
     .update_guest_vendor  = vmx_update_guest_vendor,
+    .fpu_leave            = vmx_fpu_leave,
     .set_guest_pat        = vmx_set_guest_pat,
     .get_guest_pat        = vmx_get_guest_pat,
     .set_tsc_offset       = vmx_set_tsc_offset,
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -965,6 +965,7 @@ static int _get_fpu(
     {
         unsigned long cr0;
 
+        fail_if(type == X86EMUL_FPU_fpu && !ops->put_fpu);
         fic->type = type;
 
         fail_if(!ops->read_cr);
@@ -1026,11 +1027,14 @@ do {
 
 static void put_fpu(
     struct fpu_insn_ctxt *fic,
+    bool failed_late,
     struct x86_emulate_ctxt *ctxt,
     const struct x86_emulate_ops *ops)
 {
-    if ( fic->type != X86EMUL_FPU_none && ops->put_fpu )
-        ops->put_fpu(ctxt);
+    if ( unlikely(failed_late) && fic->type == X86EMUL_FPU_fpu )
+        ops->put_fpu(ctxt, X86EMUL_FPU_fpu);
+    else if ( fic->type != X86EMUL_FPU_none && ops->put_fpu )
+        ops->put_fpu(ctxt, X86EMUL_FPU_none);
     fic->type = X86EMUL_FPU_none;
 }
 
@@ -3716,9 +3720,9 @@ x86_emulate(
         break;
 
     case 0x9b:  /* wait/fwait */
-        fic.insn_bytes = 1;
         host_and_vcpu_must_have(fpu);
         get_fpu(X86EMUL_FPU_wait, &fic);
+        fic.insn_bytes = 1;
         asm volatile ( "fwait" ::: "memory" );
         check_fpu_exn(&fic);
         break;
@@ -7892,7 +7896,7 @@ x86_emulate(
     }
 
  complete_insn: /* Commit shadow register state. */
-    put_fpu(&fic, ctxt, ops);
+    put_fpu(&fic, false, ctxt, ops);
 
     /* Zero the upper 32 bits of %rip if not in 64-bit mode. */
     if ( !mode_64bit() )
@@ -7916,7 +7920,7 @@ x86_emulate(
     ctxt->regs->eflags &= ~X86_EFLAGS_RF;
 
  done:
-    put_fpu(&fic, ctxt, ops);
+    put_fpu(&fic, fic.insn_bytes > 0 && dst.type == OP_MEM, ctxt, ops);
     put_stub(stub);
     return rc;
 #undef state
--- a/xen/arch/x86/x86_emulate/x86_emulate.h
+++ b/xen/arch/x86/x86_emulate/x86_emulate.h
@@ -437,9 +437,12 @@ struct x86_emulate_ops
      * put_fpu: Relinquish the FPU. Unhook from FPU/SIMD exception handlers.
      *  The handler, if installed, must be prepared to get called without
      *  the get_fpu one having got called before!
+     * @backout: Undo updates to the specified register file (can, besides
+     *           X86EMUL_FPU_none, only be X86EMUL_FPU_fpu at present).
      */
     void (*put_fpu)(
-        struct x86_emulate_ctxt *ctxt);
+        struct x86_emulate_ctxt *ctxt,
+        enum x86_emulate_fpu_type backout);
 
     /* invlpg: Invalidate paging structures which map addressed byte. */
     int (*invlpg)(
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -140,6 +140,8 @@ struct hvm_function_table {
 
     void (*update_guest_vendor)(struct vcpu *v);
 
+    void (*fpu_leave)(struct vcpu *v);
+
     int  (*get_guest_pat)(struct vcpu *v, u64 *);
     int  (*set_guest_pat)(struct vcpu *v, u64);

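The FCW handling in hvmemul_get_fpu() above is worth spelling out:
save_fpu_enable() may store state via FNSTENV, which as a side effect
masks all x87 exceptions in the hardware FCW. A condensed sketch of the
fixup follows (0x3f covers the six exception mask bits IM/DM/ZM/OM/UM/PM;
the helper names are invented for illustration):

#include <stdint.h>

#define FCW_EXN_MASK 0x3f   /* x87 exception mask bits */

/* Only needed when the guest runs with some exception unmasked.  If the
 * hardware FCW now has everything masked, that is the FNSTENV side
 * effect, so reload the guest value; otherwise the hardware FCW must
 * already equal the guest's (the patch ASSERTs exactly that). */
static void fixup_fcw(uint16_t guest_fcw, uint16_t hw_fcw,
                      void (*load_fcw)(uint16_t))
{
    if ( (guest_fcw & FCW_EXN_MASK) == FCW_EXN_MASK )
        return;                  /* everything masked anyway */
    if ( (hw_fcw & FCW_EXN_MASK) == FCW_EXN_MASK )
        load_fcw(guest_fcw);     /* undo the FNSTENV-style masking */
}
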
Comments

Paul Durrant March 15, 2017, 12:06 p.m. UTC | #1
> -----Original Message-----
> From: Jan Beulich [mailto:JBeulich@suse.com]
> Sent: 15 March 2017 10:28
> To: xen-devel <xen-devel@lists.xenproject.org>
> Cc: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>; Andrew
> Cooper <Andrew.Cooper3@citrix.com>; Paul Durrant
> <Paul.Durrant@citrix.com>; Jun Nakajima <jun.nakajima@intel.com>; Kevin
> Tian <kevin.tian@intel.com>; Boris Ostrovsky <boris.ostrovsky@oracle.com>
> Subject: [PATCH v2 2/3] x86emul: correct handling of FPU insns faulting on
> memory write
> 
> When an FPU instruction with a memory destination fails during the
> memory write, it should not affect FPU register state. Due to the way
> we emulate FPU (and SIMD) instructions, we can only guarantee this by
> - backing out changes to the FPU register state in such a case or
> - doing a descriptor read and/or page walk up front, perhaps with the
>   stubs accessing the actual memory location then.
> The latter would require a significant change in how the emulator does
> its guest memory accessing, so for now the former variant is being
> chosen.
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
> Reviewed-by: Kevin Tian <kevin.tian@intel.com>

emulate.c parts...

Reviewed-by: Paul Durrant <paul.durrant@citrix.com>

Boris Ostrovsky March 15, 2017, 1:24 p.m. UTC | #2
On 03/15/2017 06:28 AM, Jan Beulich wrote:
> When an FPU instruction with a memory destination fails during the
> memory write, it should not affect FPU register state. Due to the way
> we emulate FPU (and SIMD) instructions, we can only guarantee this by
> - backing out changes to the FPU register state in such a case or
> - doing a descriptor read and/or page walk up front, perhaps with the
>   stubs accessing the actual memory location then.
> The latter would require a significant change in how the emulator does
> its guest memory accessing, so for now the former variant is being
> chosen.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
> Reviewed-by: Kevin Tian <kevin.tian@intel.com>

Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>

with one question:

>  
> @@ -3716,9 +3720,9 @@ x86_emulate(
>          break;
>  
>      case 0x9b:  /* wait/fwait */
> -        fic.insn_bytes = 1;
>          host_and_vcpu_must_have(fpu);
>          get_fpu(X86EMUL_FPU_wait, &fic);
> +        fic.insn_bytes = 1;
>          asm volatile ( "fwait" ::: "memory" );
>          check_fpu_exn(&fic);
>          break;
>


Why is this needed?

-boris
Jan Beulich March 15, 2017, 1:31 p.m. UTC | #3
>>> On 15.03.17 at 14:24, <boris.ostrovsky@oracle.com> wrote:
> On 03/15/2017 06:28 AM, Jan Beulich wrote:
>> @@ -3716,9 +3720,9 @@ x86_emulate(
>>          break;
>>  
>>      case 0x9b:  /* wait/fwait */
>> -        fic.insn_bytes = 1;
>>          host_and_vcpu_must_have(fpu);
>>          get_fpu(X86EMUL_FPU_wait, &fic);
>> +        fic.insn_bytes = 1;
>>          asm volatile ( "fwait" ::: "memory" );
>>          check_fpu_exn(&fic);
>>          break;
> 
> Why is this needed?

This isn't strictly needed, but desirable, due to the conditional being
added in

@@ -7916,7 +7920,7 @@ x86_emulate(
     ctxt->regs->eflags &= ~X86_EFLAGS_RF;
 
  done:
-    put_fpu(&fic, ctxt, ops);
+    put_fpu(&fic, fic.insn_bytes > 0 && dst.type == OP_MEM, ctxt, ops);
     put_stub(stub);
     return rc;
 #undef state

(both host_and_vcpu_must_have() and get_fpu() may end up
branching to "done"). Everywhere else the field is already being
set after such basic checks.

Jan
Boris Ostrovsky March 15, 2017, 1:48 p.m. UTC | #4
On 03/15/2017 09:31 AM, Jan Beulich wrote:
>>>> On 15.03.17 at 14:24, <boris.ostrovsky@oracle.com> wrote:
>> On 03/15/2017 06:28 AM, Jan Beulich wrote:
>>> @@ -3716,9 +3720,9 @@ x86_emulate(
>>>          break;
>>>  
>>>      case 0x9b:  /* wait/fwait */
>>> -        fic.insn_bytes = 1;
>>>          host_and_vcpu_must_have(fpu);
>>>          get_fpu(X86EMUL_FPU_wait, &fic);
>>> +        fic.insn_bytes = 1;
>>>          asm volatile ( "fwait" ::: "memory" );
>>>          check_fpu_exn(&fic);
>>>          break;
>> Why is this needed?
> This isn't strictly needed, but desirable, due to the conditional being
> added in
>
> @@ -7916,7 +7920,7 @@ x86_emulate(
>      ctxt->regs->eflags &= ~X86_EFLAGS_RF;
>  
>   done:
> -    put_fpu(&fic, ctxt, ops);
> +    put_fpu(&fic, fic.insn_bytes > 0 && dst.type == OP_MEM, ctxt, ops);
>      put_stub(stub);
>      return rc;
>  #undef state
>
> (both host_and_vcpu_must_have() and get_fpu() may end up
> branching to "done"). Everywhere else the field is already being
> set after such basic checks.

Ah, OK.

But fic is a local variable that is not initialized (is it?) so
insn_bytes may be non-zero anyway?

-boris
Jan Beulich March 15, 2017, 3:46 p.m. UTC | #5
>>> On 15.03.17 at 14:48, <boris.ostrovsky@oracle.com> wrote:
> On 03/15/2017 09:31 AM, Jan Beulich wrote:
>>>>> On 15.03.17 at 14:24, <boris.ostrovsky@oracle.com> wrote:
>>> On 03/15/2017 06:28 AM, Jan Beulich wrote:
>>>> @@ -3716,9 +3720,9 @@ x86_emulate(
>>>>          break;
>>>>  
>>>>      case 0x9b:  /* wait/fwait */
>>>> -        fic.insn_bytes = 1;
>>>>          host_and_vcpu_must_have(fpu);
>>>>          get_fpu(X86EMUL_FPU_wait, &fic);
>>>> +        fic.insn_bytes = 1;
>>>>          asm volatile ( "fwait" ::: "memory" );
>>>>          check_fpu_exn(&fic);
>>>>          break;
>>> Why is this needed?
>> This isn't strictly needed, but desirable, due to the conditional being
>> added in
>>
>> @@ -7916,7 +7920,7 @@ x86_emulate(
>>      ctxt->regs->eflags &= ~X86_EFLAGS_RF;
>>  
>>   done:
>> -    put_fpu(&fic, ctxt, ops);
>> +    put_fpu(&fic, fic.insn_bytes > 0 && dst.type == OP_MEM, ctxt, ops);
>>      put_stub(stub);
>>      return rc;
>>  #undef state
>>
>> (both host_and_vcpu_must_have() and get_fpu() may end up
>> branching to "done"). Everywhere else the field is already being
>> set after such basic checks.
> 
> Ah, OK.
> 
> But fic is a local variable that is not initialized (is it?) so
> insn_bytes may be non-zero anyway?

We have this at the top of x86_emulate():

    struct fpu_insn_ctxt fic = { .type = X86EMUL_FPU_none, .exn_raised = -1 };

(introduced by patch 1).

Jan
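
To make the exchange above concrete, the guard computed at the "done"
label reduces to something like this (a hypothetical, condensed
rendering in which only insn_bytes and OP_MEM mirror the real fields):

#include <stdbool.h>

enum op_type { OP_NONE, OP_MEM };

struct fpu_insn_ctxt {
    unsigned int insn_bytes;  /* zeroed by the initializer quoted above */
};

/* Back out register state only for failures that can have occurred
 * after the stub ran, i.e. once insn_bytes has been set and the
 * destination was a memory operand. */
static bool failed_late(const struct fpu_insn_ctxt *fic, enum op_type dst)
{
    return fic->insn_bytes > 0 && dst == OP_MEM;
}

For fwait itself the destination presumably stays OP_NONE, so the
reordering closes no actual hole; it merely preserves the invariant that
insn_bytes is non-zero only once the FPU has successfully been acquired,
matching every other FPU path in the emulator.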