diff mbox

[5/9] x86/vvmx: Make updating shadow EPTP value more efficient

Message ID 20170929150144.7602-5-george.dunlap@citrix.com (mailing list archive)
State New, archived
Headers show

Commit Message

George Dunlap Sept. 29, 2017, 3:01 p.m. UTC
At the moment, the shadow EPTP value is written unconditionally in
ept_handle_violation().

Instead, write the value on vmentry to the guest; but only write it if
the value needs updating.

To detect this, add a flag to the nestedvcpu struct, stale_np2m, to
indicate when such an action is necessary.  Set it when the nested p2m
changes or when the np2m is flushed by an IPI, and clear it when we
write the new value.

Since an IPI invalidating the p2m may happen between
nvmx_switch_guest() and vmx_vmenter, but we can't perform the vmwrite
with interrupts disabled, check the flag just before entering the
guest and restart the vmentry if it's set.

Signed-off-by: Sergey Dyasli <sergey.dyasli@citrix.com>
Signed-off-by: George Dunlap <george.dunlap@citrix.com>
---
v2
- Merge patches 6, 7, and 14 ("x86/np2m: add stale_np2m flag",
    "x86/vvmx: restart nested vmentry in case of stale_np2m", and
    "x86/vvmx: remove EPTP write from ept_handle_violation()")
- Write to stale_np2m before the vmwrite to avoid a race

CC: Andrew Cooper <andrew.cooper3@citrix.com>
CC: Jan Beulich <jbeulich@suse.com>
CC: Jun Nakajima <jun.nakajima@intel.com>
CC: Kevin Tian <kevin.tian@intel.com>
---
 xen/arch/x86/hvm/nestedhvm.c   |  2 ++
 xen/arch/x86/hvm/vmx/entry.S   |  6 ++++++
 xen/arch/x86/hvm/vmx/vmx.c     | 14 +++++++-------
 xen/arch/x86/hvm/vmx/vvmx.c    | 20 ++++++++++++++++++++
 xen/arch/x86/mm/p2m.c          | 10 ++++++++--
 xen/include/asm-x86/hvm/vcpu.h |  1 +
 6 files changed, 44 insertions(+), 9 deletions(-)

Comments

Andrew Cooper Sept. 29, 2017, 3:56 p.m. UTC | #1
On 29/09/17 16:01, George Dunlap wrote:
> @@ -4203,13 +4197,17 @@ static void lbr_fixup(void)
>          bdw_erratum_bdf14_fixup();
>  }
>  
> -void vmx_vmenter_helper(const struct cpu_user_regs *regs)
> +int vmx_vmenter_helper(const struct cpu_user_regs *regs)

What are the semantics of this call?  The result looks boolean, and
indicates that the vmentry should be aborted?

>  {
>      struct vcpu *curr = current;
>      u32 new_asid, old_asid;
>      struct hvm_vcpu_asid *p_asid;
>      bool_t need_flush;
>  
> +    /* Shadow EPTP can't be updated here because irqs are disabled */
> +     if ( nestedhvm_vcpu_in_guestmode(curr) && vcpu_nestedhvm(curr).stale_np2m )
> +         return 1;
> +
>      if ( curr->domain->arch.hvm_domain.pi_ops.do_resume )
>          curr->domain->arch.hvm_domain.pi_ops.do_resume(curr);
>  
> @@ -4270,6 +4268,8 @@ void vmx_vmenter_helper(const struct cpu_user_regs *regs)
>      __vmwrite(GUEST_RIP,    regs->rip);
>      __vmwrite(GUEST_RSP,    regs->rsp);
>      __vmwrite(GUEST_RFLAGS, regs->rflags | X86_EFLAGS_MBS);
> +
> +    return 0;
>  }
>  
>  /*
> diff --git a/xen/arch/x86/hvm/vmx/vvmx.c b/xen/arch/x86/hvm/vmx/vvmx.c
> index 2f468e6ced..48e37158af 100644
> --- a/xen/arch/x86/hvm/vmx/vvmx.c
> +++ b/xen/arch/x86/hvm/vmx/vvmx.c
> @@ -1405,12 +1405,32 @@ static void virtual_vmexit(struct cpu_user_regs *regs)
>      vmsucceed(regs);
>  }
>  
> +static void nvmx_eptp_update(void)
> +{

struct vcpu *curr = current; will most likely half the compiled size of
this function.

> +    if ( !nestedhvm_vcpu_in_guestmode(current) ||
> +          vcpu_nestedhvm(current).nv_vmexit_pending ||
> +         !vcpu_nestedhvm(current).stale_np2m ||
> +         !nestedhvm_paging_mode_hap(current) )
> +        return;
> +
> +    /*
> +     * Interrupts are enabled here, so we need to clear stale_np2m
> +     * before we do the vmwrite.  If we do it in the other order, an
> +     * and IPI comes in changing the shadow eptp after the vmwrite,
> +     * we'll complete the vmenter with a stale eptp value.
> +     */
> +    vcpu_nestedhvm(current).stale_np2m = false;
> +    __vmwrite(EPT_POINTER, get_shadow_eptp(current));
> +}
> +
>  void nvmx_switch_guest(void)
>  {
>      struct vcpu *v = current;
>      struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
>      struct cpu_user_regs *regs = guest_cpu_user_regs();
>  
> +    nvmx_eptp_update();
> +
>      /*
>       * A pending IO emulation may still be not finished. In this case, no
>       * virtual vmswitch is allowed. Or else, the following IO emulation will
> diff --git a/xen/include/asm-x86/hvm/vcpu.h b/xen/include/asm-x86/hvm/vcpu.h
> index 6c54773f1c..5cfa4b4aa4 100644
> --- a/xen/include/asm-x86/hvm/vcpu.h
> +++ b/xen/include/asm-x86/hvm/vcpu.h
> @@ -115,6 +115,7 @@ struct nestedvcpu {
>  
>      bool_t nv_flushp2m; /* True, when p2m table must be flushed */
>      struct p2m_domain *nv_p2m; /* used p2m table for this vcpu */
> +    bool stale_np2m; /* True when p2m_base in VMCX02 is no longer valid */

VMCx02 ? which helps distinguish the two parts of semantic information
encoded there, and to avoid looking like we've gained a third acronym.

~Andrew
Sergey Dyasli Oct. 2, 2017, 9:41 a.m. UTC | #2
On Fri, 2017-09-29 at 16:56 +0100, Andrew Cooper wrote:
> On 29/09/17 16:01, George Dunlap wrote:

> > @@ -4203,13 +4197,17 @@ static void lbr_fixup(void)

> >          bdw_erratum_bdf14_fixup();

> >  }

> >  

> > -void vmx_vmenter_helper(const struct cpu_user_regs *regs)

> > +int vmx_vmenter_helper(const struct cpu_user_regs *regs)

> 

> What are the semantics of this call?  The result looks boolean, and

> indicates that the vmentry should be aborted?


Currently vmx_vmenter_helper() returns !0 if the vmentry must be
restarted.

> 

> >  {

> >      struct vcpu *curr = current;

> >      u32 new_asid, old_asid;

> >      struct hvm_vcpu_asid *p_asid;

> >      bool_t need_flush;

> >  

> > +    /* Shadow EPTP can't be updated here because irqs are disabled */

> > +     if ( nestedhvm_vcpu_in_guestmode(curr) && vcpu_nestedhvm(curr).stale_np2m )

> > +         return 1;

> > +

> >      if ( curr->domain->arch.hvm_domain.pi_ops.do_resume )

> >          curr->domain->arch.hvm_domain.pi_ops.do_resume(curr);

> >  

> > @@ -4270,6 +4268,8 @@ void vmx_vmenter_helper(const struct cpu_user_regs *regs)

> >      __vmwrite(GUEST_RIP,    regs->rip);

> >      __vmwrite(GUEST_RSP,    regs->rsp);

> >      __vmwrite(GUEST_RFLAGS, regs->rflags | X86_EFLAGS_MBS);

> > +

> > +    return 0;

> >  }

> >  

> >  /*

> > diff --git a/xen/arch/x86/hvm/vmx/vvmx.c b/xen/arch/x86/hvm/vmx/vvmx.c

> > index 2f468e6ced..48e37158af 100644

> > --- a/xen/arch/x86/hvm/vmx/vvmx.c

> > +++ b/xen/arch/x86/hvm/vmx/vvmx.c

> > @@ -1405,12 +1405,32 @@ static void virtual_vmexit(struct cpu_user_regs *regs)

> >      vmsucceed(regs);

> >  }

> >  

> > +static void nvmx_eptp_update(void)

> > +{

> 

> struct vcpu *curr = current; will most likely half the compiled size of

> this function.


Yes, passing a struct vcpu *v to nvmx_eptp_update() removes all
the additional:

    mov    %rsp,%rax
    or     $0x7fff,%rax
    
I wasn't aware of such behavior and will correct the usage of current
for all patches in v3.

> 

> > +    if ( !nestedhvm_vcpu_in_guestmode(current) ||

> > +          vcpu_nestedhvm(current).nv_vmexit_pending ||

> > +         !vcpu_nestedhvm(current).stale_np2m ||

> > +         !nestedhvm_paging_mode_hap(current) )

> > +        return;

> > +

> > +    /*

> > +     * Interrupts are enabled here, so we need to clear stale_np2m

> > +     * before we do the vmwrite.  If we do it in the other order, an

> > +     * and IPI comes in changing the shadow eptp after the vmwrite,

> > +     * we'll complete the vmenter with a stale eptp value.

> > +     */

> > +    vcpu_nestedhvm(current).stale_np2m = false;

> > +    __vmwrite(EPT_POINTER, get_shadow_eptp(current));

> > +}

> > +

> >  void nvmx_switch_guest(void)

> >  {

> >      struct vcpu *v = current;

> >      struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);

> >      struct cpu_user_regs *regs = guest_cpu_user_regs();

> >  

> > +    nvmx_eptp_update();

> > +

> >      /*

> >       * A pending IO emulation may still be not finished. In this case, no

> >       * virtual vmswitch is allowed. Or else, the following IO emulation will

> > diff --git a/xen/include/asm-x86/hvm/vcpu.h b/xen/include/asm-x86/hvm/vcpu.h

> > index 6c54773f1c..5cfa4b4aa4 100644

> > --- a/xen/include/asm-x86/hvm/vcpu.h

> > +++ b/xen/include/asm-x86/hvm/vcpu.h

> > @@ -115,6 +115,7 @@ struct nestedvcpu {

> >  

> >      bool_t nv_flushp2m; /* True, when p2m table must be flushed */

> >      struct p2m_domain *nv_p2m; /* used p2m table for this vcpu */

> > +    bool stale_np2m; /* True when p2m_base in VMCX02 is no longer valid */

> 

> VMCx02 ? which helps distinguish the two parts of semantic information

> encoded there, and to avoid looking like we've gained a third acronym.


I like this suggestion. Will update comments and commit messages for all
patches in v3.

-- 
Thanks,
Sergey
diff mbox

Patch

diff --git a/xen/arch/x86/hvm/nestedhvm.c b/xen/arch/x86/hvm/nestedhvm.c
index f2f7469d86..74a464d162 100644
--- a/xen/arch/x86/hvm/nestedhvm.c
+++ b/xen/arch/x86/hvm/nestedhvm.c
@@ -56,6 +56,7 @@  nestedhvm_vcpu_reset(struct vcpu *v)
     nv->nv_vvmcxaddr = INVALID_PADDR;
     nv->nv_flushp2m = 0;
     nv->nv_p2m = NULL;
+    nv->stale_np2m = false;
 
     hvm_asid_flush_vcpu_asid(&nv->nv_n2asid);
 
@@ -107,6 +108,7 @@  nestedhvm_flushtlb_ipi(void *info)
      */
     hvm_asid_flush_core();
     vcpu_nestedhvm(v).nv_p2m = NULL;
+    vcpu_nestedhvm(v).stale_np2m = true;
 }
 
 void
diff --git a/xen/arch/x86/hvm/vmx/entry.S b/xen/arch/x86/hvm/vmx/entry.S
index 53eedc6363..9fb8f89220 100644
--- a/xen/arch/x86/hvm/vmx/entry.S
+++ b/xen/arch/x86/hvm/vmx/entry.S
@@ -79,6 +79,8 @@  UNLIKELY_END(realmode)
 
         mov  %rsp,%rdi
         call vmx_vmenter_helper
+        cmp  $0,%eax
+        jne .Lvmx_vmentry_restart
         mov  VCPU_hvm_guest_cr2(%rbx),%rax
 
         pop  %r15
@@ -117,6 +119,10 @@  ENTRY(vmx_asm_do_vmentry)
         GET_CURRENT(bx)
         jmp  .Lvmx_do_vmentry
 
+.Lvmx_vmentry_restart:
+        sti
+        jmp  .Lvmx_do_vmentry
+
 .Lvmx_goto_emulator:
         sti
         mov  %rsp,%rdi
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 9cfa9b6965..c9a4111267 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -3249,12 +3249,6 @@  static void ept_handle_violation(ept_qual_t q, paddr_t gpa)
     case 0:         // Unhandled L1 EPT violation
         break;
     case 1:         // This violation is handled completly
-        /*Current nested EPT maybe flushed by other vcpus, so need
-         * to re-set its shadow EPTP pointer.
-         */
-        if ( nestedhvm_vcpu_in_guestmode(current) &&
-                        nestedhvm_paging_mode_hap(current ) )
-            __vmwrite(EPT_POINTER, get_shadow_eptp(current));
         return;
     case -1:        // This vioaltion should be injected to L1 VMM
         vcpu_nestedhvm(current).nv_vmexit_pending = 1;
@@ -4203,13 +4197,17 @@  static void lbr_fixup(void)
         bdw_erratum_bdf14_fixup();
 }
 
-void vmx_vmenter_helper(const struct cpu_user_regs *regs)
+int vmx_vmenter_helper(const struct cpu_user_regs *regs)
 {
     struct vcpu *curr = current;
     u32 new_asid, old_asid;
     struct hvm_vcpu_asid *p_asid;
     bool_t need_flush;
 
+    /* Shadow EPTP can't be updated here because irqs are disabled */
+     if ( nestedhvm_vcpu_in_guestmode(curr) && vcpu_nestedhvm(curr).stale_np2m )
+         return 1;
+
     if ( curr->domain->arch.hvm_domain.pi_ops.do_resume )
         curr->domain->arch.hvm_domain.pi_ops.do_resume(curr);
 
@@ -4270,6 +4268,8 @@  void vmx_vmenter_helper(const struct cpu_user_regs *regs)
     __vmwrite(GUEST_RIP,    regs->rip);
     __vmwrite(GUEST_RSP,    regs->rsp);
     __vmwrite(GUEST_RFLAGS, regs->rflags | X86_EFLAGS_MBS);
+
+    return 0;
 }
 
 /*
diff --git a/xen/arch/x86/hvm/vmx/vvmx.c b/xen/arch/x86/hvm/vmx/vvmx.c
index 2f468e6ced..48e37158af 100644
--- a/xen/arch/x86/hvm/vmx/vvmx.c
+++ b/xen/arch/x86/hvm/vmx/vvmx.c
@@ -1405,12 +1405,32 @@  static void virtual_vmexit(struct cpu_user_regs *regs)
     vmsucceed(regs);
 }
 
+static void nvmx_eptp_update(void)
+{
+    if ( !nestedhvm_vcpu_in_guestmode(current) ||
+          vcpu_nestedhvm(current).nv_vmexit_pending ||
+         !vcpu_nestedhvm(current).stale_np2m ||
+         !nestedhvm_paging_mode_hap(current) )
+        return;
+
+    /*
+     * Interrupts are enabled here, so we need to clear stale_np2m
+     * before we do the vmwrite.  If we do it in the other order, an
+     * and IPI comes in changing the shadow eptp after the vmwrite,
+     * we'll complete the vmenter with a stale eptp value.
+     */
+    vcpu_nestedhvm(current).stale_np2m = false;
+    __vmwrite(EPT_POINTER, get_shadow_eptp(current));
+}
+
 void nvmx_switch_guest(void)
 {
     struct vcpu *v = current;
     struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
     struct cpu_user_regs *regs = guest_cpu_user_regs();
 
+    nvmx_eptp_update();
+
     /*
      * A pending IO emulation may still be not finished. In this case, no
      * virtual vmswitch is allowed. Or else, the following IO emulation will
diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
index aa3182dec6..fd48a3b9db 100644
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -1812,6 +1812,12 @@  static void assign_np2m(struct vcpu *v, struct p2m_domain *p2m)
     cpumask_set_cpu(v->processor, p2m->dirty_cpumask);
 }
 
+static void nvcpu_flush(struct vcpu *v)
+{
+    hvm_asid_flush_vcpu(v);
+    vcpu_nestedhvm(v).stale_np2m = true;
+}
+
 struct p2m_domain *
 p2m_get_nestedp2m_locked(struct vcpu *v)
 {
@@ -1835,7 +1841,7 @@  p2m_get_nestedp2m_locked(struct vcpu *v)
         if ( p2m->np2m_base == np2m_base || p2m->np2m_base == P2M_BASE_EADDR )
         {
             if ( p2m->np2m_base == P2M_BASE_EADDR )
-                hvm_asid_flush_vcpu(v);
+                nvcpu_flush(v);
             p2m->np2m_base = np2m_base;
             assign_np2m(v, p2m);
             nestedp2m_unlock(d);
@@ -1851,7 +1857,7 @@  p2m_get_nestedp2m_locked(struct vcpu *v)
     p2m_flush_table(p2m);
     p2m_lock(p2m);
     p2m->np2m_base = np2m_base;
-    hvm_asid_flush_vcpu(v);
+    nvcpu_flush(v);
     assign_np2m(v, p2m);
     nestedp2m_unlock(d);
 
diff --git a/xen/include/asm-x86/hvm/vcpu.h b/xen/include/asm-x86/hvm/vcpu.h
index 6c54773f1c..5cfa4b4aa4 100644
--- a/xen/include/asm-x86/hvm/vcpu.h
+++ b/xen/include/asm-x86/hvm/vcpu.h
@@ -115,6 +115,7 @@  struct nestedvcpu {
 
     bool_t nv_flushp2m; /* True, when p2m table must be flushed */
     struct p2m_domain *nv_p2m; /* used p2m table for this vcpu */
+    bool stale_np2m; /* True when p2m_base in VMCX02 is no longer valid */
 
     struct hvm_vcpu_asid nv_n2asid;