diff mbox

[PATCHv5,1/2] x86/ept: invalidate guest physical mappings on VMENTER

Message ID 1450365426-23167-2-git-send-email-david.vrabel@citrix.com (mailing list archive)
State New, archived
Headers show

Commit Message

David Vrabel Dec. 17, 2015, 3:17 p.m. UTC
If a guest allocates a page and the tlbflush_timestamp on the page
indicates that a TLB flush of the previous owner is required, only the
linear and combined mappings are invalidated.  The guest-physical
mappings are not invalidated.

This is currently safe because the EPT code ensures that the
guest-physical and combined mappings are invalidated /before/ the page
is freed.  However, this prevents us from deferring the EPT invalidate
until after the page is freed (e.g., to defer the invalidate until the
p2m locks are released).

The TLB flush that may be done after allocating a page already causes
the original guest to VMEXIT, thus on VMENTER we can do an INVEPT if
one is pending.

This means __ept_sync_domain() need not do anything and thus the
on_selected_cpu() call does not need to wait for as long.

ept_sync_domain() now marks all PCPUs as needing to be invalidated,
including PCPUs that the domain has not run on.  We still only IPI
those PCPUs that are active so this does not result in any more INVEPT
calls.

We do not attempt to track when PCPUs may have cached translations
because the only safe way to clear this per-CPU state is if
immediately after an invalidate the PCPU is not active (i.e., the PCPU
is not in d->domain_dirty_cpumask).  Since we only invalidate on
VMENTER or by IPIing active PCPUs this can never happen.

Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Reviewed-by: George Dunlap <george.dunlap@citrix.com>
---
v4:
- __ept_sync_domain() is a no-op -- invalidates are done before VMENTER.
- initialize ept->invalidate to all ones so the initial invalidate is
  always done.
---
 xen/arch/x86/hvm/vmx/vmx.c         | 22 ++++++++++------------
 xen/arch/x86/mm/p2m-ept.c          | 29 ++++++++++++++---------------
 xen/include/asm-x86/hvm/vmx/vmcs.h |  3 +--
 3 files changed, 25 insertions(+), 29 deletions(-)

Comments

Andrew Cooper Dec. 17, 2015, 3:56 p.m. UTC | #1
On 17/12/15 15:17, David Vrabel wrote:
> diff --git a/xen/include/asm-x86/hvm/vmx/vmcs.h b/xen/include/asm-x86/hvm/vmx/vmcs.h
> index a8d4d5b..e778d86 100644
> --- a/xen/include/asm-x86/hvm/vmx/vmcs.h
> +++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
> @@ -67,7 +67,7 @@ struct ept_data {
>          };
>          u64 eptp;
>      };
> -    cpumask_var_t synced_mask;
> +    cpumask_var_t invalidate;

Comment? :(

~Andrew
Tian, Kevin Dec. 18, 2015, 7:53 a.m. UTC | #2
> From: David Vrabel [mailto:david.vrabel@citrix.com]
> Sent: Thursday, December 17, 2015 11:17 PM

[...]

> diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
> index f7c5e4f..cca35f2 100644
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c

[...]

> @@ -3507,6 +3495,16 @@ void vmx_vmenter_helper(const struct cpu_user_regs *regs)
>      if ( unlikely(need_flush) )
>          vpid_sync_all();
> 
> +    if ( paging_mode_hap(curr->domain) )
> +    {
> +        struct ept_data *ept = &p2m_get_hostp2m(curr->domain)->ept;
> +        unsigned int cpu = smp_processor_id();
> +
> +        if ( cpumask_test_cpu(cpu, ept->invalidate)
> +             && cpumask_test_and_clear_cpu(cpu, ept->invalidate) )

Just test_and_clear should be enough.

> +            __invept(INVEPT_SINGLE_CONTEXT, ept_get_eptp(ept), 0);
> +    }
> +
>   out:
>      HVMTRACE_ND(VMENTRY, 0, 1/*cycles*/, 0, 0, 0, 0, 0, 0, 0);
> 
> diff --git a/xen/arch/x86/mm/p2m-ept.c b/xen/arch/x86/mm/p2m-ept.c
> index eef0372..6e0cf89 100644
> --- a/xen/arch/x86/mm/p2m-ept.c
> +++ b/xen/arch/x86/mm/p2m-ept.c
> @@ -1089,9 +1089,10 @@ static void ept_memory_type_changed(struct p2m_domain
> *p2m)
> 
>  static void __ept_sync_domain(void *info)
>  {
> -    struct ept_data *ept = &((struct p2m_domain *)info)->ept;
> -
> -    __invept(INVEPT_SINGLE_CONTEXT, ept_get_eptp(ept), 0);
> +    /*
> +     * The invalidate will be done before VMENTER (see

invalidate -> invalidation?

> +     * vmx_vmenter_helper()).
> +     */
>  }
> 
>  void ept_sync_domain(struct p2m_domain *p2m)
> @@ -1107,16 +1108,10 @@ void ept_sync_domain(struct p2m_domain *p2m)
>      if ( nestedhvm_enabled(d) && !p2m_is_nestedp2m(p2m) )
>          p2m_flush_nestedp2m(d);
> 
> -    /*
> -     * Flush active cpus synchronously. Flush others the next time this domain
> -     * is scheduled onto them. We accept the race of other CPUs adding to
> -     * the ept_synced mask before on_selected_cpus() reads it, resulting in
> -     * unnecessary extra flushes, to avoid allocating a cpumask_t on the stack.
> -     */
> -    cpumask_and(ept_get_synced_mask(ept),
> -                d->domain_dirty_cpumask, &cpu_online_map);
> +    /* May need to invalidate on all PCPUs. */

It'd be good to add your earlier description of why invalidating all PCPUs is
OK here, to help others better understand the logic later.

Thanks
Kevin
David Vrabel Dec. 18, 2015, 10:18 a.m. UTC | #3
On 18/12/15 07:53, Tian, Kevin wrote:
>> From: David Vrabel [mailto:david.vrabel@citrix.com]
>> Sent: Thursday, December 17, 2015 11:17 PM
> 
> [...]
> 
>> diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
>> index f7c5e4f..cca35f2 100644
>> --- a/xen/arch/x86/hvm/vmx/vmx.c
>> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> 
> [...]
> 
>> @@ -3507,6 +3495,16 @@ void vmx_vmenter_helper(const struct cpu_user_regs *regs)
>>      if ( unlikely(need_flush) )
>>          vpid_sync_all();
>>
>> +    if ( paging_mode_hap(curr->domain) )
>> +    {
>> +        struct ept_data *ept = &p2m_get_hostp2m(curr->domain)->ept;
>> +        unsigned int cpu = smp_processor_id();
>> +
>> +        if ( cpumask_test_cpu(cpu, ept->invalidate)
>> +             && cpumask_test_and_clear_cpu(cpu, ept->invalidate) )
> 
> Just test_and_clear should be enough.

The first test is to avoid the locked test and clear in the common case.
 But this is probably better written as

  if ( cpumask_test_cpu(cpu, ept->invalidate) )
  {
      cpumask_clear_cpu(cpu, ept->invalidate);
      __invept(...);
   }

>> --- a/xen/arch/x86/mm/p2m-ept.c
>> +++ b/xen/arch/x86/mm/p2m-ept.c
>> @@ -1089,9 +1089,10 @@ static void ept_memory_type_changed(struct p2m_domain
>> *p2m)
>>
>>  static void __ept_sync_domain(void *info)
>>  {
>> -    struct ept_data *ept = &((struct p2m_domain *)info)->ept;
>> -
>> -    __invept(INVEPT_SINGLE_CONTEXT, ept_get_eptp(ept), 0);
>> +    /*
>> +     * The invalidate will be done before VMENTER (see
> 
> invalidate -> invalidation?

That would be the correct English grammar, yes.

David
George Dunlap Dec. 18, 2015, 10:24 a.m. UTC | #4
On 18/12/15 10:18, David Vrabel wrote:
> On 18/12/15 07:53, Tian, Kevin wrote:
>>> From: David Vrabel [mailto:david.vrabel@citrix.com]
>>> Sent: Thursday, December 17, 2015 11:17 PM
>>
>> [...]
>>
>>> diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
>>> index f7c5e4f..cca35f2 100644
>>> --- a/xen/arch/x86/hvm/vmx/vmx.c
>>> +++ b/xen/arch/x86/hvm/vmx/vmx.c
>>
>> [...]
>>
>>> @@ -3507,6 +3495,16 @@ void vmx_vmenter_helper(const struct cpu_user_regs *regs)
>>>      if ( unlikely(need_flush) )
>>>          vpid_sync_all();
>>>
>>> +    if ( paging_mode_hap(curr->domain) )
>>> +    {
>>> +        struct ept_data *ept = &p2m_get_hostp2m(curr->domain)->ept;
>>> +        unsigned int cpu = smp_processor_id();
>>> +
>>> +        if ( cpumask_test_cpu(cpu, ept->invalidate)
>>> +             && cpumask_test_and_clear_cpu(cpu, ept->invalidate) )
>>
>> Just test_and_clear should be enough.
> 
> The first test is to avoid the locked test and clear in the common case.
>  But this is probably better written as
> 
>   if ( cpumask_test_cpu(cpu, ept->invalidate) )
>   {
>       cpumask_clear_cpu(cpu, ept->invalidate);
>       __invept(...);
>    }
> 
>>> --- a/xen/arch/x86/mm/p2m-ept.c
>>> +++ b/xen/arch/x86/mm/p2m-ept.c
>>> @@ -1089,9 +1089,10 @@ static void ept_memory_type_changed(struct p2m_domain
>>> *p2m)
>>>
>>>  static void __ept_sync_domain(void *info)
>>>  {
>>> -    struct ept_data *ept = &((struct p2m_domain *)info)->ept;
>>> -
>>> -    __invept(INVEPT_SINGLE_CONTEXT, ept_get_eptp(ept), 0);
>>> +    /*
>>> +     * The invalidate will be done before VMENTER (see
>>
>> invalidate -> invalidation?
> 
> That would be the correct English grammer, yes.

FWIW I think either one is acceptable English grammar.

 -George
David Vrabel Dec. 18, 2015, 10:30 a.m. UTC | #5
On 18/12/15 10:24, George Dunlap wrote:
>>>> +     * The invalidate will be done before VMENTER (see
>>>
>>> invalidate -> invalidation?
>>
>> That would be the correct English grammar, yes.
> 
> FWIW I think either one is acceptable English grammar.

invalidate is a verb, invalidation is the corresponding noun.  Although
I grant you that nouning verbs is quite common in informal English.  But
I think it best to use more formal English in comments etc. to benefit
non-native speakers.

David
Jan Beulich Dec. 18, 2015, 10:32 a.m. UTC | #6
>>> On 18.12.15 at 11:18, <david.vrabel@citrix.com> wrote:
> On 18/12/15 07:53, Tian, Kevin wrote:
>>> From: David Vrabel [mailto:david.vrabel@citrix.com]
>>> Sent: Thursday, December 17, 2015 11:17 PM
>> 
>> [...]
>> 
>>> diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
>>> index f7c5e4f..cca35f2 100644
>>> --- a/xen/arch/x86/hvm/vmx/vmx.c
>>> +++ b/xen/arch/x86/hvm/vmx/vmx.c
>> 
>> [...]
>> 
>>> @@ -3507,6 +3495,16 @@ void vmx_vmenter_helper(const struct cpu_user_regs 
> *regs)
>>>      if ( unlikely(need_flush) )
>>>          vpid_sync_all();
>>>
>>> +    if ( paging_mode_hap(curr->domain) )
>>> +    {
>>> +        struct ept_data *ept = &p2m_get_hostp2m(curr->domain)->ept;
>>> +        unsigned int cpu = smp_processor_id();
>>> +
>>> +        if ( cpumask_test_cpu(cpu, ept->invalidate)
>>> +             && cpumask_test_and_clear_cpu(cpu, ept->invalidate) )
>> 
>> Just test_and_clear should be enough.
> 
> The first test is to avoid the locked test and clear in the common case.
>  But this is probably better written as
> 
>   if ( cpumask_test_cpu(cpu, ept->invalidate) )
>   {
>       cpumask_clear_cpu(cpu, ept->invalidate);
>       __invept(...);
>    }

The question is what you care for more: Avoiding the invalidation if
you lose the race here, or avoiding the extra conditional. (Either is
correct in this context afaict.)

Jan
David Vrabel Dec. 18, 2015, 11:37 a.m. UTC | #7
On 18/12/15 10:32, Jan Beulich wrote:
>>>> On 18.12.15 at 11:18, <david.vrabel@citrix.com> wrote:
>> On 18/12/15 07:53, Tian, Kevin wrote:
>>>> From: David Vrabel [mailto:david.vrabel@citrix.com]
>>>> Sent: Thursday, December 17, 2015 11:17 PM
>>>
>>> [...]
>>>
>>>> diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
>>>> index f7c5e4f..cca35f2 100644
>>>> --- a/xen/arch/x86/hvm/vmx/vmx.c
>>>> +++ b/xen/arch/x86/hvm/vmx/vmx.c
>>>
>>> [...]
>>>
>>>> @@ -3507,6 +3495,16 @@ void vmx_vmenter_helper(const struct cpu_user_regs 
>> *regs)
>>>>      if ( unlikely(need_flush) )
>>>>          vpid_sync_all();
>>>>
>>>> +    if ( paging_mode_hap(curr->domain) )
>>>> +    {
>>>> +        struct ept_data *ept = &p2m_get_hostp2m(curr->domain)->ept;
>>>> +        unsigned int cpu = smp_processor_id();
>>>> +
>>>> +        if ( cpumask_test_cpu(cpu, ept->invalidate)
>>>> +             && cpumask_test_and_clear_cpu(cpu, ept->invalidate) )
>>>
>>> Just test_and_clear should be enough.
>>
>> The first test is to avoid the locked test and clear in the common case.
>>  But this is probably better written as
>>
>>   if ( cpumask_test_cpu(cpu, ept->invalidate) )
>>   {
>>       cpumask_clear_cpu(cpu, ept->invalidate);
>>       __invept(...);
>>    }
> 
> The question is what you care for more: Avoiding the invalidation if
> you lose the race here, or avoiding the extra conditional. (Either is
> correct in this context afaict.)

There is no race since each pcpu clears its own bit.

David
diff mbox

Patch

diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index f7c5e4f..cca35f2 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -744,24 +744,12 @@  static void vmx_ctxt_switch_from(struct vcpu *v)
 
 static void vmx_ctxt_switch_to(struct vcpu *v)
 {
-    struct domain *d = v->domain;
     unsigned long old_cr4 = read_cr4(), new_cr4 = mmu_cr4_features;
-    struct ept_data *ept_data = &p2m_get_hostp2m(d)->ept;
 
     /* HOST_CR4 in VMCS is always mmu_cr4_features. Sync CR4 now. */
     if ( old_cr4 != new_cr4 )
         write_cr4(new_cr4);
 
-    if ( paging_mode_hap(d) )
-    {
-        unsigned int cpu = smp_processor_id();
-        /* Test-and-test-and-set this CPU in the EPT-is-synced mask. */
-        if ( !cpumask_test_cpu(cpu, ept_get_synced_mask(ept_data)) &&
-             !cpumask_test_and_set_cpu(cpu,
-                                       ept_get_synced_mask(ept_data)) )
-            __invept(INVEPT_SINGLE_CONTEXT, ept_get_eptp(ept_data), 0);
-    }
-
     vmx_restore_guest_msrs(v);
     vmx_restore_dr(v);
 }
@@ -3507,6 +3495,16 @@  void vmx_vmenter_helper(const struct cpu_user_regs *regs)
     if ( unlikely(need_flush) )
         vpid_sync_all();
 
+    if ( paging_mode_hap(curr->domain) )
+    {
+        struct ept_data *ept = &p2m_get_hostp2m(curr->domain)->ept;
+        unsigned int cpu = smp_processor_id();
+
+        if ( cpumask_test_cpu(cpu, ept->invalidate)
+             && cpumask_test_and_clear_cpu(cpu, ept->invalidate) )
+            __invept(INVEPT_SINGLE_CONTEXT, ept_get_eptp(ept), 0);
+    }
+
  out:
     HVMTRACE_ND(VMENTRY, 0, 1/*cycles*/, 0, 0, 0, 0, 0, 0, 0);
 
diff --git a/xen/arch/x86/mm/p2m-ept.c b/xen/arch/x86/mm/p2m-ept.c
index eef0372..6e0cf89 100644
--- a/xen/arch/x86/mm/p2m-ept.c
+++ b/xen/arch/x86/mm/p2m-ept.c
@@ -1089,9 +1089,10 @@  static void ept_memory_type_changed(struct p2m_domain *p2m)
 
 static void __ept_sync_domain(void *info)
 {
-    struct ept_data *ept = &((struct p2m_domain *)info)->ept;
-
-    __invept(INVEPT_SINGLE_CONTEXT, ept_get_eptp(ept), 0);
+    /*
+     * The invalidate will be done before VMENTER (see
+     * vmx_vmenter_helper()).
+     */
 }
 
 void ept_sync_domain(struct p2m_domain *p2m)
@@ -1107,16 +1108,10 @@  void ept_sync_domain(struct p2m_domain *p2m)
     if ( nestedhvm_enabled(d) && !p2m_is_nestedp2m(p2m) )
         p2m_flush_nestedp2m(d);
 
-    /*
-     * Flush active cpus synchronously. Flush others the next time this domain
-     * is scheduled onto them. We accept the race of other CPUs adding to
-     * the ept_synced mask before on_selected_cpus() reads it, resulting in
-     * unnecessary extra flushes, to avoid allocating a cpumask_t on the stack.
-     */
-    cpumask_and(ept_get_synced_mask(ept),
-                d->domain_dirty_cpumask, &cpu_online_map);
+    /* May need to invalidate on all PCPUs. */
+    cpumask_setall(ept->invalidate);
 
-    on_selected_cpus(ept_get_synced_mask(ept),
+    on_selected_cpus(d->domain_dirty_cpumask,
                      __ept_sync_domain, p2m, 1);
 }
 
@@ -1182,10 +1177,14 @@  int ept_p2m_init(struct p2m_domain *p2m)
         p2m->flush_hardware_cached_dirty = ept_flush_pml_buffers;
     }
 
-    if ( !zalloc_cpumask_var(&ept->synced_mask) )
+    if ( !zalloc_cpumask_var(&ept->invalidate) )
         return -ENOMEM;
 
-    on_each_cpu(__ept_sync_domain, p2m, 1);
+    /*
+     * Assume an initial invalidation is required, in case an EP4TA is
+     * reused.
+     */
+    cpumask_setall(ept->invalidate);
 
     return 0;
 }
@@ -1193,7 +1192,7 @@  int ept_p2m_init(struct p2m_domain *p2m)
 void ept_p2m_uninit(struct p2m_domain *p2m)
 {
     struct ept_data *ept = &p2m->ept;
-    free_cpumask_var(ept->synced_mask);
+    free_cpumask_var(ept->invalidate);
 }
 
 static void ept_dump_p2m_table(unsigned char key)
diff --git a/xen/include/asm-x86/hvm/vmx/vmcs.h b/xen/include/asm-x86/hvm/vmx/vmcs.h
index a8d4d5b..e778d86 100644
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
@@ -67,7 +67,7 @@  struct ept_data {
         };
         u64 eptp;
     };
-    cpumask_var_t synced_mask;
+    cpumask_var_t invalidate;
 };
 
 #define _VMX_DOMAIN_PML_ENABLED    0
@@ -97,7 +97,6 @@  struct pi_desc {
 #define ept_get_wl(ept)   ((ept)->ept_wl)
 #define ept_get_asr(ept)  ((ept)->asr)
 #define ept_get_eptp(ept) ((ept)->eptp)
-#define ept_get_synced_mask(ept) ((ept)->synced_mask)
 
 #define NR_PML_ENTRIES   512