[v5,2/7] x86/paging: add TLB flush hooks

Message ID 20200219174354.84726-3-roger.pau@citrix.com (mailing list archive)
State Superseded
Series x86: improve assisted tlb flush and use it in guest mode

Commit Message

Roger Pau Monné Feb. 19, 2020, 5:43 p.m. UTC
Add shadow and hap implementation-specific helpers to perform guest
TLB flushes. Note that the code for both is exactly the same at the
moment, and is copied from hvm_flush_vcpu_tlb. This will be changed by
further patches that will add implementation-specific optimizations to
them.

No functional change intended.

Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
Reviewed-by: Wei Liu <wl@xen.org>
Acked-by: Tim Deegan <tim@xen.org>
---
Changes since v3:
 - Fix stray newline removal.
 - Fix return of shadow_flush_tlb dummy function.
---
 xen/arch/x86/hvm/hvm.c          | 51 ++----------------------------
 xen/arch/x86/mm/hap/hap.c       | 54 ++++++++++++++++++++++++++++++++
 xen/arch/x86/mm/shadow/common.c | 55 +++++++++++++++++++++++++++++++++
 xen/include/asm-x86/hap.h       |  3 ++
 xen/include/asm-x86/shadow.h    | 12 +++++++
 5 files changed, 127 insertions(+), 48 deletions(-)
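
For context, the wrapper being split up here keeps its existing signature, so
its callers are untouched. Below is a minimal sketch of how it is typically
invoked, modelled on the always_flush() callback visible in the hunk further
down; the hvmop_flush_tlb_all() wiring is paraphrased and only an
approximation of the surrounding code:

static bool always_flush(void *ctxt, struct vcpu *v)
{
    /* Select every vCPU of the current domain for flushing. */
    return true;
}

static int hvmop_flush_tlb_all(void)
{
    if ( !is_hvm_domain(current->domain) )
        return -EINVAL;

    /* A false return means the trylock failed; ask for the hypercall to be retried. */
    return hvm_flush_vcpu_tlb(always_flush, NULL) ? 0 : -ERESTART;
}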

Comments

Jan Beulich Feb. 28, 2020, 2:50 p.m. UTC | #1
On 19.02.2020 18:43, Roger Pau Monne wrote:
> Add shadow and hap implementation specific helpers to perform guest
> TLB flushes. Note that the code for both is exactly the same at the
> moment, and is copied from hvm_flush_vcpu_tlb. This will be changed by
> further patches that will add implementation specific optimizations to
> them.
> 
> No functional change intended.
> 
> Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
> Reviewed-by: Wei Liu <wl@xen.org>
> Acked-by: Tim Deegan <tim@xen.org>

This looks good in principle, with one possible anomaly:

> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -3990,55 +3990,10 @@ static void hvm_s3_resume(struct domain *d)
>  bool hvm_flush_vcpu_tlb(bool (*flush_vcpu)(void *ctxt, struct vcpu *v),
>                          void *ctxt)
>  {
> -    static DEFINE_PER_CPU(cpumask_t, flush_cpumask);
> -    cpumask_t *mask = &this_cpu(flush_cpumask);
> -    struct domain *d = current->domain;
> -    struct vcpu *v;
> -
> -    /* Avoid deadlock if more than one vcpu tries this at the same time. */
> -    if ( !spin_trylock(&d->hypercall_deadlock_mutex) )
> -        return false;
> -
> -    /* Pause all other vcpus. */
> -    for_each_vcpu ( d, v )
> -        if ( v != current && flush_vcpu(ctxt, v) )
> -            vcpu_pause_nosync(v);
> -
> -    /* Now that all VCPUs are signalled to deschedule, we wait... */
> -    for_each_vcpu ( d, v )
> -        if ( v != current && flush_vcpu(ctxt, v) )
> -            while ( !vcpu_runnable(v) && v->is_running )
> -                cpu_relax();
> -
> -    /* All other vcpus are paused, safe to unlock now. */
> -    spin_unlock(&d->hypercall_deadlock_mutex);
> -
> -    cpumask_clear(mask);
> -
> -    /* Flush paging-mode soft state (e.g., va->gfn cache; PAE PDPE cache). */
> -    for_each_vcpu ( d, v )
> -    {
> -        unsigned int cpu;
> -
> -        if ( !flush_vcpu(ctxt, v) )
> -            continue;
> -
> -        paging_update_cr3(v, false);
> +    struct domain *currd = current->domain;
>  
> -        cpu = read_atomic(&v->dirty_cpu);
> -        if ( is_vcpu_dirty_cpu(cpu) )
> -            __cpumask_set_cpu(cpu, mask);
> -    }
> -
> -    /* Flush TLBs on all CPUs with dirty vcpu state. */
> -    flush_tlb_mask(mask);
> -
> -    /* Done. */
> -    for_each_vcpu ( d, v )
> -        if ( v != current && flush_vcpu(ctxt, v) )
> -            vcpu_unpause(v);
> -
> -    return true;
> +    return shadow_mode_enabled(currd) ? shadow_flush_tlb(flush_vcpu, ctxt)
> +                                      : hap_flush_tlb(flush_vcpu, ctxt);
>  }

Following our current model I think this should be a new pointer
in struct paging_mode (then truly fitting "hooks" in the title).
I can see the desire to avoid the indirect call though, but I
also think that if we were to go that route, we should settle on
switching around others as well which are paging mode dependent.
(FAOD this is nothing I ask you to do here.) Andrew, thoughts?

Jan
Roger Pau Monné Feb. 28, 2020, 4:19 p.m. UTC | #2
On Fri, Feb 28, 2020 at 03:50:31PM +0100, Jan Beulich wrote:
> On 19.02.2020 18:43, Roger Pau Monne wrote:
> > Add shadow and hap implementation specific helpers to perform guest
> > TLB flushes. Note that the code for both is exactly the same at the
> > moment, and is copied from hvm_flush_vcpu_tlb. This will be changed by
> > further patches that will add implementation specific optimizations to
> > them.
> > 
> > No functional change intended.
> > 
> > Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
> > Reviewed-by: Wei Liu <wl@xen.org>
> > Acked-by: Tim Deegan <tim@xen.org>
> 
> This looks good in principle, with one possible anomaly:
> 
> > --- a/xen/arch/x86/hvm/hvm.c
> > +++ b/xen/arch/x86/hvm/hvm.c
> > @@ -3990,55 +3990,10 @@ static void hvm_s3_resume(struct domain *d)
> >  bool hvm_flush_vcpu_tlb(bool (*flush_vcpu)(void *ctxt, struct vcpu *v),
> >                          void *ctxt)
> >  {
> > -    static DEFINE_PER_CPU(cpumask_t, flush_cpumask);
> > -    cpumask_t *mask = &this_cpu(flush_cpumask);
> > -    struct domain *d = current->domain;
> > -    struct vcpu *v;
> > -
> > -    /* Avoid deadlock if more than one vcpu tries this at the same time. */
> > -    if ( !spin_trylock(&d->hypercall_deadlock_mutex) )
> > -        return false;
> > -
> > -    /* Pause all other vcpus. */
> > -    for_each_vcpu ( d, v )
> > -        if ( v != current && flush_vcpu(ctxt, v) )
> > -            vcpu_pause_nosync(v);
> > -
> > -    /* Now that all VCPUs are signalled to deschedule, we wait... */
> > -    for_each_vcpu ( d, v )
> > -        if ( v != current && flush_vcpu(ctxt, v) )
> > -            while ( !vcpu_runnable(v) && v->is_running )
> > -                cpu_relax();
> > -
> > -    /* All other vcpus are paused, safe to unlock now. */
> > -    spin_unlock(&d->hypercall_deadlock_mutex);
> > -
> > -    cpumask_clear(mask);
> > -
> > -    /* Flush paging-mode soft state (e.g., va->gfn cache; PAE PDPE cache). */
> > -    for_each_vcpu ( d, v )
> > -    {
> > -        unsigned int cpu;
> > -
> > -        if ( !flush_vcpu(ctxt, v) )
> > -            continue;
> > -
> > -        paging_update_cr3(v, false);
> > +    struct domain *currd = current->domain;
> >  
> > -        cpu = read_atomic(&v->dirty_cpu);
> > -        if ( is_vcpu_dirty_cpu(cpu) )
> > -            __cpumask_set_cpu(cpu, mask);
> > -    }
> > -
> > -    /* Flush TLBs on all CPUs with dirty vcpu state. */
> > -    flush_tlb_mask(mask);
> > -
> > -    /* Done. */
> > -    for_each_vcpu ( d, v )
> > -        if ( v != current && flush_vcpu(ctxt, v) )
> > -            vcpu_unpause(v);
> > -
> > -    return true;
> > +    return shadow_mode_enabled(currd) ? shadow_flush_tlb(flush_vcpu, ctxt)
> > +                                      : hap_flush_tlb(flush_vcpu, ctxt);
> >  }
> 
> Following our current model I think this should be a new pointer
> in struct paging_mode (then truly fitting "hooks" in the title).

I tried doing it that way, but there was something weird about it: the
paging mode is per-vCPU, and hence I needed to do something like:

paging_get_hostmode(current)->flush(current->domain, ...)

I can try to move it to being a paging_mode hook if you prefer.

> I can see the desire to avoid the indirect call though, but I
> also think that if we were to go that route, we should settle on
> switching around others as well which are paging mode dependent.
> (FAOD this is nothing I ask you to do here.) Andrew, thoughts?

I think it's already quite a mixed bag; see track_dirty_vram for
example, which uses a similar model.

Thanks, Roger.
Jan Beulich Feb. 28, 2020, 4:40 p.m. UTC | #3
On 28.02.2020 17:19, Roger Pau Monné wrote:
> On Fri, Feb 28, 2020 at 03:50:31PM +0100, Jan Beulich wrote:
>> On 19.02.2020 18:43, Roger Pau Monne wrote:
>>> Add shadow and hap implementation specific helpers to perform guest
>>> TLB flushes. Note that the code for both is exactly the same at the
>>> moment, and is copied from hvm_flush_vcpu_tlb. This will be changed by
>>> further patches that will add implementation specific optimizations to
>>> them.
>>>
>>> No functional change intended.
>>>
>>> Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
>>> Reviewed-by: Wei Liu <wl@xen.org>
>>> Acked-by: Tim Deegan <tim@xen.org>
>>
>> This looks good in principle, with one possible anomaly:
>>
>>> --- a/xen/arch/x86/hvm/hvm.c
>>> +++ b/xen/arch/x86/hvm/hvm.c
>>> @@ -3990,55 +3990,10 @@ static void hvm_s3_resume(struct domain *d)
>>>  bool hvm_flush_vcpu_tlb(bool (*flush_vcpu)(void *ctxt, struct vcpu *v),
>>>                          void *ctxt)
>>>  {
>>> -    static DEFINE_PER_CPU(cpumask_t, flush_cpumask);
>>> -    cpumask_t *mask = &this_cpu(flush_cpumask);
>>> -    struct domain *d = current->domain;
>>> -    struct vcpu *v;
>>> -
>>> -    /* Avoid deadlock if more than one vcpu tries this at the same time. */
>>> -    if ( !spin_trylock(&d->hypercall_deadlock_mutex) )
>>> -        return false;
>>> -
>>> -    /* Pause all other vcpus. */
>>> -    for_each_vcpu ( d, v )
>>> -        if ( v != current && flush_vcpu(ctxt, v) )
>>> -            vcpu_pause_nosync(v);
>>> -
>>> -    /* Now that all VCPUs are signalled to deschedule, we wait... */
>>> -    for_each_vcpu ( d, v )
>>> -        if ( v != current && flush_vcpu(ctxt, v) )
>>> -            while ( !vcpu_runnable(v) && v->is_running )
>>> -                cpu_relax();
>>> -
>>> -    /* All other vcpus are paused, safe to unlock now. */
>>> -    spin_unlock(&d->hypercall_deadlock_mutex);
>>> -
>>> -    cpumask_clear(mask);
>>> -
>>> -    /* Flush paging-mode soft state (e.g., va->gfn cache; PAE PDPE cache). */
>>> -    for_each_vcpu ( d, v )
>>> -    {
>>> -        unsigned int cpu;
>>> -
>>> -        if ( !flush_vcpu(ctxt, v) )
>>> -            continue;
>>> -
>>> -        paging_update_cr3(v, false);
>>> +    struct domain *currd = current->domain;
>>>  
>>> -        cpu = read_atomic(&v->dirty_cpu);
>>> -        if ( is_vcpu_dirty_cpu(cpu) )
>>> -            __cpumask_set_cpu(cpu, mask);
>>> -    }
>>> -
>>> -    /* Flush TLBs on all CPUs with dirty vcpu state. */
>>> -    flush_tlb_mask(mask);
>>> -
>>> -    /* Done. */
>>> -    for_each_vcpu ( d, v )
>>> -        if ( v != current && flush_vcpu(ctxt, v) )
>>> -            vcpu_unpause(v);
>>> -
>>> -    return true;
>>> +    return shadow_mode_enabled(currd) ? shadow_flush_tlb(flush_vcpu, ctxt)
>>> +                                      : hap_flush_tlb(flush_vcpu, ctxt);
>>>  }
>>
>> Following our current model I think this should be a new pointer
>> in struct paging_mode (then truly fitting "hooks" in the title).
> 
> I tried doing it that way, but there was something weird about it: the
> paging mode is per-vCPU, and hence I needed to do something like:
> 
> paging_get_hostmode(current)->flush(current->domain, ...)

I don't see anything wrong with the left side of the -> (it
parallels what is needed for the write_p2m_entry() hook). For
the right I can't see why you'd want to have current->domain
there when both functions want flush_vcpu and ctxt. Ultimately
we probably want per-vCPU and per-domain hooks in separate
structures (the former for hooks where the current paging mode
matters, the latter for those where it doesn't matter), but of
course that's nothing I'm meaning to ask you to do.

> I can try to move it to being a paging_mode hook if you prefer.

It would seem cleaner to me, but ...

>> I can see the desire to avoid the indirect call though, but I
>> also think that if we were to go that route, we should settle on
>> switching around others as well which are paging mode dependent.
>> (FAOD this is nothing I ask you to do here.) Andrew, thoughts?

... as said I'd prefer to also know Andrew's opinion, in
particular to settle where we would want this to move in the
mid to long term. Whereas ...

> I think it's already quite a mixed bag; see track_dirty_vram for
> example, which uses a similar model.

... you probably know my typical response to something like
this: Bad examples aren't to be taken as an excuse to introduce
further inconsistencies.

Jan
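
For illustration of the alternative discussed above, a rough sketch of what a
struct paging_mode hook could look like: a flush_tlb pointer keeping the
flush_vcpu/ctxt signature and dispatched through paging_get_hostmode(). The
member name and the wiring are assumptions made for the example; this is not
what the series implements:

/* xen/include/asm-x86/paging.h: add a hook alongside the existing ones. */
struct paging_mode {
    /* ... existing hooks (update_cr3, write_p2m_entry, ...) ... */
    bool (*flush_tlb)(bool (*flush_vcpu)(void *ctxt, struct vcpu *v),
                      void *ctxt);
};

/* xen/arch/x86/hvm/hvm.c: the wrapper then becomes an indirect call. */
bool hvm_flush_vcpu_tlb(bool (*flush_vcpu)(void *ctxt, struct vcpu *v),
                        void *ctxt)
{
    return paging_get_hostmode(current)->flush_tlb(flush_vcpu, ctxt);
}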

Patch

diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 00a9e70b7c..4049f57232 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -3990,55 +3990,10 @@  static void hvm_s3_resume(struct domain *d)
 bool hvm_flush_vcpu_tlb(bool (*flush_vcpu)(void *ctxt, struct vcpu *v),
                         void *ctxt)
 {
-    static DEFINE_PER_CPU(cpumask_t, flush_cpumask);
-    cpumask_t *mask = &this_cpu(flush_cpumask);
-    struct domain *d = current->domain;
-    struct vcpu *v;
-
-    /* Avoid deadlock if more than one vcpu tries this at the same time. */
-    if ( !spin_trylock(&d->hypercall_deadlock_mutex) )
-        return false;
-
-    /* Pause all other vcpus. */
-    for_each_vcpu ( d, v )
-        if ( v != current && flush_vcpu(ctxt, v) )
-            vcpu_pause_nosync(v);
-
-    /* Now that all VCPUs are signalled to deschedule, we wait... */
-    for_each_vcpu ( d, v )
-        if ( v != current && flush_vcpu(ctxt, v) )
-            while ( !vcpu_runnable(v) && v->is_running )
-                cpu_relax();
-
-    /* All other vcpus are paused, safe to unlock now. */
-    spin_unlock(&d->hypercall_deadlock_mutex);
-
-    cpumask_clear(mask);
-
-    /* Flush paging-mode soft state (e.g., va->gfn cache; PAE PDPE cache). */
-    for_each_vcpu ( d, v )
-    {
-        unsigned int cpu;
-
-        if ( !flush_vcpu(ctxt, v) )
-            continue;
-
-        paging_update_cr3(v, false);
+    struct domain *currd = current->domain;
 
-        cpu = read_atomic(&v->dirty_cpu);
-        if ( is_vcpu_dirty_cpu(cpu) )
-            __cpumask_set_cpu(cpu, mask);
-    }
-
-    /* Flush TLBs on all CPUs with dirty vcpu state. */
-    flush_tlb_mask(mask);
-
-    /* Done. */
-    for_each_vcpu ( d, v )
-        if ( v != current && flush_vcpu(ctxt, v) )
-            vcpu_unpause(v);
-
-    return true;
+    return shadow_mode_enabled(currd) ? shadow_flush_tlb(flush_vcpu, ctxt)
+                                      : hap_flush_tlb(flush_vcpu, ctxt);
 }
 
 static bool always_flush(void *ctxt, struct vcpu *v)
diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c
index 3d93f3451c..6894c1aa38 100644
--- a/xen/arch/x86/mm/hap/hap.c
+++ b/xen/arch/x86/mm/hap/hap.c
@@ -669,6 +669,60 @@  static void hap_update_cr3(struct vcpu *v, int do_locking, bool noflush)
     hvm_update_guest_cr3(v, noflush);
 }
 
+bool hap_flush_tlb(bool (*flush_vcpu)(void *ctxt, struct vcpu *v),
+                   void *ctxt)
+{
+    static DEFINE_PER_CPU(cpumask_t, flush_cpumask);
+    cpumask_t *mask = &this_cpu(flush_cpumask);
+    struct domain *d = current->domain;
+    struct vcpu *v;
+
+    /* Avoid deadlock if more than one vcpu tries this at the same time. */
+    if ( !spin_trylock(&d->hypercall_deadlock_mutex) )
+        return false;
+
+    /* Pause all other vcpus. */
+    for_each_vcpu ( d, v )
+        if ( v != current && flush_vcpu(ctxt, v) )
+            vcpu_pause_nosync(v);
+
+    /* Now that all VCPUs are signalled to deschedule, we wait... */
+    for_each_vcpu ( d, v )
+        if ( v != current && flush_vcpu(ctxt, v) )
+            while ( !vcpu_runnable(v) && v->is_running )
+                cpu_relax();
+
+    /* All other vcpus are paused, safe to unlock now. */
+    spin_unlock(&d->hypercall_deadlock_mutex);
+
+    cpumask_clear(mask);
+
+    /* Flush paging-mode soft state (e.g., va->gfn cache; PAE PDPE cache). */
+    for_each_vcpu ( d, v )
+    {
+        unsigned int cpu;
+
+        if ( !flush_vcpu(ctxt, v) )
+            continue;
+
+        paging_update_cr3(v, false);
+
+        cpu = read_atomic(&v->dirty_cpu);
+        if ( is_vcpu_dirty_cpu(cpu) )
+            __cpumask_set_cpu(cpu, mask);
+    }
+
+    /* Flush TLBs on all CPUs with dirty vcpu state. */
+    flush_tlb_mask(mask);
+
+    /* Done. */
+    for_each_vcpu ( d, v )
+        if ( v != current && flush_vcpu(ctxt, v) )
+            vcpu_unpause(v);
+
+    return true;
+}
+
 const struct paging_mode *
 hap_paging_get_mode(struct vcpu *v)
 {
diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c
index cba3ab1eba..121ddf1255 100644
--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -3357,6 +3357,61 @@  out:
     return rc;
 }
 
+/* Flush the TLB of the selected vCPUs. */
+bool shadow_flush_tlb(bool (*flush_vcpu)(void *ctxt, struct vcpu *v),
+                      void *ctxt)
+{
+    static DEFINE_PER_CPU(cpumask_t, flush_cpumask);
+    cpumask_t *mask = &this_cpu(flush_cpumask);
+    struct domain *d = current->domain;
+    struct vcpu *v;
+
+    /* Avoid deadlock if more than one vcpu tries this at the same time. */
+    if ( !spin_trylock(&d->hypercall_deadlock_mutex) )
+        return false;
+
+    /* Pause all other vcpus. */
+    for_each_vcpu ( d, v )
+        if ( v != current && flush_vcpu(ctxt, v) )
+            vcpu_pause_nosync(v);
+
+    /* Now that all VCPUs are signalled to deschedule, we wait... */
+    for_each_vcpu ( d, v )
+        if ( v != current && flush_vcpu(ctxt, v) )
+            while ( !vcpu_runnable(v) && v->is_running )
+                cpu_relax();
+
+    /* All other vcpus are paused, safe to unlock now. */
+    spin_unlock(&d->hypercall_deadlock_mutex);
+
+    cpumask_clear(mask);
+
+    /* Flush paging-mode soft state (e.g., va->gfn cache; PAE PDPE cache). */
+    for_each_vcpu ( d, v )
+    {
+        unsigned int cpu;
+
+        if ( !flush_vcpu(ctxt, v) )
+            continue;
+
+        paging_update_cr3(v, false);
+
+        cpu = read_atomic(&v->dirty_cpu);
+        if ( is_vcpu_dirty_cpu(cpu) )
+            __cpumask_set_cpu(cpu, mask);
+    }
+
+    /* Flush TLBs on all CPUs with dirty vcpu state. */
+    flush_tlb_mask(mask);
+
+    /* Done. */
+    for_each_vcpu ( d, v )
+        if ( v != current && flush_vcpu(ctxt, v) )
+            vcpu_unpause(v);
+
+    return true;
+}
+
 /**************************************************************************/
 /* Shadow-control XEN_DOMCTL dispatcher */
 
diff --git a/xen/include/asm-x86/hap.h b/xen/include/asm-x86/hap.h
index b94bfb4ed0..0c6aa26b9b 100644
--- a/xen/include/asm-x86/hap.h
+++ b/xen/include/asm-x86/hap.h
@@ -46,6 +46,9 @@  int   hap_track_dirty_vram(struct domain *d,
 extern const struct paging_mode *hap_paging_get_mode(struct vcpu *);
 int hap_set_allocation(struct domain *d, unsigned int pages, bool *preempted);
 
+bool hap_flush_tlb(bool (*flush_vcpu)(void *ctxt, struct vcpu *v),
+                   void *ctxt);
+
 #endif /* XEN_HAP_H */
 
 /*
diff --git a/xen/include/asm-x86/shadow.h b/xen/include/asm-x86/shadow.h
index 907c71f497..cfd4650a16 100644
--- a/xen/include/asm-x86/shadow.h
+++ b/xen/include/asm-x86/shadow.h
@@ -95,6 +95,10 @@  void shadow_blow_tables_per_domain(struct domain *d);
 int shadow_set_allocation(struct domain *d, unsigned int pages,
                           bool *preempted);
 
+/* Flush the TLB of the selected vCPUs. */
+bool shadow_flush_tlb(bool (*flush_vcpu)(void *ctxt, struct vcpu *v),
+                      void *ctxt);
+
 #else /* !CONFIG_SHADOW_PAGING */
 
 #define shadow_teardown(d, p) ASSERT(is_pv_domain(d))
@@ -106,6 +110,14 @@  int shadow_set_allocation(struct domain *d, unsigned int pages,
 #define shadow_set_allocation(d, pages, preempted) \
     ({ ASSERT_UNREACHABLE(); -EOPNOTSUPP; })
 
+static inline bool shadow_flush_tlb(bool (*flush_vcpu)(void *ctxt,
+                                                       struct vcpu *v),
+                                    void *ctxt)
+{
+    ASSERT_UNREACHABLE();
+    return false;
+}
+
 static inline void sh_remove_shadows(struct domain *d, mfn_t gmfn,
                                      int fast, int all) {}