
[v4,7/7] x86/tlb: use Xen L0 assisted TLB flush when available

Message ID: 20200210172829.43604-8-roger.pau@citrix.com
State: Superseded
Series: x86: improve assisted tlb flush and use it in guest mode

Commit Message

Roger Pau Monné Feb. 10, 2020, 5:28 p.m. UTC
Use Xen's L0 HVMOP_flush_tlbs hypercall to perform TLB flushes.
This greatly improves the performance of TLB flushes when running
with a high number of vCPUs as a Xen guest, and is especially
important when running in shim mode.

The following figures are from a PV guest running `make -j32 xen` in
shim mode with 32 vCPUs and HAP.

Using x2APIC and ALLBUT shorthand:
real	4m35.973s
user	4m35.110s
sys	36m24.117s

Using L0 assisted flush:
real    1m2.596s
user    4m34.818s
sys     5m16.374s

The implementation adds a new hook to hypervisor_ops so that other
enlightenments can also provide an assisted flush simply by filling
in the hook. Note that the Xen implementation completely ignores the
dirty CPU mask and the linear address passed in, and always performs a
global TLB flush on all vCPUs.
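
As a minimal sketch (hypothetical, not part of this series) of what
filling in the hook looks like for another enlightenment, where
hv_flush_remote() is a made-up helper name:

    static int flush_tlb(const cpumask_t *mask, const void *va,
                         unsigned int order)
    {
        /* Translate the request into the enlightenment's own flush API. */
        return hv_flush_remote(mask, va, order);
    }

    static const struct hypervisor_ops __initdata ops = {
        .name      = "Example",
        /* ... other hooks ... */
        .flush_tlb = flush_tlb,
    };

Returning non-zero (e.g. -ENOSYS) from the hook makes flush_area_mask()
fall back to the IPI-based flush path.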

Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
---
Changes since v3:
 - Use an alternative call for the flush hook.

Changes since v1:
 - Add a L0 assisted hook to hypervisor ops.
---
 xen/arch/x86/guest/hypervisor.c        | 10 ++++++++++
 xen/arch/x86/guest/xen/xen.c           |  6 ++++++
 xen/arch/x86/smp.c                     | 11 +++++++++++
 xen/include/asm-x86/guest/hypervisor.h | 17 +++++++++++++++++
 4 files changed, 44 insertions(+)

Comments

Wei Liu Feb. 10, 2020, 8:16 p.m. UTC | #1
On Mon, Feb 10, 2020 at 06:28:29PM +0100, Roger Pau Monne wrote:
> Use Xen's L0 HVMOP_flush_tlbs hypercall to perform TLB flushes.
> This greatly improves the performance of TLB flushes when running
> with a high number of vCPUs as a Xen guest, and is especially
> important when running in shim mode.
> 
> The following figures are from a PV guest running `make -j32 xen` in
> shim mode with 32 vCPUs and HAP.
> 
> Using x2APIC and ALLBUT shorthand:
> real	4m35.973s
> user	4m35.110s
> sys	36m24.117s
> 
> Using L0 assisted flush:
> real    1m2.596s
> user    4m34.818s
> sys     5m16.374s
> 
> The implementation adds a new hook to hypervisor_ops so that other
> enlightenments can also provide an assisted flush simply by filling
> in the hook. Note that the Xen implementation completely ignores the
> dirty CPU mask and the linear address passed in, and always performs a
> global TLB flush on all vCPUs.
> 
> Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>

Reviewed-by: Wei Liu <wl@xen.org>

One remark below.

[...]
>  const struct hypervisor_ops *__init xg_probe(void)
> diff --git a/xen/arch/x86/smp.c b/xen/arch/x86/smp.c
> index 65eb7cbda8..9bc925616a 100644
> --- a/xen/arch/x86/smp.c
> +++ b/xen/arch/x86/smp.c
> @@ -15,6 +15,7 @@
>  #include <xen/perfc.h>
>  #include <xen/spinlock.h>
>  #include <asm/current.h>
> +#include <asm/guest.h>
>  #include <asm/smp.h>
>  #include <asm/mc146818rtc.h>
>  #include <asm/flushtlb.h>
> @@ -256,6 +257,16 @@ void flush_area_mask(const cpumask_t *mask, const void *va, unsigned int flags)
>      if ( (flags & ~FLUSH_ORDER_MASK) &&
>           !cpumask_subset(mask, cpumask_of(cpu)) )
>      {
> +        if ( cpu_has_hypervisor &&
> +             !(flags & ~(FLUSH_TLB | FLUSH_TLB_GLOBAL | FLUSH_VA_VALID |
> +                         FLUSH_ORDER_MASK)) &&
> +             !hypervisor_flush_tlb(mask, va, flags & FLUSH_ORDER_MASK) )

I would like to pass in the flags as a whole because Hyper-V has the
capability to fine-tune what gets flushed. I can see FLUSH_TLB_GLOBAL
being used in that situation.

There is no need to change your patch though. I can submit a patch
myself to change this interface once your series is accepted.
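
For illustration, the interface change described above might look like
this (a hypothetical sketch, not code from this series):

    /* Pass the raw flags through so a hook can fine-tune the flush. */
    int hypervisor_flush_tlb(const cpumask_t *mask, const void *va,
                             unsigned int flags)
    {
        if ( ops.flush_tlb )
            return alternative_call(ops.flush_tlb, mask, va, flags);

        return -ENOSYS;
    }

A hook could then inspect e.g. FLUSH_TLB_GLOBAL and choose between a
global and a non-global flush instead of always flushing everything.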
Wei Liu Feb. 11, 2020, 10:34 a.m. UTC | #2
On Mon, Feb 10, 2020 at 06:28:29PM +0100, Roger Pau Monne wrote:
[...]
>  
>  struct hypervisor_ops {
> @@ -32,6 +34,8 @@ struct hypervisor_ops {
>      void (*resume)(void);
>      /* Fix up e820 map */
>      void (*e820_fixup)(struct e820map *e820);
> +    /* L0 assisted TLB flush */
> +    int (*flush_tlb)(const cpumask_t *mask, const void *va, unsigned int order);
>  };
>  
>  #ifdef CONFIG_GUEST
> @@ -41,6 +45,14 @@ void hypervisor_setup(void);
>  int hypervisor_ap_setup(void);
>  void hypervisor_resume(void);
>  void hypervisor_e820_fixup(struct e820map *e820);
> +/*
> + * L0 assisted TLB flush.
> + * mask: cpumask of the dirty vCPUs that should be flushed.
> + * va: linear address to flush, or NULL for global flushes.

I was in the middle of writing my patch and noticed this.

I think NULL means "flushing the entire address space" here?

Wei.
Roger Pau Monné Feb. 11, 2020, 2:06 p.m. UTC | #3
On Tue, Feb 11, 2020 at 10:34:24AM +0000, Wei Liu wrote:
> On Mon, Feb 10, 2020 at 06:28:29PM +0100, Roger Pau Monne wrote:
> [...]
> >  
> >  struct hypervisor_ops {
> > @@ -32,6 +34,8 @@ struct hypervisor_ops {
> >      void (*resume)(void);
> >      /* Fix up e820 map */
> >      void (*e820_fixup)(struct e820map *e820);
> > +    /* L0 assisted TLB flush */
> > +    int (*flush_tlb)(const cpumask_t *mask, const void *va, unsigned int order);
> >  };
> >  
> >  #ifdef CONFIG_GUEST
> > @@ -41,6 +45,14 @@ void hypervisor_setup(void);
> >  int hypervisor_ap_setup(void);
> >  void hypervisor_resume(void);
> >  void hypervisor_e820_fixup(struct e820map *e820);
> > +/*
> > + * L0 assisted TLB flush.
> > + * mask: cpumask of the dirty vCPUs that should be flushed.
> > + * va: linear address to flush, or NULL for global flushes.
> 
> I was in the middle of writing my patch and noticed this.
> 
> I think NULL means "flushing the entire address space" here?

Yes, that's right. I didn't add a way to differentiate between global
(ie: PGE mappings included) flushes and non-global flushes, so all
calls are assumed to imply flushes of global mappings.

It might be better if you adapt it yourself to whatever is best suited
for Hyper-V, which has more selective flushes available. Xen only has a
hypercall to request a global flush on all vCPUs.
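
Usage per the interface comment looks like this (a sketch; the Xen hook
ignores all three arguments and always flushes everything):

    /* Flush the mapping of a single page on the dirty vCPUs. */
    rc = hypervisor_flush_tlb(mask, va, 0);

    /* va == NULL requests a flush of the entire address space. */
    rc = hypervisor_flush_tlb(mask, NULL, 0);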

Thanks, Roger.
Wei Liu Feb. 11, 2020, 2:08 p.m. UTC | #4
On Tue, Feb 11, 2020 at 03:06:21PM +0100, Roger Pau Monné wrote:
> On Tue, Feb 11, 2020 at 10:34:24AM +0000, Wei Liu wrote:
> > On Mon, Feb 10, 2020 at 06:28:29PM +0100, Roger Pau Monne wrote:
> > [...]
> > >  
> > >  struct hypervisor_ops {
> > > @@ -32,6 +34,8 @@ struct hypervisor_ops {
> > >      void (*resume)(void);
> > >      /* Fix up e820 map */
> > >      void (*e820_fixup)(struct e820map *e820);
> > > +    /* L0 assisted TLB flush */
> > > +    int (*flush_tlb)(const cpumask_t *mask, const void *va, unsigned int order);
> > >  };
> > >  
> > >  #ifdef CONFIG_GUEST
> > > @@ -41,6 +45,14 @@ void hypervisor_setup(void);
> > >  int hypervisor_ap_setup(void);
> > >  void hypervisor_resume(void);
> > >  void hypervisor_e820_fixup(struct e820map *e820);
> > > +/*
> > > + * L0 assisted TLB flush.
> > > + * mask: cpumask of the dirty vCPUs that should be flushed.
> > > + * va: linear address to flush, or NULL for global flushes.
> > 
> > I was in the middle of writing my patch and noticed this.
> > 
> > I think NULL means "flushing the entire address space" here?
> 
> Yes, that's right. I didn't add a way to differentiate between global
> (ie: PGE mappings included) flushes and non-global flushes, so all
> calls are assumed to imply flushes of global mappings.
> 
> It might be better if you adapt it yourself to whatever is best suited
> for Hyper-V, which has more selective flushes available. Xen only has a
> hypercall to request a global flush on all vCPUs.

OK. Thanks for confirming.

I will change this comment in my patch.

Wei.

> 
> Thanks, Roger.
Roger Pau Monné Feb. 18, 2020, 12:42 p.m. UTC | #5
On Mon, Feb 10, 2020 at 06:28:29PM +0100, Roger Pau Monne wrote:
> @@ -256,6 +257,16 @@ void flush_area_mask(const cpumask_t *mask, const void *va, unsigned int flags)
>      if ( (flags & ~FLUSH_ORDER_MASK) &&
>           !cpumask_subset(mask, cpumask_of(cpu)) )
>      {
> +        if ( cpu_has_hypervisor &&
> +             !(flags & ~(FLUSH_TLB | FLUSH_TLB_GLOBAL | FLUSH_VA_VALID |
> +                         FLUSH_ORDER_MASK)) &&
> +             !hypervisor_flush_tlb(mask, va, flags & FLUSH_ORDER_MASK) )

This needs to be adjusted to (flags - 1) & FLUSH_ORDER_MASK, since
FLUSH_ORDER(x) stores the order as x + 1, so the real order has to be
recovered by subtracting one first.
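
For context, the flag encoding that makes the subtraction necessary
(as defined in xen/include/asm-x86/flushtlb.h at the time of this
series):

    #define FLUSH_ORDER_MASK 0xff
    #define FLUSH_ORDER(x)   ((x)+1)

    /* The order is stored biased by one, so recover it with: */
    unsigned int order = (flags - 1) & FLUSH_ORDER_MASK;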

Patch

diff --git a/xen/arch/x86/guest/hypervisor.c b/xen/arch/x86/guest/hypervisor.c
index 647cdb1367..47e938e287 100644
--- a/xen/arch/x86/guest/hypervisor.c
+++ b/xen/arch/x86/guest/hypervisor.c
@@ -18,6 +18,7 @@ 
  *
  * Copyright (c) 2019 Microsoft.
  */
+#include <xen/cpumask.h>
 #include <xen/init.h>
 #include <xen/types.h>
 
@@ -73,6 +74,15 @@  void __init hypervisor_e820_fixup(struct e820map *e820)
         ops.e820_fixup(e820);
 }
 
+int hypervisor_flush_tlb(const cpumask_t *mask, const void *va,
+                         unsigned int order)
+{
+    if ( ops.flush_tlb )
+        return alternative_call(ops.flush_tlb, mask, va, order);
+
+    return -ENOSYS;
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/xen/arch/x86/guest/xen/xen.c b/xen/arch/x86/guest/xen/xen.c
index f151b07548..5d3427a713 100644
--- a/xen/arch/x86/guest/xen/xen.c
+++ b/xen/arch/x86/guest/xen/xen.c
@@ -324,12 +324,18 @@  static void __init e820_fixup(struct e820map *e820)
         pv_shim_fixup_e820(e820);
 }
 
+static int flush_tlb(const cpumask_t *mask, const void *va, unsigned int order)
+{
+    return xen_hypercall_hvm_op(HVMOP_flush_tlbs, NULL);
+}
+
 static const struct hypervisor_ops __initdata ops = {
     .name = "Xen",
     .setup = setup,
     .ap_setup = ap_setup,
     .resume = resume,
     .e820_fixup = e820_fixup,
+    .flush_tlb = flush_tlb,
 };
 
 const struct hypervisor_ops *__init xg_probe(void)
diff --git a/xen/arch/x86/smp.c b/xen/arch/x86/smp.c
index 65eb7cbda8..9bc925616a 100644
--- a/xen/arch/x86/smp.c
+++ b/xen/arch/x86/smp.c
@@ -15,6 +15,7 @@ 
 #include <xen/perfc.h>
 #include <xen/spinlock.h>
 #include <asm/current.h>
+#include <asm/guest.h>
 #include <asm/smp.h>
 #include <asm/mc146818rtc.h>
 #include <asm/flushtlb.h>
@@ -256,6 +257,16 @@  void flush_area_mask(const cpumask_t *mask, const void *va, unsigned int flags)
     if ( (flags & ~FLUSH_ORDER_MASK) &&
          !cpumask_subset(mask, cpumask_of(cpu)) )
     {
+        if ( cpu_has_hypervisor &&
+             !(flags & ~(FLUSH_TLB | FLUSH_TLB_GLOBAL | FLUSH_VA_VALID |
+                         FLUSH_ORDER_MASK)) &&
+             !hypervisor_flush_tlb(mask, va, flags & FLUSH_ORDER_MASK) )
+        {
+            if ( tlb_clk_enabled )
+                tlb_clk_enabled = false;
+            return;
+        }
+
         spin_lock(&flush_lock);
         cpumask_and(&flush_cpumask, mask, &cpu_online_map);
         cpumask_clear_cpu(cpu, &flush_cpumask);
diff --git a/xen/include/asm-x86/guest/hypervisor.h b/xen/include/asm-x86/guest/hypervisor.h
index ade10e74ea..432e57c2a0 100644
--- a/xen/include/asm-x86/guest/hypervisor.h
+++ b/xen/include/asm-x86/guest/hypervisor.h
@@ -19,6 +19,8 @@ 
 #ifndef __X86_HYPERVISOR_H__
 #define __X86_HYPERVISOR_H__
 
+#include <xen/cpumask.h>
+
 #include <asm/e820.h>
 
 struct hypervisor_ops {
@@ -32,6 +34,8 @@  struct hypervisor_ops {
     void (*resume)(void);
     /* Fix up e820 map */
     void (*e820_fixup)(struct e820map *e820);
+    /* L0 assisted TLB flush */
+    int (*flush_tlb)(const cpumask_t *mask, const void *va, unsigned int order);
 };
 
 #ifdef CONFIG_GUEST
@@ -41,6 +45,14 @@  void hypervisor_setup(void);
 int hypervisor_ap_setup(void);
 void hypervisor_resume(void);
 void hypervisor_e820_fixup(struct e820map *e820);
+/*
+ * L0 assisted TLB flush.
+ * mask: cpumask of the dirty vCPUs that should be flushed.
+ * va: linear address to flush, or NULL for global flushes.
+ * order: order of the linear address pointed by va.
+ */
+int hypervisor_flush_tlb(const cpumask_t *mask, const void *va,
+                         unsigned int order);
 
 #else
 
@@ -52,6 +64,11 @@  static inline void hypervisor_setup(void) { ASSERT_UNREACHABLE(); }
 static inline int hypervisor_ap_setup(void) { return 0; }
 static inline void hypervisor_resume(void) { ASSERT_UNREACHABLE(); }
 static inline void hypervisor_e820_fixup(struct e820map *e820) {}
+static inline int hypervisor_flush_tlb(const cpumask_t *mask, const void *va,
+                                       unsigned int order)
+{
+    return -ENOSYS;
+}
 
 #endif  /* CONFIG_GUEST */