diff mbox series

[2/2] x86/tlb: use Xen L0 assisted TLB flush when available

Message ID 20191224132616.47441-3-roger.pau@citrix.com (mailing list archive)
State Superseded
Headers show
Series x86: improve assisted tlb flush and use it in guest mode | expand

Commit Message

Roger Pau Monné Dec. 24, 2019, 1:26 p.m. UTC
Use Xen's L0 HVMOP_flush_tlbs hypercall when available in order to
perform flushes. This greatly increases the performance of TLB flushes
when running with a high number of vCPUs as a Xen guest, and is
especially important when running in shim mode.

The following figures are from a PV guest running `make -j342 xen` in
shim mode with 32 vCPUs.

Using x2APIC and ALLBUT shorthand:
real	4m35.973s
user	4m35.110s
sys	36m24.117s

Using L0 assisted flush:
real	1m17.391s
user	4m42.413s
sys	6m20.773s

Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
---
 xen/arch/x86/guest/xen/xen.c    | 11 +++++++++++
 xen/arch/x86/smp.c              |  6 ++++++
 xen/include/asm-x86/guest/xen.h |  7 +++++++
 3 files changed, 24 insertions(+)

Comments

Wei Liu Dec. 25, 2019, 4:13 p.m. UTC | #1
On Tue, Dec 24, 2019 at 02:26:16PM +0100, Roger Pau Monne wrote:
> Use Xen's L0 HVMOP_flush_tlbs hypercall when available in order to
> perform flushes. This greatly increases the performance of tlb flushes
> when running with a high amount of vCPUs as a Xen guest, and is
> specially important when running in shim mode.
> 
> The following figures are from a PV guest running `make -j342 xen` in
> shim mode with 32 vCPUs.
> 
> Using x2APIC and ALLBUT shorthand:
> real	4m35.973s
> user	4m35.110s
> sys	36m24.117s
> 
> Using L0 assisted flush:
> real	1m17.391s
> user	4m42.413s
> sys	6m20.773s
> 
> Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
> ---
>  xen/arch/x86/guest/xen/xen.c    | 11 +++++++++++
>  xen/arch/x86/smp.c              |  6 ++++++
>  xen/include/asm-x86/guest/xen.h |  7 +++++++
>  3 files changed, 24 insertions(+)
> 
> diff --git a/xen/arch/x86/guest/xen/xen.c b/xen/arch/x86/guest/xen/xen.c
> index 6dbc5f953f..e6493caecf 100644
> --- a/xen/arch/x86/guest/xen/xen.c
> +++ b/xen/arch/x86/guest/xen/xen.c
> @@ -281,6 +281,17 @@ int xg_free_unused_page(mfn_t mfn)
>      return rangeset_remove_range(mem, mfn_x(mfn), mfn_x(mfn));
>  }
>  
> +int xg_flush_tlbs(void)
> +{
> +    int rc;
> +
> +    do {
> +        rc = xen_hypercall_hvm_op(HVMOP_flush_tlbs, NULL);
> +    } while ( rc == -ERESTART );
> +
> +    return rc;
> +}
> +

Is it possible to make this a hook in the hypervisor_op?

I can foresee there will be something similar for Hyper-V.

>  static void ap_resume(void *unused)
>  {
>      map_vcpuinfo();
> diff --git a/xen/arch/x86/smp.c b/xen/arch/x86/smp.c
> index 427c33db9d..a892db28c1 100644
> --- a/xen/arch/x86/smp.c
> +++ b/xen/arch/x86/smp.c
> @@ -15,6 +15,7 @@
>  #include <xen/perfc.h>
>  #include <xen/spinlock.h>
>  #include <asm/current.h>
> +#include <asm/guest.h>
>  #include <asm/smp.h>
>  #include <asm/mc146818rtc.h>
>  #include <asm/flushtlb.h>
> @@ -235,6 +236,11 @@ void flush_area_mask(const cpumask_t *mask, const void *va, unsigned int flags)
>      {
>          bool cpus_locked = false;
>  
> +        if ( xen_guest &&

Also it would be better to not expose xen_guest here. It is x86 generic
code after all.

I would probably introduce a function to tell if Xen is running
virtualised or not.

Wei.
Andrew Cooper Dec. 27, 2019, 2:55 p.m. UTC | #2
On 24/12/2019 13:26, Roger Pau Monne wrote:
> Use Xen's L0 HVMOP_flush_tlbs hypercall when available in order to
> perform flushes. This greatly increases the performance of tlb flushes
> when running with a high amount of vCPUs as a Xen guest, and is
> specially important when running in shim mode.
>
> The following figures are from a PV guest running `make -j342 xen` in
> shim mode with 32 vCPUs.
>
> Using x2APIC and ALLBUT shorthand:
> real	4m35.973s
> user	4m35.110s
> sys	36m24.117s
>
> Using L0 assisted flush:
> real	1m17.391s
> user	4m42.413s
> sys	6m20.773s

Nice stats.

>
> Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
> ---
>  xen/arch/x86/guest/xen/xen.c    | 11 +++++++++++
>  xen/arch/x86/smp.c              |  6 ++++++
>  xen/include/asm-x86/guest/xen.h |  7 +++++++
>  3 files changed, 24 insertions(+)
>
> diff --git a/xen/arch/x86/guest/xen/xen.c b/xen/arch/x86/guest/xen/xen.c
> index 6dbc5f953f..e6493caecf 100644
> --- a/xen/arch/x86/guest/xen/xen.c
> +++ b/xen/arch/x86/guest/xen/xen.c
> @@ -281,6 +281,17 @@ int xg_free_unused_page(mfn_t mfn)
>      return rangeset_remove_range(mem, mfn_x(mfn), mfn_x(mfn));
>  }
>  
> +int xg_flush_tlbs(void)
> +{
> +    int rc;
> +
> +    do {
> +        rc = xen_hypercall_hvm_op(HVMOP_flush_tlbs, NULL);
> +    } while ( rc == -ERESTART );

ERESTART should never manifest like this, because it is taken care of
within the hypercall_page[] stub.  Anything else is a bug which needs
fixing at L0.

Have you actually seen one appearing?

~Andrew
Jan Beulich Jan. 3, 2020, 3:23 p.m. UTC | #3
On 24.12.2019 14:26, Roger Pau Monne wrote:
> @@ -235,6 +236,11 @@ void flush_area_mask(const cpumask_t *mask, const void *va, unsigned int flags)
>      {
>          bool cpus_locked = false;
>  
> +        if ( xen_guest &&
> +             !(flags & ~(FLUSH_TLB | FLUSH_TLB_GLOBAL | FLUSH_VA_VALID)) &&
> +             !xg_flush_tlbs() )
> +            return;

With the abstraction introduced recently by Wei I think this wants
to be a per-hypervisor hook, which would also get the linear address
passed, and which would then (rather than here) decide whether it
wants to also handle a single page flush a different way.

Jan
diff mbox series

Patch

diff --git a/xen/arch/x86/guest/xen/xen.c b/xen/arch/x86/guest/xen/xen.c
index 6dbc5f953f..e6493caecf 100644
--- a/xen/arch/x86/guest/xen/xen.c
+++ b/xen/arch/x86/guest/xen/xen.c
@@ -281,6 +281,17 @@  int xg_free_unused_page(mfn_t mfn)
     return rangeset_remove_range(mem, mfn_x(mfn), mfn_x(mfn));
 }
 
+int xg_flush_tlbs(void)
+{
+    int rc;
+
+    do {
+        rc = xen_hypercall_hvm_op(HVMOP_flush_tlbs, NULL);
+    } while ( rc == -ERESTART );
+
+    return rc;
+}
+
 static void ap_resume(void *unused)
 {
     map_vcpuinfo();
diff --git a/xen/arch/x86/smp.c b/xen/arch/x86/smp.c
index 427c33db9d..a892db28c1 100644
--- a/xen/arch/x86/smp.c
+++ b/xen/arch/x86/smp.c
@@ -15,6 +15,7 @@ 
 #include <xen/perfc.h>
 #include <xen/spinlock.h>
 #include <asm/current.h>
+#include <asm/guest.h>
 #include <asm/smp.h>
 #include <asm/mc146818rtc.h>
 #include <asm/flushtlb.h>
@@ -235,6 +236,11 @@  void flush_area_mask(const cpumask_t *mask, const void *va, unsigned int flags)
     {
         bool cpus_locked = false;
 
+        if ( xen_guest &&
+             !(flags & ~(FLUSH_TLB | FLUSH_TLB_GLOBAL | FLUSH_VA_VALID)) &&
+             !xg_flush_tlbs() )
+            return;
+
         spin_lock(&flush_lock);
         cpumask_and(&flush_cpumask, mask, &cpu_online_map);
         cpumask_clear_cpu(cpu, &flush_cpumask);
diff --git a/xen/include/asm-x86/guest/xen.h b/xen/include/asm-x86/guest/xen.h
index 2042a9a0c2..f0de9e4d71 100644
--- a/xen/include/asm-x86/guest/xen.h
+++ b/xen/include/asm-x86/guest/xen.h
@@ -36,6 +36,7 @@  extern uint32_t xen_cpuid_base;
 const struct hypervisor_ops *xg_probe(void);
 int xg_alloc_unused_page(mfn_t *mfn);
 int xg_free_unused_page(mfn_t mfn);
+int xg_flush_tlbs(void);
 
 DECLARE_PER_CPU(unsigned int, vcpu_id);
 DECLARE_PER_CPU(struct vcpu_info *, vcpu_info);
@@ -47,6 +48,12 @@  DECLARE_PER_CPU(struct vcpu_info *, vcpu_info);
 
 static inline const struct hypervisor_ops *xg_probe(void) { return NULL; }
 
+static inline int xg_flush_tlbs(void)
+{
+    ASSERT_UNREACHABLE();
+    return -ENOSYS;
+}
+
 #endif /* CONFIG_XEN_GUEST */
 #endif /* __X86_GUEST_XEN_H__ */