diff mbox series

[v2] tools/cpuid: Untangle Invariant TSC handling

Message ID 20200909122013.20613-1-andrew.cooper3@citrix.com
State New
Headers show
Series [v2] tools/cpuid: Untangle Invariant TSC handling | expand

Commit Message

Andrew Cooper Sept. 9, 2020, 12:20 p.m. UTC
ITSC being visible to the guest is currently implicit with the toolstack
unconditionally asking for it, and Xen clipping it based on the vTSC and/or
XEN_DOMCTL_disable_migrate settings.

This is problematic for several reasons.

First, the implicit vTSC behaviour manifests as a real bug on migration to a
host with a different frequency, with ITSC but without TSC scaling
capabilities, whereby the ITSC feature becomes advertised to the guest.  ITSC
will disappear again if the guest migrates to server with the same frequency
as the original, or to one with TSC scaling support.

Secondly, disallowing ITSC unless the guest doesn't migrate is conceptually
wrong.  It is common to have migration pools of identical hardware, at which
point the TSC frequency is nominally the same, and more modern hardware has
TSC scaling support anyway.  In both cases, it is safe to advertise ITSC and
migrate the guest.

Remove all implicit logic logic in Xen, and make ITSC part of the max CPUID
policies for guests.  Plumb an itsc parameter into xc_cpuid_apply_policy() and
have libxl__cpuid_legacy() fill in the two cases where it can reasonably
expect ITSC to be safe for the guest to see.

This is a behaviour change for TSC_MODE_NATIVE, where the ITSC will now
reliably not appear, and for the case where the user explicitly requests ITSC,
in which case it will appear even if the guest isn't marked as nomigrate.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
---
CC: Wei Liu <wl@xen.org>
CC: Roger Pau Monné <roger.pau@citrix.com>
CC: Ian Jackson <Ian.Jackson@citrix.com>
CC: Anthony PERARD <anthony.perard@citrix.com>

v2:
 * Rebase over library changes
---
 tools/libs/ctrl/include/xenctrl.h           |  4 ++--
 tools/libs/guest/xg_cpuid_x86.c             | 12 ++++++------
 tools/libxl/libxl_cpuid.c                   | 19 ++++++++++++++++++-
 xen/arch/x86/cpuid.c                        |  8 --------
 xen/arch/x86/time.c                         |  2 --
 xen/include/public/arch-x86/cpufeatureset.h |  2 +-
 6 files changed, 27 insertions(+), 20 deletions(-)
diff mbox series

Patch

diff --git a/tools/libs/ctrl/include/xenctrl.h b/tools/libs/ctrl/include/xenctrl.h
index 4c89b7294c..0a921a95fa 100644
--- a/tools/libs/ctrl/include/xenctrl.h
+++ b/tools/libs/ctrl/include/xenctrl.h
@@ -1828,14 +1828,14 @@  struct xc_xend_cpuid {
  * cases, and the generated policy must be compatible with a 4.13.
  *
  * Either pass a full new @featureset (and @nr_features), or adjust individual
- * features (@pae).
+ * features (@pae, @itsc).
  *
  * Then (optionally) apply legacy XEND overrides (@xend) to the result.
  */
 int xc_cpuid_apply_policy(xc_interface *xch,
                           uint32_t domid, bool restore,
                           const uint32_t *featureset,
-                          unsigned int nr_features, bool pae,
+                          unsigned int nr_features, bool pae, bool itsc,
                           const struct xc_xend_cpuid *xend);
 int xc_mca_op(xc_interface *xch, struct xen_mc *mc);
 int xc_mca_op_inject_v2(xc_interface *xch, unsigned int flags,
diff --git a/tools/libs/guest/xg_cpuid_x86.c b/tools/libs/guest/xg_cpuid_x86.c
index 0f24d6dd08..dc50106975 100644
--- a/tools/libs/guest/xg_cpuid_x86.c
+++ b/tools/libs/guest/xg_cpuid_x86.c
@@ -427,7 +427,7 @@  static int xc_cpuid_xend_policy(
 
 int xc_cpuid_apply_policy(xc_interface *xch, uint32_t domid, bool restore,
                           const uint32_t *featureset, unsigned int nr_features,
-                          bool pae,
+                          bool pae, bool itsc,
                           const struct xc_xend_cpuid *xend)
 {
     int rc;
@@ -556,6 +556,8 @@  int xc_cpuid_apply_policy(xc_interface *xch, uint32_t domid, bool restore,
     }
     else
     {
+        p->extd.itsc = itsc;
+
         if ( di.hvm )
             p->basic.pae = pae;
     }
@@ -625,12 +627,10 @@  int xc_cpuid_apply_policy(xc_interface *xch, uint32_t domid, bool restore,
         }
 
         /*
-         * These settings are necessary to cause earlier HVM_PARAM_NESTEDHVM /
-         * XEN_DOMCTL_disable_migrate settings to be reflected correctly in
-         * CPUID.  Xen will discard these bits if configuration hasn't been
-         * set for the domain.
+         * These settings are necessary to cause earlier HVM_PARAM_NESTEDHVM
+         * to be reflected correctly in CPUID.  Xen will discard these bits if
+         * configuration hasn't been set for the domain.
          */
-        p->extd.itsc = true;
         p->basic.vmx = true;
         p->extd.svm = true;
     }
diff --git a/tools/libxl/libxl_cpuid.c b/tools/libxl/libxl_cpuid.c
index 8b570b7e27..f54eb83a90 100644
--- a/tools/libxl/libxl_cpuid.c
+++ b/tools/libxl/libxl_cpuid.c
@@ -421,6 +421,7 @@  void libxl__cpuid_legacy(libxl_ctx *ctx, uint32_t domid, bool restore,
                          libxl_domain_build_info *info)
 {
     bool pae = true;
+    bool itsc;
 
     /*
      * For PV guests, PAE is Xen-controlled (it is the 'p' that differentiates
@@ -435,7 +436,23 @@  void libxl__cpuid_legacy(libxl_ctx *ctx, uint32_t domid, bool restore,
     if (info->type == LIBXL_DOMAIN_TYPE_HVM)
         pae = libxl_defbool_val(info->u.hvm.pae);
 
-    xc_cpuid_apply_policy(ctx->xch, domid, restore, NULL, 0, pae, info->cpuid);
+    /*
+     * Advertising Invariant TSC to a guest means that the TSC frequency won't
+     * change at any point in the future.
+     *
+     * We do not have enough information about potential migration
+     * destinations to know whether advertising ITSC is safe, but if the guest
+     * isn't going to migrate, then the current hardware is all that matters.
+     *
+     * Alternatively, an internal property of vTSC is that the values read are
+     * invariant.  Advertise ITSC when we know the domain will have emualted
+     * TSC everywhere it goes.
+     */
+    itsc = (libxl_defbool_val(info->disable_migrate) ||
+            info->tsc_mode == LIBXL_TSC_MODE_ALWAYS_EMULATE);
+
+    xc_cpuid_apply_policy(ctx->xch, domid, restore, NULL, 0,
+                          pae, itsc, info->cpuid);
 }
 
 static const char *input_names[2] = { "leaf", "subleaf" };
diff --git a/xen/arch/x86/cpuid.c b/xen/arch/x86/cpuid.c
index 4b424fac95..23425790e1 100644
--- a/xen/arch/x86/cpuid.c
+++ b/xen/arch/x86/cpuid.c
@@ -621,14 +621,6 @@  void recalculate_cpuid_policy(struct domain *d)
     }
 
     /*
-     * ITSC is masked by default (so domains are safe to migrate), but a
-     * toolstack which has configured disable_migrate or vTSC for a domain may
-     * safely select it, and needs a way of doing so.
-     */
-    if ( cpu_has_itsc && (d->disable_migrate || d->arch.vtsc) )
-        __set_bit(X86_FEATURE_ITSC, max_fs);
-
-    /*
      * On hardware with MSR_TSX_CTRL, the admin may have elected to disable
      * TSX and hide the feature bits.  Migrating-in VMs may have been booted
      * pre-mitigation when the TSX features were visbile.
diff --git a/xen/arch/x86/time.c b/xen/arch/x86/time.c
index 505e54ebd7..8938c0f435 100644
--- a/xen/arch/x86/time.c
+++ b/xen/arch/x86/time.c
@@ -2448,8 +2448,6 @@  int tsc_set_info(struct domain *d,
         }
     }
 
-    recalculate_cpuid_policy(d);
-
     return 0;
 }
 
diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h
index fc733e64f6..abd18722ee 100644
--- a/xen/include/public/arch-x86/cpufeatureset.h
+++ b/xen/include/public/arch-x86/cpufeatureset.h
@@ -246,7 +246,7 @@  XEN_CPUFEATURE(MOVDIR64B,     6*32+28) /*a  MOVDIR64B instruction */
 XEN_CPUFEATURE(ENQCMD,        6*32+29) /*   ENQCMD{,S} instructions */
 
 /* AMD-defined CPU features, CPUID level 0x80000007.edx, word 7 */
-XEN_CPUFEATURE(ITSC,          7*32+ 8) /*   Invariant TSC */
+XEN_CPUFEATURE(ITSC,          7*32+ 8) /*a  Invariant TSC */
 XEN_CPUFEATURE(EFRO,          7*32+10) /*   APERF/MPERF Read Only interface */
 
 /* AMD-defined CPU features, CPUID level 0x80000008.ebx, word 8 */