[RFC,v3,6/8] x86emul: support RDPRU
diff mbox series

Message ID c75e9cd6-facc-3568-f31e-9345a9c74064@suse.com
State New
Headers show
Series
  • x86emul: further work
Related show

Commit Message

Jan Beulich Jan. 6, 2020, 4:37 p.m. UTC
While the PM doesn't say so, this assumes that the MPERF value read this
way gets scaled similarly to its reading through RDMSR.

Also introduce the SVM related constants at this occasion.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
RFC: Andrew promised to take care of the CPUID side of this; will need
     re-basing over his work once available.
TBD: There are indications that the CPUID field used may be just 8 bits
     wide.
---
v4: Add GENERAL2_INTERCEPT_RDPRU and VMEXIT_RDPRU enumerators. Fold
    handling of out of bounds indexes into switch(). Avoid
    recalculate_misc() clobbering what recalculate_cpu_policy() has
    done. Re-base.
v3: New.

Comments

Andrew Cooper Jan. 6, 2020, 7:50 p.m. UTC | #1
On 06/01/2020 16:37, Jan Beulich wrote:
> While the PM doesn't say so, this assumes that the MPERF value read this
> way gets scaled similarly to its reading through RDMSR.
>
> Also introduce the SVM related constants at this occasion.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> ---
> RFC: Andrew promised to take care of the CPUID side of this; will need
>      re-basing over his work once available.
> TBD: There are indications that the CPUID field used may be just 8 bits
>      wide.

Getting there.  The movement of CPU_POLICY into the migrate stream is a
substantial step in the right direction.

I've got half a mind to use RDPRU as the first real "need to opt-in"
feature, seeing as there currently isn't an answer for how APERF/MPERF
is virtualisation-safe in the first place, and how exposing it in
userspace is any better.

Perhaps better would be to make APERF/MPERF opt-in to begin with (so it
starts ceasing to exist for VMs by default, but in a backwards
compatible way), and then derive RDPRU as dependent on APERF/MPERF.

~Andrew

Patch
diff mbox series

--- a/tools/libxl/libxl_cpuid.c
+++ b/tools/libxl/libxl_cpuid.c
@@ -259,6 +259,7 @@  int libxl_cpuid_parse_config(libxl_cpuid
 
         {"clzero",       0x80000008, NA, CPUID_REG_EBX,  0,  1},
         {"rstr-fp-err-ptrs", 0x80000008, NA, CPUID_REG_EBX, 2, 1},
+        {"rdpru",        0x80000008, NA, CPUID_REG_EBX,  4,  1},
         {"wbnoinvd",     0x80000008, NA, CPUID_REG_EBX,  9,  1},
         {"ibpb",         0x80000008, NA, CPUID_REG_EBX, 12,  1},
         {"ppin",         0x80000008, NA, CPUID_REG_EBX, 23,  1},
--- a/tools/misc/xen-cpuid.c
+++ b/tools/misc/xen-cpuid.c
@@ -147,6 +147,8 @@  static const char *const str_e8b[32] =
     [ 0] = "clzero",
     [ 2] = "rstr-fp-err-ptrs",
 
+    [ 4] = "rdpru",
+
     /* [ 8] */            [ 9] = "wbnoinvd",
 
     [12] = "ibpb",
--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -671,6 +671,13 @@  static int read_msr(
 {
     switch ( reg )
     {
+    case 0x000000e8: /* APERF */
+    case 0xc00000e8: /* APERF_RD_ONLY */
+#define APERF_LO_VALUE 0xAEAEAEAE
+#define APERF_HI_VALUE 0xEAEAEAEA
+        *val = ((uint64_t)APERF_HI_VALUE << 32) | APERF_LO_VALUE;
+        return X86EMUL_OKAY;
+
     case 0xc0000080: /* EFER */
         *val = ctxt->addr_size > 32 ? 0x500 /* LME|LMA */ : 0;
         return X86EMUL_OKAY;
@@ -2231,6 +2238,30 @@  int main(int argc, char **argv)
     else
         printf("skipped\n");
 
+    printf("%-40s", "Testing rdpru...");
+    instr[0] = 0x0f; instr[1] = 0x01; instr[2] = 0xfd;
+    regs.eip = (unsigned long)&instr[0];
+    regs.ecx = 1;
+    regs.eflags = EFLAGS_ALWAYS_SET;
+    rc = x86_emulate(&ctxt, &emulops);
+    if ( (rc != X86EMUL_OKAY) ||
+         (regs.eax != APERF_LO_VALUE) || (regs.edx != APERF_HI_VALUE) ||
+         !(regs.eflags & X86_EFLAGS_CF) ||
+         (regs.eip != (unsigned long)&instr[3]) )
+        goto fail;
+    if ( ctxt.cpuid->extd.rdpru_max < 0xffff )
+    {
+        regs.eip = (unsigned long)&instr[0];
+        regs.ecx = ctxt.cpuid->extd.rdpru_max + 1;
+        regs.eflags = EFLAGS_ALWAYS_SET | X86_EFLAGS_CF;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || regs.eax || regs.edx ||
+             (regs.eflags & X86_EFLAGS_CF) ||
+             (regs.eip != (unsigned long)&instr[3]) )
+            goto fail;
+    }
+    printf("okay\n");
+
     printf("%-40s", "Testing movq %mm3,(%ecx)...");
     if ( stack_exec && cpu_has_mmx )
     {
--- a/tools/tests/x86_emulator/x86-emulate.c
+++ b/tools/tests/x86_emulator/x86-emulate.c
@@ -78,6 +78,8 @@  bool emul_test_init(void)
     cp.feat.rdpid = true;
     cp.feat.movdiri = true;
     cp.extd.clzero = true;
+    cp.extd.rdpru = true;
+    cp.extd.rdpru_max = 1;
 
     if ( cpu_has_xsave )
     {
@@ -150,11 +152,11 @@  int emul_test_cpuid(
     }
 
     /*
-     * The emulator doesn't itself use CLZERO, so we can always run the
+     * The emulator doesn't itself use CLZERO/RDPRU, so we can always run the
      * respective test(s).
      */
     if ( leaf == 0x80000008 )
-        res->b |= 1U << 0;
+        res->b |= (1U << 0) | (1U << 4);
 
     return X86EMUL_OKAY;
 }
--- a/xen/arch/x86/cpuid.c
+++ b/xen/arch/x86/cpuid.c
@@ -232,8 +232,6 @@  static void recalculate_misc(struct cpui
     /* Most of Power/RAS hidden from guests. */
     p->extd.raw[0x7].a = p->extd.raw[0x7].b = p->extd.raw[0x7].c = 0;
 
-    p->extd.raw[0x8].d = 0;
-
     switch ( p->x86_vendor )
     {
     case X86_VENDOR_INTEL:
@@ -252,6 +250,7 @@  static void recalculate_misc(struct cpui
 
         p->extd.raw[0x8].a &= 0x0000ffff;
         p->extd.raw[0x8].c = 0;
+        p->extd.raw[0x8].d = 0;
         break;
 
     case X86_VENDOR_AMD:
@@ -270,6 +269,7 @@  static void recalculate_misc(struct cpui
 
         p->extd.raw[0x8].a &= 0x0000ffff; /* GuestMaxPhysAddr hidden. */
         p->extd.raw[0x8].c &= 0x0003f0ff;
+        p->extd.raw[0x8].d &= 0xffff0000;
 
         p->extd.raw[0x9] = EMPTY_LEAF;
 
@@ -577,6 +577,11 @@  void recalculate_cpuid_policy(struct dom
 
     p->extd.maxlinaddr = p->extd.lm ? 48 : 32;
 
+    if ( p->extd.rdpru )
+        p->extd.rdpru_max = min(p->extd.rdpru_max, max->extd.rdpru_max);
+    else
+        p->extd.rdpru_max = 0;
+
     recalculate_xstate(p);
     recalculate_misc(p);
 
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -1874,6 +1874,7 @@  in_protmode(
 #define vcpu_has_fma4()        (ctxt->cpuid->extd.fma4)
 #define vcpu_has_tbm()         (ctxt->cpuid->extd.tbm)
 #define vcpu_has_clzero()      (ctxt->cpuid->extd.clzero)
+#define vcpu_has_rdpru()       (ctxt->cpuid->extd.rdpru)
 #define vcpu_has_wbnoinvd()    (ctxt->cpuid->extd.wbnoinvd)
 
 #define vcpu_has_bmi1()        (ctxt->cpuid->feat.bmi1)
@@ -5698,6 +5699,50 @@  x86_emulate(
                 limit -= sizeof(zero);
             }
             break;
+
+        case 0xfd: /* rdpru */
+            vcpu_must_have(rdpru);
+
+            if ( !mode_ring0() )
+            {
+                fail_if(!ops->read_cr);
+                if ( (rc = ops->read_cr(4, &cr4, ctxt)) != X86EMUL_OKAY )
+                    goto done;
+                generate_exception_if(cr4 & X86_CR4_TSD, EXC_UD);
+            }
+
+            switch ( _regs.ecx | -(_regs.ecx > ctxt->cpuid->extd.rdpru_max) )
+            {
+            case 0:  n = MSR_IA32_MPERF; break;
+            case 1:  n = MSR_IA32_APERF; break;
+            default: n = 0; break;
+            }
+
+            _regs.eflags &= ~EFLAGS_MASK;
+            if ( n )
+            {
+                fail_if(!ops->read_msr);
+                switch ( rc = ops->read_msr(n, &msr_val, ctxt) )
+                {
+                case X86EMUL_OKAY:
+                    _regs.eflags |= X86_EFLAGS_CF;
+                    break;
+
+                case X86EMUL_EXCEPTION:
+                    x86_emul_reset_event(ctxt);
+                    rc = X86EMUL_OKAY;
+                    break;
+
+                default:
+                    goto done;
+                }
+            }
+
+            if ( !(_regs.eflags & X86_EFLAGS_CF) )
+                msr_val = 0;
+            _regs.r(dx) = msr_val >> 32;
+            _regs.r(ax) = (uint32_t)msr_val;
+            break;
         }
 
 #define _GRP7(mod, reg) \
--- a/xen/include/asm-x86/hvm/svm/vmcb.h
+++ b/xen/include/asm-x86/hvm/svm/vmcb.h
@@ -76,7 +76,8 @@  enum GenericIntercept2bits
     GENERAL2_INTERCEPT_MONITOR = 1 << 10,
     GENERAL2_INTERCEPT_MWAIT   = 1 << 11,
     GENERAL2_INTERCEPT_MWAIT_CONDITIONAL = 1 << 12,
-    GENERAL2_INTERCEPT_XSETBV  = 1 << 13
+    GENERAL2_INTERCEPT_XSETBV  = 1 << 13,
+    GENERAL2_INTERCEPT_RDPRU   = 1 << 14,
 };
 
 
@@ -300,6 +301,7 @@  enum VMEXIT_EXITCODE
     VMEXIT_MWAIT            = 139, /* 0x8b */
     VMEXIT_MWAIT_CONDITIONAL= 140, /* 0x8c */
     VMEXIT_XSETBV           = 141, /* 0x8d */
+    VMEXIT_RDPRU            = 142, /* 0x8e */
     VMEXIT_NPF              = 1024, /* 0x400, nested paging fault */
     VMEXIT_INVALID          =  -1
 };
--- a/xen/include/public/arch-x86/cpufeatureset.h
+++ b/xen/include/public/arch-x86/cpufeatureset.h
@@ -247,6 +247,7 @@  XEN_CPUFEATURE(EFRO,          7*32+10) /
 /* AMD-defined CPU features, CPUID level 0x80000008.ebx, word 8 */
 XEN_CPUFEATURE(CLZERO,        8*32+ 0) /*A  CLZERO instruction */
 XEN_CPUFEATURE(RSTR_FP_ERR_PTRS, 8*32+ 2) /*A  (F)X{SAVE,RSTOR} always saves/restores FPU Error pointers */
+XEN_CPUFEATURE(RDPRU,         8*32+ 4) /*A  RDPRU instruction */
 XEN_CPUFEATURE(WBNOINVD,      8*32+ 9) /*   WBNOINVD instruction */
 XEN_CPUFEATURE(IBPB,          8*32+12) /*A  IBPB support only (no IBRS, used by AMD) */
 XEN_CPUFEATURE(AMD_PPIN,      8*32+23) /*   Protected Processor Inventory Number */
--- a/xen/include/xen/lib/x86/cpuid.h
+++ b/xen/include/xen/lib/x86/cpuid.h
@@ -264,7 +264,7 @@  struct cpuid_policy
                 struct { DECL_BITFIELD(e8b); };
             };
             uint32_t nc:8, :4, apic_id_size:4, :16;
-            uint32_t /* d */:32;
+            uint16_t :16, rdpru_max;
         };
     } extd;