diff mbox series

[v1.1,5/5] tests: Introduce a TSX test

Message ID 20210614104716.23405-1-andrew.cooper3@citrix.com (mailing list archive)
State New
Headers show
Series None | expand

Commit Message

Andrew Cooper June 14, 2021, 10:47 a.m. UTC
See the comment at the top of test-tsx.c for details.

This covers various complexities encountered while trying to address the
recent TSX deprecation on client parts.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Roger Pau Monné <roger.pau@citrix.com>
CC: Wei Liu <wl@xen.org>

v1.1:
 * Set alternative guest policy, and check.
 * Cope with !HAP configurations.
 * Complete the comment at the top of test-tsx.c
---
 tools/tests/Makefile       |   1 +
 tools/tests/tsx/.gitignore |   1 +
 tools/tests/tsx/Makefile   |  43 ++++
 tools/tests/tsx/test-tsx.c | 515 +++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 560 insertions(+)
 create mode 100644 tools/tests/tsx/.gitignore
 create mode 100644 tools/tests/tsx/Makefile
 create mode 100644 tools/tests/tsx/test-tsx.c

Comments

Jan Beulich June 14, 2021, 1:31 p.m. UTC | #1
On 14.06.2021 12:47, Andrew Cooper wrote:
> --- /dev/null
> +++ b/tools/tests/tsx/Makefile
> @@ -0,0 +1,43 @@
> +XEN_ROOT = $(CURDIR)/../../..
> +include $(XEN_ROOT)/tools/Rules.mk
> +
> +TARGET := test-tsx
> +
> +.PHONY: all
> +all: $(TARGET)
> +
> +.PHONY: run
> +run: $(TARGET)
> +	./$(TARGET)
> +
> +.PHONY: clean
> +clean:
> +	$(RM) -f -- *.o $(TARGET) $(DEPS_RM)

I'm surprised this is necessary, but indeed I can see it elsewhere too.

> +.PHONY: distclean
> +distclean: clean
> +	$(RM) -f -- *~
> +
> +.PHONY: install
> +install: all
> +
> +.PHONY: uninstall
> +uninstall:
> +
> +CFLAGS += -Werror -std=gnu11

Is this strictly necessary? It excludes a fair share of the gcc
versions that we claim the tree can be built with. If it is
necessary, then I think it needs arranging for that the tools/
build as a whole won't fail just because of this test not
building. We do something along these lines for the x86 emulator
harness, for example.

> +CFLAGS += $(CFLAGS_xeninclude)
> +CFLAGS += $(CFLAGS_libxenctrl)
> +CFLAGS += $(CFLAGS_libxenguest)
> +CFLAGS += -I$(XEN_ROOT)/tools/libs/ctrl -I$(XEN_ROOT)/tools/libs/guest
> +CFLAGS += $(APPEND_CFLAGS)
> +
> +LDFLAGS += $(LDLIBS_libxenctrl)
> +LDFLAGS += $(LDLIBS_libxenguest)
> +LDFLAGS += $(APPEND_LDFLAGS)
> +
> +test-tsx.o: Makefile
> +
> +test-tsx: test-tsx.o

Wouldn't you want to use $(TARGET) at least here?

> +/*
> + * Test a specific TSX MSR for consistency across the system, taking into
> + * account whether it ought to be accessable or not.
> + *
> + * We can't query offline CPUs, so skip those if encountered.  We don't care
> + * particularly for the exact MSR value, but we do care that it is the same
> + * everywhere.
> + */
> +static void test_tsx_msr_consistency(unsigned int msr, bool accessable)

Isn't it "accessible"?

> +{
> +    uint64_t cpu0_val = ~0;
> +
> +    for ( unsigned int cpu = 0; cpu <= physinfo.max_cpu_id; ++cpu )
> +    {
> +        xc_resource_entry_t ent = {
> +            .u.cmd = XEN_RESOURCE_OP_MSR_READ,
> +            .idx = msr,
> +        };
> +        xc_resource_op_t op = {
> +            .cpu = cpu,
> +            .entries = &ent,
> +            .nr_entries = 1,
> +        };
> +        int rc = xc_resource_op(xch, 1, &op);
> +
> +        if ( rc < 0 )
> +        {
> +            /* Don't emit a message for offline CPUs */
> +            if ( errno != ENODEV )
> +                fail("  xc_resource_op() for CPU%u failed: rc %d, errno %d - %s\n",
> +                     cpu, rc, errno, strerror(errno));
> +            continue;
> +        }
> +
> +        if ( accessable )
> +        {
> +            if ( rc != 1 )
> +            {
> +                fail("  Expected 1 result, got %u\n", rc);

%d

> +                continue;
> +            }
> +            if ( ent.u.ret != 0 )
> +            {
> +                fail("  Expected ok, got %d\n", ent.u.ret);
> +                continue;
> +            }
> +        }
> +        else
> +        {
> +            if ( rc != 0 )
> +                fail("  Expected 0 results, got %u\n", rc);
> +            else if ( ent.u.ret != -EPERM )
> +                fail("  Expected -EPERM, got %d\n", ent.u.ret);
> +            continue;
> +        }
> +
> +        if ( cpu == 0 )
> +        {
> +            cpu0_val = ent.val;
> +            printf("  CPU0 val %#"PRIx64"\n", cpu0_val);
> +        }
> +        else if ( ent.val != cpu0_val )
> +            fail("  CPU%u val %#"PRIx64" differes from CPU0 %#"PRIx64"\n",

Nit: differs?

> +/*
> + * Probe for how RTM behaves, deliberately not inspecting CPUID.
> + * Distinguishes between "no support at all" (i.e. XBEGIN suffers #UD),
> + * working ok, and appearing to always abort.
> + */
> +static enum rtm_behaviour probe_rtm_behaviour(void)
> +{
> +    for ( int i = 0; i < 1000; ++i )
> +    {
> +        /*
> +         * Opencoding the RTM infrastructure from immintrin.h, because we
> +         * still support older versions of GCC.  ALso so we can include #UD
> +         * detection logic.
> +         */
> +#define XBEGIN_STARTED -1
> +#define XBEGIN_UD      -2
> +        unsigned int status = XBEGIN_STARTED;
> +
> +        asm volatile (".Lxbegin: .byte 0xc7,0xf8,0,0,0,0" /* XBEGIN 1f; 1: */
> +                      : "+a" (status) :: "memory");
> +        if ( status == XBEGIN_STARTED )
> +        {
> +            asm volatile (".byte 0x0f,0x01,0xd5" ::: "memory"); /* XEND */

Nit: This otherwise following hypervisor style, the asm()s want more
blanks added (also again further down).

> +            return RTM_OK;
> +        }
> +        else if ( status == XBEGIN_UD )
> +            return RTM_UD;
> +    }
> +
> +    return RTM_ABORT;
> +}
> +
> +static struct sigaction old_sigill;
> +
> +static void sigill_handler(int signo, siginfo_t *info, void *extra)
> +{
> +    extern char xbegin_label[] asm(".Lxbegin");

Perhaps add const? I'm also not sure about .L names used for extern-s.

> +    if ( info->si_addr == xbegin_label ||
> +         memcmp(info->si_addr, "\xc7\xf8\x00\x00\x00\x00", 6) == 0 )

Why the || here? I could see you use && if you really wanted to be on
the safe side, but the way you have it I don't understand the
intentions.

> +    {
> +        ucontext_t *context = extra;
> +
> +        /*
> +         * Found the XBEGIN instruction.  Step over it, and update `status` to
> +         * signal #UD.
> +         */
> +#ifdef __x86_64__
> +        context->uc_mcontext.gregs[REG_RIP] += 6;
> +        context->uc_mcontext.gregs[REG_RAX] = XBEGIN_UD;
> +#else
> +        context->uc_mcontext.gregs[REG_EIP] += 6;
> +        context->uc_mcontext.gregs[REG_EAX] = XBEGIN_UD;
> +#endif

At the very least for this, don't you need to constrain the test to
just Linux?

> +static void test_tsx(void)
> +{
> +    int rc;
> +
> +    /* Read all policies except raw. */
> +    for ( int i = XEN_SYSCTL_cpu_policy_host;

To avoid having this as bad precedent, even though it's "just" testing
code: unsigned int? (I've first spotted this here, but later I've
found more places elsewhere.)

Jan
Andrew Cooper June 14, 2021, 2:50 p.m. UTC | #2
On 14/06/2021 14:31, Jan Beulich wrote:
> On 14.06.2021 12:47, Andrew Cooper wrote:
>> +.PHONY: distclean
>> +distclean: clean
>> +	$(RM) -f -- *~
>> +
>> +.PHONY: install
>> +install: all
>> +
>> +.PHONY: uninstall
>> +uninstall:
>> +
>> +CFLAGS += -Werror -std=gnu11
> Is this strictly necessary?

Appears not.  Dropped.

>
>> +            return RTM_OK;
>> +        }
>> +        else if ( status == XBEGIN_UD )
>> +            return RTM_UD;
>> +    }
>> +
>> +    return RTM_ABORT;
>> +}
>> +
>> +static struct sigaction old_sigill;
>> +
>> +static void sigill_handler(int signo, siginfo_t *info, void *extra)
>> +{
>> +    extern char xbegin_label[] asm(".Lxbegin");
> Perhaps add const? I'm also not sure about .L names used for extern-s.

Well - they work perfectly fine even with the Clang integrated assembler.

>
>> +    if ( info->si_addr == xbegin_label ||
>> +         memcmp(info->si_addr, "\xc7\xf8\x00\x00\x00\x00", 6) == 0 )
> Why the || here? I could see you use && if you really wanted to be on
> the safe side, but the way you have it I don't understand the
> intentions.

That should have been &&, but I also appear to have lost a noclone
attribute too.

>
>> +    {
>> +        ucontext_t *context = extra;
>> +
>> +        /*
>> +         * Found the XBEGIN instruction.  Step over it, and update `status` to
>> +         * signal #UD.
>> +         */
>> +#ifdef __x86_64__
>> +        context->uc_mcontext.gregs[REG_RIP] += 6;
>> +        context->uc_mcontext.gregs[REG_RAX] = XBEGIN_UD;
>> +#else
>> +        context->uc_mcontext.gregs[REG_EIP] += 6;
>> +        context->uc_mcontext.gregs[REG_EAX] = XBEGIN_UD;
>> +#endif
> At the very least for this, don't you need to constrain the test to
> just Linux?

I guess it was too much to hope that this would be compatible across the
BSDs too.

And the FreeBSD CI did notice it, but apparently didn't email me...

I'll try to make it BSD compatible.

>
>> +static void test_tsx(void)
>> +{
>> +    int rc;
>> +
>> +    /* Read all policies except raw. */
>> +    for ( int i = XEN_SYSCTL_cpu_policy_host;
> To avoid having this as bad precedent, even though it's "just" testing
> code: unsigned int? (I've first spotted this here, but later I've
> found more places elsewhere.)

Well - I question if it even is "bad" precedent.

For array bounds which are constants, the compiler can (and does) do
better than anything we can write in C here, as it is arch-dependent
whether signed or unsigned is better to use.

Beyond that, it's just code volume.

~Andrew
Jan Beulich June 14, 2021, 2:59 p.m. UTC | #3
On 14.06.2021 16:50, Andrew Cooper wrote:
> On 14/06/2021 14:31, Jan Beulich wrote:
>> On 14.06.2021 12:47, Andrew Cooper wrote:
>>> +static void test_tsx(void)
>>> +{
>>> +    int rc;
>>> +
>>> +    /* Read all policies except raw. */
>>> +    for ( int i = XEN_SYSCTL_cpu_policy_host;
>> To avoid having this as bad precedent, even though it's "just" testing
>> code: unsigned int? (I've first spotted this here, but later I've
>> found more places elsewhere.)
> 
> Well - I question if it even is "bad" precedent.
> 
> For array bounds which are constants, the compiler can (and does) do
> better than anything we can write in C here, as it is arch-dependent
> whether signed or unsigned is better to use.
> 
> Beyond that, it's just code volume.

Well, no, I disagree. Any use of variables for array indexing,
when not intentionally meaning negative indexes as well, would
better use unsigned variables. This is just so that in cases
where it does matter, people will not end up cloning from an
instance where it may not be important because of, as you say,
e.g. constant loop bounds.

As to the compiler doing better - if it can when the induction
variable is (implicitly) signed, why would it not be able to
when the variable is (explicitly) unsigned?

Jan
Edwin Torok June 14, 2021, 3:55 p.m. UTC | #4
On Mon, 2021-06-14 at 11:47 +0100, Andrew Cooper wrote:
> See the comment at the top of test-tsx.c for details.
> 
> This covers various complexities encountered while trying to address
> the
> recent TSX deprecation on client parts.
> 
> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
> ---
> CC: Jan Beulich <JBeulich@suse.com>
> CC: Roger Pau Monné <roger.pau@citrix.com>
> CC: Wei Liu <wl@xen.org>
> 
> v1.1:
>  * Set alternative guest policy, and check.
>  * Cope with !HAP configurations.
>  * Complete the comment at the top of test-tsx.c
> ---
>  tools/tests/Makefile       |   1 +
>  tools/tests/tsx/.gitignore |   1 +
>  tools/tests/tsx/Makefile   |  43 ++++
>  tools/tests/tsx/test-tsx.c | 515
> +++++++++++++++++++++++++++++++++++++++++++++
>  4 files changed, 560 insertions(+)
>  create mode 100644 tools/tests/tsx/.gitignore
>  create mode 100644 tools/tests/tsx/Makefile
>  create mode 100644 tools/tests/tsx/test-tsx.c
> 
> diff --git a/tools/tests/Makefile b/tools/tests/Makefile
> index 8746aabe6b..25531a984a 100644
> --- a/tools/tests/Makefile
> +++ b/tools/tests/Makefile
> @@ -5,6 +5,7 @@ SUBDIRS-y :=
>  SUBDIRS-y += resource
>  SUBDIRS-$(CONFIG_X86) += cpu-policy
>  SUBDIRS-$(CONFIG_X86) += mce-test
> +SUBDIRS-$(CONFIG_X86) += tsx
>  ifneq ($(clang),y)
>  SUBDIRS-$(CONFIG_X86) += x86_emulator
>  endif
> diff --git a/tools/tests/tsx/.gitignore b/tools/tests/tsx/.gitignore
> new file mode 100644
> index 0000000000..97ec4db7ff
> --- /dev/null
> +++ b/tools/tests/tsx/.gitignore
> @@ -0,0 +1 @@
> +test-tsx
> diff --git a/tools/tests/tsx/Makefile b/tools/tests/tsx/Makefile
> new file mode 100644
> index 0000000000..7381a4f5a4
> --- /dev/null
> +++ b/tools/tests/tsx/Makefile
> @@ -0,0 +1,43 @@
> +XEN_ROOT = $(CURDIR)/../../..
> +include $(XEN_ROOT)/tools/Rules.mk
> +
> +TARGET := test-tsx
> +
> +.PHONY: all
> +all: $(TARGET)
> +
> +.PHONY: run
> +run: $(TARGET)
> +	./$(TARGET)
> +
> +.PHONY: clean
> +clean:
> +	$(RM) -f -- *.o $(TARGET) $(DEPS_RM)
> +
> +.PHONY: distclean
> +distclean: clean
> +	$(RM) -f -- *~
> +
> +.PHONY: install
> +install: all
> +
> +.PHONY: uninstall
> +uninstall:
> +
> +CFLAGS += -Werror -std=gnu11
> +CFLAGS += $(CFLAGS_xeninclude)
> +CFLAGS += $(CFLAGS_libxenctrl)
> +CFLAGS += $(CFLAGS_libxenguest)
> +CFLAGS += -I$(XEN_ROOT)/tools/libs/ctrl
> -I$(XEN_ROOT)/tools/libs/guest
> +CFLAGS += $(APPEND_CFLAGS)
> +
> +LDFLAGS += $(LDLIBS_libxenctrl)
> +LDFLAGS += $(LDLIBS_libxenguest)
> +LDFLAGS += $(APPEND_LDFLAGS)
> +
> +test-tsx.o: Makefile
> +
> +test-tsx: test-tsx.o
> +	$(CC) -o $@ $< $(LDFLAGS)
> +
> +-include $(DEPS_INCLUDE)
> diff --git a/tools/tests/tsx/test-tsx.c b/tools/tests/tsx/test-tsx.c
> new file mode 100644
> index 0000000000..036b36e797
> --- /dev/null
> +++ b/tools/tests/tsx/test-tsx.c
> @@ -0,0 +1,515 @@
> +/*
> + * TSX settings and consistency tests
> + *
> + * This tests various behaviours and invariants with regards to
> TSX.  It
> + * ideally wants running for several microcode versions, and all
> applicable
> + * tsx= commandline settings, on a single CPU, including after an S3
> + * suspend/resume event.
> + *
> + * It tests specifically:
> + *  - The consistency of MSR_TSX_CTRL/MSR_TSX_FORCE_ABORT values
> across the
> + *    system, and their accessibility WRT data in the host CPU
> policy.
> + *  - The actual behaviour of RTM on the system.
> + *  - Cross-check the default/max policies based on the actual RTM
> behaviour.
> + *  - Create some guests, check their defaults, and check that the
> defaults
> + *    can be changed.
> + */
> +
> +#define _GNU_SOURCE
> +
> +#include <err.h>
> +#include <errno.h>
> +#include <inttypes.h>
> +#include <signal.h>
> +#include <stdio.h>
> +#include <string.h>
> +#include <sys/mman.h>
> +#include <sys/ucontext.h>
> +
> +#include <xenctrl.h>
> +#include <xenguest.h>
> +#include <xen-tools/libs.h>
> +
> +#include "xg_private.h"
> +
> +enum {
> +#define XEN_CPUFEATURE(name, value) X86_FEATURE_##name = value,
> +#include <xen/arch-x86/cpufeatureset.h>
> +};
> +#define bitmaskof(idx)      (1u << ((idx) & 31))
> +
> +#define MSR_ARCH_CAPABILITIES               0x0000010a
> +#define  ARCH_CAPS_TSX_CTRL                 (1 <<  7)
> +#define MSR_TSX_FORCE_ABORT                 0x0000010f
> +#define MSR_TSX_CTRL                        0x00000122
> +
> +static unsigned int nr_failures;
> +#define fail(fmt, ...)                          \
> +({                                              \
> +    nr_failures++;                              \
> +    (void)printf(fmt, ##__VA_ARGS__);           \
> +})
> +
> +static xc_interface *xch;
> +
> +/*
> + * Policies, arranged as an array for easy collection of all of
> them.  We
> + * don't care about the raw policy (index 0) so reuse that for the
> guest
> + * policy.
> + */
> +static struct xc_cpu_policy policies[6];
> +#define guest_policy policies[0]
> +#define host         policies[XEN_SYSCTL_cpu_policy_host]
> +#define pv_max       policies[XEN_SYSCTL_cpu_policy_pv_max]
> +#define hvm_max      policies[XEN_SYSCTL_cpu_policy_hvm_max]
> +#define pv_default   policies[XEN_SYSCTL_cpu_policy_pv_default]
> +#define hvm_default  policies[XEN_SYSCTL_cpu_policy_hvm_default]
> +
> +static bool xen_has_pv = true, xen_has_hvm = true;
> +
> +static xc_physinfo_t physinfo;
> +
> +static enum rtm_behaviour {
> +    RTM_UD,
> +    RTM_OK,
> +    RTM_ABORT,
> +} rtm_behaviour;
> +
> +/*
> + * Test a specific TSX MSR for consistency across the system, taking
> into
> + * account whether it ought to be accessable or not.
> + *
> + * We can't query offline CPUs, so skip those if encountered.  We
> don't care
> + * particularly for the exact MSR value, but we do care that it is
> the same
> + * everywhere.
> + */
> +static void test_tsx_msr_consistency(unsigned int msr, bool
> accessable)
> +{
> +    uint64_t cpu0_val = ~0;
> +
> +    for ( unsigned int cpu = 0; cpu <= physinfo.max_cpu_id; ++cpu )
> +    {
> +        xc_resource_entry_t ent = {
> +            .u.cmd = XEN_RESOURCE_OP_MSR_READ,
> +            .idx = msr,
> +        };
> +        xc_resource_op_t op = {
> +            .cpu = cpu,
> +            .entries = &ent,
> +            .nr_entries = 1,
> +        };
> +        int rc = xc_resource_op(xch, 1, &op);
> +
> +        if ( rc < 0 )
> +        {
> +            /* Don't emit a message for offline CPUs */
> +            if ( errno != ENODEV )
> +                fail("  xc_resource_op() for CPU%u failed: rc %d,
> errno %d - %s\n",
> +                     cpu, rc, errno, strerror(errno));
> +            continue;
> +        }
> +
> +        if ( accessable )
> +        {
> +            if ( rc != 1 )
> +            {
> +                fail("  Expected 1 result, got %u\n", rc);
> +                continue;
> +            }
> +            if ( ent.u.ret != 0 )
> +            {
> +                fail("  Expected ok, got %d\n", ent.u.ret);
> +                continue;
> +            }
> +        }
> +        else
> +        {
> +            if ( rc != 0 )
> +                fail("  Expected 0 results, got %u\n", rc);
> +            else if ( ent.u.ret != -EPERM )
> +                fail("  Expected -EPERM, got %d\n", ent.u.ret);
> +            continue;
> +        }
> +
> +        if ( cpu == 0 )
> +        {
> +            cpu0_val = ent.val;
> +            printf("  CPU0 val %#"PRIx64"\n", cpu0_val);
> +        }
> +        else if ( ent.val != cpu0_val )
> +            fail("  CPU%u val %#"PRIx64" differes from CPU0
> %#"PRIx64"\n",

Typo: differs?

> +                 cpu, ent.val, cpu0_val);
> +    }
> +}
> +
> +/*
> + * Check all TSX MSRs, and in particular that their accessibility
> matches what
> + * is expressed in the host CPU policy.
> + */
> +static void test_tsx_msrs(void)
> +{
> +    printf("Testing MSR_TSX_FORCE_ABORT consistency\n");
> +    test_tsx_msr_consistency(
> +        MSR_TSX_FORCE_ABORT, host.cpuid.feat.tsx_force_abort);
> +
> +    printf("Testing MSR_TSX_CTRL consistency\n");
> +    test_tsx_msr_consistency(
> +        MSR_TSX_CTRL, host.msr.arch_caps.tsx_ctrl);
> +}


This is great, could we extend the test to all MSRs that Xen knows
about and are expected to be identical? Particularly
MSR_SPEC_CTRL, MSR_MCU_OPT_CTRL, and I see some MSRs used for errata
workarounds like MSR_MCU_OPT_CTRL, possiblye more.

> +
> +/*
> + * Probe for how RTM behaves, deliberately not inspecting CPUID.
> + * Distinguishes between "no support at all" (i.e. XBEGIN suffers
> #UD),
> + * working ok, and appearing to always abort.
> + */
> +static enum rtm_behaviour probe_rtm_behaviour(void)
> +{
> +    for ( int i = 0; i < 1000; ++i )
> +    {
> +        /*
> +         * Opencoding the RTM infrastructure from immintrin.h,
> because we
> +         * still support older versions of GCC.  ALso so we can
> include #UD
> +         * detection logic.
> +         */
> +#define XBEGIN_STARTED -1
> +#define XBEGIN_UD      -2
> +        unsigned int status = XBEGIN_STARTED;
> +
> +        asm volatile (".Lxbegin: .byte 0xc7,0xf8,0,0,0,0" /* XBEGIN
> 1f; 1: */
> +                      : "+a" (status) :: "memory");
> +        if ( status == XBEGIN_STARTED )
> +        {
> +            asm volatile (".byte 0x0f,0x01,0xd5" ::: "memory"); /*
> XEND */
> +            return RTM_OK;
> +        }
> +        else if ( status == XBEGIN_UD )
> +            return RTM_UD;
> +    }
> +
> +    return RTM_ABORT;
> +}
> +
> +static struct sigaction old_sigill;
> +
> +static void sigill_handler(int signo, siginfo_t *info, void *extra)
> +{
> +    extern char xbegin_label[] asm(".Lxbegin");
> +
> +    if ( info->si_addr == xbegin_label ||
> +         memcmp(info->si_addr, "\xc7\xf8\x00\x00\x00\x00", 6) == 0 )
> +    {
> +        ucontext_t *context = extra;
> +
> +        /*
> +         * Found the XBEGIN instruction.  Step over it, and update
> `status` to
> +         * signal #UD.
> +         */
> +#ifdef __x86_64__
> +        context->uc_mcontext.gregs[REG_RIP] += 6;
> +        context->uc_mcontext.gregs[REG_RAX] = XBEGIN_UD;
> +#else
> +        context->uc_mcontext.gregs[REG_EIP] += 6;
> +        context->uc_mcontext.gregs[REG_EAX] = XBEGIN_UD;
> +#endif
> +    }
> +    else
> +    {
> +        /*
> +         * Not the SIGILL we're looking for...  Restore the old
> handler and
> +         * try again.  Will likely coredump as a result.
> +         */
> +        sigaction(SIGILL, &old_sigill, NULL);
> +    }
> +}
> +
> +static void test_rtm_behaviour(void)
> +{
> +    struct sigaction new_sigill = {
> +        .sa_flags = SA_SIGINFO,
> +        .sa_sigaction = sigill_handler,
> +    };
> +    const char *str;
> +
> +    printf("Testing RTM behaviour\n");
> +
> +    /*
> +     * Install a custom SIGILL handler while probing for RTM
> behaviour, as the
> +     * XBEGIN instruction might suffer #UD.
> +     */
> +    sigaction(SIGILL, &new_sigill, &old_sigill);
> +    rtm_behaviour = probe_rtm_behaviour();
> +    sigaction(SIGILL, &old_sigill, NULL);
> +
> +    switch ( rtm_behaviour )
> +    {
> +    case RTM_UD:    str = "#UD";   break;
> +    case RTM_OK:    str = "OK";    break;
> +    case RTM_ABORT: str = "Abort"; break;
> +    default:        str = NULL;    break;
> +    }
> +
> +    if ( str )
> +        printf("  Got %s\n", str);
> +    else
> +        return fail("  Got unexpected behaviour %d\n",
> rtm_behaviour);
> +
> +    if ( host.cpuid.feat.rtm )
> +    {
> +        if ( rtm_behaviour == RTM_UD )
> +            fail("  Host reports RTM, but appears unavailable\n");
> +    }
> +    else
> +    {
> +        if ( rtm_behaviour != RTM_UD )
> +            fail("  Host reports no RTM, but appears available\n");
> +    }
> +}
> +
> +static void dump_tsx_details(const struct xc_cpu_policy *p, const
> char *pref)
> +{
> +    printf("  %s RTM %u, HLE %u, TSX_FORCE_ABORT %u,
> RTM_ALWAYS_ABORT %u, TSX_CTRL %u\n",
> +           pref,
> +           p->cpuid.feat.rtm,
> +           p->cpuid.feat.hle,
> +           p->cpuid.feat.tsx_force_abort,
> +           p->cpuid.feat.rtm_always_abort,
> +           p->msr.arch_caps.tsx_ctrl);
> +}
> +
> +/* Sanity test various invariants we expect in the default/max
> policies. */
> +static void test_guest_policies(const struct xc_cpu_policy *max,
> +                                const struct xc_cpu_policy *def)
> +{
> +    const struct cpuid_policy *cm = &max->cpuid;
> +    const struct cpuid_policy *cd = &def->cpuid;
> +    const struct msr_policy *mm = &max->msr;
> +    const struct msr_policy *md = &def->msr;
> +
> +    dump_tsx_details(max, "Max:");
> +    dump_tsx_details(def, "Def:");
> +
> +    if ( ((cm->feat.raw[0].d | cd->feat.raw[0].d) &
> +          (bitmaskof(X86_FEATURE_TSX_FORCE_ABORT) |
> +           bitmaskof(X86_FEATURE_RTM_ALWAYS_ABORT))) ||
> +         ((mm->arch_caps.raw | md->arch_caps.raw) &
> ARCH_CAPS_TSX_CTRL) )
> +        fail("  Xen-only TSX controls offered to guest\n");
> +
> +    switch ( rtm_behaviour )
> +    {
> +    case RTM_UD:
> +        if ( (cm->feat.raw[0].b | cd->feat.raw[0].b) &
> +             (bitmaskof(X86_FEATURE_HLE) |
> bitmaskof(X86_FEATURE_RTM)) )
> +             fail("  HLE/RTM offered to guests despite not being
> available\n");
> +        break;
> +
> +    case RTM_ABORT:
> +        if ( cd->feat.raw[0].b &
> +             (bitmaskof(X86_FEATURE_HLE) |
> bitmaskof(X86_FEATURE_RTM)) )
> +             fail("  HLE/RTM offered to guests by default despite
> not being usable\n");
> +        break;
> +
> +    case RTM_OK:
> +        if ( !cm->feat.rtm || !cd->feat.rtm )
> +             fail("  RTM not offered to guests despite being
> available\n");
> +        break;
> +    }
> +
> +    if ( cd->feat.hle )
> +        fail("  Fail: HLE offered in default policy\n");
> +}
> +
> +static void test_def_max_policies(void)
> +{
> +    if ( xen_has_pv )
> +    {
> +        printf("Testing PV default/max policies\n");
> +        test_guest_policies(&pv_max, &pv_default);
> +    }
> +
> +    if ( xen_has_hvm )
> +    {
> +        printf("Testing HVM default/max policies\n");
> +        test_guest_policies(&hvm_max, &hvm_default);
> +    }
> +}
> +
> +static void test_guest(struct xen_domctl_createdomain *c)
> +{
> +    uint32_t domid = 0;
> +    int rc;
> +
> +    rc = xc_domain_create(xch, &domid, c);
> +    if ( rc )
> +        return fail("  Domain create failure: %d - %s\n",
> +                    errno, strerror(errno));
> +
> +    printf("  Created d%u\n", domid);
> +
> +    rc = xc_cpu_policy_get_domain(xch, domid, &guest_policy);
> +    if ( rc )
> +    {
> +        fail("  Failed to obtain domain policy: %d - %s\n",
> +             errno, strerror(errno));
> +        goto out;
> +    }
> +
> +    dump_tsx_details(&guest_policy, "Cur:");
> +
> +    /*
> +     * Check defaults given to the guest.
> +     */
> +    if ( guest_policy.cpuid.feat.rtm != (rtm_behaviour == RTM_OK) )
> +        fail("  RTM %u in guest, despite rtm behaviour\n",
> +             guest_policy.cpuid.feat.rtm);
> +
> +    if ( guest_policy.cpuid.feat.hle ||
> +         guest_policy.cpuid.feat.tsx_force_abort ||
> +         guest_policy.cpuid.feat.rtm_always_abort ||
> +         guest_policy.msr.arch_caps.tsx_ctrl )
> +        fail("  Unexpected features advertised\n");
> +
> +    if ( host.cpuid.feat.rtm )
> +    {
> +        unsigned int _7b0;
> +
> +        /*
> +         * If host RTM is available, all combinations of guest flags
> should be
> +         * possible.  Flip both HLE/RTM to check non-default
> settings.
> +         */
> +        _7b0 = (guest_policy.cpuid.feat.raw[0].b ^=
> +                (bitmaskof(X86_FEATURE_HLE) |
> bitmaskof(X86_FEATURE_RTM)));
> +
> +        /* Set the new policy. */
> +        rc = xc_cpu_policy_set_domain(xch, domid, &guest_policy);
> +        if ( rc )
> +        {
> +            fail("  Failed to set domain policy: %d - %s\n",
> +                 errno, strerror(errno));
> +            goto out;
> +        }
> +
> +        /* Re-get the new policy. */
> +        rc = xc_cpu_policy_get_domain(xch, domid, &guest_policy);
> +        if ( rc )
> +        {
> +            fail("  Failed to obtain domain policy: %d - %s\n",
> +                 errno, strerror(errno));
> +            goto out;
> +        }
> +
> +        dump_tsx_details(&guest_policy, "Cur:");
> +
> +        if ( guest_policy.cpuid.feat.raw[0].b != _7b0 )
> +        {
> +            fail("  Expected CPUID.7[1].b 0x%08x differes from
> actual 0x%08x\n",
> +                 _7b0, guest_policy.cpuid.feat.raw[0].b);
> +            goto out;
> +        }
> +    }
> +
> + out:
> +    rc = xc_domain_destroy(xch, domid);
> +    if ( rc )
> +        fail("  Failed to destroy domain: %d - %s\n",
> +             errno, strerror(errno));
> +}
> +
> +static void test_guests(void)
> +{
> +    if ( xen_has_pv )
> +    {
> +        struct xen_domctl_createdomain c = {
> +            .max_vcpus = 1,
> +            .max_grant_frames = 1,
> +        };
> +
> +        printf("Testing PV guest\n");
> +        test_guest(&c);
> +    }
> +
> +    if ( xen_has_hvm )
> +    {
> +        struct xen_domctl_createdomain c = {
> +            .flags = XEN_DOMCTL_CDF_hvm,
> +            .max_vcpus = 1,
> +            .max_grant_frames = 1,
> +            .arch = {
> +                .emulation_flags = XEN_X86_EMU_LAPIC,
> +            },
> +        };
> +
> +        if ( physinfo.capabilities & XEN_SYSCTL_PHYSCAP_hap )
> +            c.flags |= XEN_DOMCTL_CDF_hap;
> +        else if ( !(physinfo.capabilities &
> XEN_SYSCTL_PHYSCAP_shadow) )
> +            return fail("  HVM available, but neither HAP nor
> Shadow\n");
> +
> +        printf("Testing HVM guest\n");
> +        test_guest(&c);
> +    }
> +}
> +
> +/* Obtain some general data, then run the tests. */
> +static void test_tsx(void)
> +{
> +    int rc;
> +
> +    /* Read all policies except raw. */
> +    for ( int i = XEN_SYSCTL_cpu_policy_host;
> +          i <= XEN_SYSCTL_cpu_policy_hvm_default; ++i )
> +    {
> +        rc = xc_cpu_policy_get_system(xch, i, &policies[i]);
> +
> +        if ( rc == -1 && errno == EOPNOTSUPP )
> +        {
> +            /*
> +             * Use EOPNOTSUPP to spot Xen missing CONFIG_{PV,HVM},
> and adjust
> +             * later testing accordingly.
> +             */
> +            switch ( i )
> +            {
> +            case XEN_SYSCTL_cpu_policy_pv_max:
> +            case XEN_SYSCTL_cpu_policy_pv_default:
> +                if ( xen_has_pv )
> +                    printf("  Xen doesn't support PV\n");
> +                xen_has_pv = false;
> +                continue;
> +
> +            case XEN_SYSCTL_cpu_policy_hvm_max:
> +            case XEN_SYSCTL_cpu_policy_hvm_default:
> +                if ( xen_has_hvm )
> +                    printf("  Xen doesn't support HVM\n");
> +                xen_has_hvm = false;
> +                continue;
> +            }
> +        }
> +        if ( rc )
> +            return fail("Failed to obtain policy[%u]: %d - %s\n",
> +                        i, errno, strerror(errno));
> +    }
> +
> +    rc = xc_physinfo(xch, &physinfo);
> +    if ( rc )
> +        return fail("Failed to obtain physinfo: %d - %s\n",
> +                    errno, strerror(errno));
> +
> +    printf("  Got %u CPUs\n", physinfo.max_cpu_id + 1);
> +
> +    test_tsx_msrs();
> +    test_rtm_behaviour();
> +    test_def_max_policies();
> +    test_guests();
> +}
> +
> +int main(int argc, char **argv)
> +{
> +    printf("TSX tests\n");
> +
> +    xch = xc_interface_open(NULL, NULL, 0);
> +
> +    if ( !xch )
> +        err(1, "xc_interface_open");
> +
> +    test_tsx();
> +
> +    return !!nr_failures;
> +}
Andrew Cooper June 14, 2021, 4:32 p.m. UTC | #5
On 14/06/2021 16:55, Edwin Torok wrote:
> On Mon, 2021-06-14 at 11:47 +0100, Andrew Cooper wrote:
>> +/*
>> + * Check all TSX MSRs, and in particular that their accessibility
>> matches what
>> + * is expressed in the host CPU policy.
>> + */
>> +static void test_tsx_msrs(void)
>> +{
>> +    printf("Testing MSR_TSX_FORCE_ABORT consistency\n");
>> +    test_tsx_msr_consistency(
>> +        MSR_TSX_FORCE_ABORT, host.cpuid.feat.tsx_force_abort);
>> +
>> +    printf("Testing MSR_TSX_CTRL consistency\n");
>> +    test_tsx_msr_consistency(
>> +        MSR_TSX_CTRL, host.msr.arch_caps.tsx_ctrl);
>> +}
>
> This is great, could we extend the test to all MSRs that Xen knows
> about and are expected to be identical? Particularly
> MSR_SPEC_CTRL, MSR_MCU_OPT_CTRL, and I see some MSRs used for errata
> workarounds like MSR_MCU_OPT_CTRL, possiblye more.

MSR_SPEC_CTRL, no.  It's value is influenced by the guest kernel in
context, and we would not expect it to be consistent across the system
at an arbitrary point in time.

MSR_MCU_OPT_CTRL might be a good candidate for a future change, but it's
not related to TSX.  (That said, it is actually how I spotted XSA-377).

~Andrew
diff mbox series

Patch

diff --git a/tools/tests/Makefile b/tools/tests/Makefile
index 8746aabe6b..25531a984a 100644
--- a/tools/tests/Makefile
+++ b/tools/tests/Makefile
@@ -5,6 +5,7 @@  SUBDIRS-y :=
 SUBDIRS-y += resource
 SUBDIRS-$(CONFIG_X86) += cpu-policy
 SUBDIRS-$(CONFIG_X86) += mce-test
+SUBDIRS-$(CONFIG_X86) += tsx
 ifneq ($(clang),y)
 SUBDIRS-$(CONFIG_X86) += x86_emulator
 endif
diff --git a/tools/tests/tsx/.gitignore b/tools/tests/tsx/.gitignore
new file mode 100644
index 0000000000..97ec4db7ff
--- /dev/null
+++ b/tools/tests/tsx/.gitignore
@@ -0,0 +1 @@ 
+test-tsx
diff --git a/tools/tests/tsx/Makefile b/tools/tests/tsx/Makefile
new file mode 100644
index 0000000000..7381a4f5a4
--- /dev/null
+++ b/tools/tests/tsx/Makefile
@@ -0,0 +1,43 @@ 
+XEN_ROOT = $(CURDIR)/../../..
+include $(XEN_ROOT)/tools/Rules.mk
+
+TARGET := test-tsx
+
+.PHONY: all
+all: $(TARGET)
+
+.PHONY: run
+run: $(TARGET)
+	./$(TARGET)
+
+.PHONY: clean
+clean:
+	$(RM) -f -- *.o $(TARGET) $(DEPS_RM)
+
+.PHONY: distclean
+distclean: clean
+	$(RM) -f -- *~
+
+.PHONY: install
+install: all
+
+.PHONY: uninstall
+uninstall:
+
+CFLAGS += -Werror -std=gnu11
+CFLAGS += $(CFLAGS_xeninclude)
+CFLAGS += $(CFLAGS_libxenctrl)
+CFLAGS += $(CFLAGS_libxenguest)
+CFLAGS += -I$(XEN_ROOT)/tools/libs/ctrl -I$(XEN_ROOT)/tools/libs/guest
+CFLAGS += $(APPEND_CFLAGS)
+
+LDFLAGS += $(LDLIBS_libxenctrl)
+LDFLAGS += $(LDLIBS_libxenguest)
+LDFLAGS += $(APPEND_LDFLAGS)
+
+test-tsx.o: Makefile
+
+test-tsx: test-tsx.o
+	$(CC) -o $@ $< $(LDFLAGS)
+
+-include $(DEPS_INCLUDE)
diff --git a/tools/tests/tsx/test-tsx.c b/tools/tests/tsx/test-tsx.c
new file mode 100644
index 0000000000..036b36e797
--- /dev/null
+++ b/tools/tests/tsx/test-tsx.c
@@ -0,0 +1,515 @@ 
+/*
+ * TSX settings and consistency tests
+ *
+ * This tests various behaviours and invariants with regards to TSX.  It
+ * ideally wants running for several microcode versions, and all applicable
+ * tsx= commandline settings, on a single CPU, including after an S3
+ * suspend/resume event.
+ *
+ * It tests specifically:
+ *  - The consistency of MSR_TSX_CTRL/MSR_TSX_FORCE_ABORT values across the
+ *    system, and their accessibility WRT data in the host CPU policy.
+ *  - The actual behaviour of RTM on the system.
+ *  - Cross-check the default/max policies based on the actual RTM behaviour.
+ *  - Create some guests, check their defaults, and check that the defaults
+ *    can be changed.
+ */
+
+#define _GNU_SOURCE
+
+#include <err.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <signal.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/ucontext.h>
+
+#include <xenctrl.h>
+#include <xenguest.h>
+#include <xen-tools/libs.h>
+
+#include "xg_private.h"
+
+enum {
+#define XEN_CPUFEATURE(name, value) X86_FEATURE_##name = value,
+#include <xen/arch-x86/cpufeatureset.h>
+};
+#define bitmaskof(idx)      (1u << ((idx) & 31))
+
+#define MSR_ARCH_CAPABILITIES               0x0000010a
+#define  ARCH_CAPS_TSX_CTRL                 (1 <<  7)
+#define MSR_TSX_FORCE_ABORT                 0x0000010f
+#define MSR_TSX_CTRL                        0x00000122
+
+static unsigned int nr_failures;
+#define fail(fmt, ...)                          \
+({                                              \
+    nr_failures++;                              \
+    (void)printf(fmt, ##__VA_ARGS__);           \
+})
+
+static xc_interface *xch;
+
+/*
+ * Policies, arranged as an array for easy collection of all of them.  We
+ * don't care about the raw policy (index 0) so reuse that for the guest
+ * policy.
+ */
+static struct xc_cpu_policy policies[6];
+#define guest_policy policies[0]
+#define host         policies[XEN_SYSCTL_cpu_policy_host]
+#define pv_max       policies[XEN_SYSCTL_cpu_policy_pv_max]
+#define hvm_max      policies[XEN_SYSCTL_cpu_policy_hvm_max]
+#define pv_default   policies[XEN_SYSCTL_cpu_policy_pv_default]
+#define hvm_default  policies[XEN_SYSCTL_cpu_policy_hvm_default]
+
+static bool xen_has_pv = true, xen_has_hvm = true;
+
+static xc_physinfo_t physinfo;
+
+static enum rtm_behaviour {
+    RTM_UD,
+    RTM_OK,
+    RTM_ABORT,
+} rtm_behaviour;
+
+/*
+ * Test a specific TSX MSR for consistency across the system, taking into
+ * account whether it ought to be accessable or not.
+ *
+ * We can't query offline CPUs, so skip those if encountered.  We don't care
+ * particularly for the exact MSR value, but we do care that it is the same
+ * everywhere.
+ */
+static void test_tsx_msr_consistency(unsigned int msr, bool accessable)
+{
+    uint64_t cpu0_val = ~0;
+
+    for ( unsigned int cpu = 0; cpu <= physinfo.max_cpu_id; ++cpu )
+    {
+        xc_resource_entry_t ent = {
+            .u.cmd = XEN_RESOURCE_OP_MSR_READ,
+            .idx = msr,
+        };
+        xc_resource_op_t op = {
+            .cpu = cpu,
+            .entries = &ent,
+            .nr_entries = 1,
+        };
+        int rc = xc_resource_op(xch, 1, &op);
+
+        if ( rc < 0 )
+        {
+            /* Don't emit a message for offline CPUs */
+            if ( errno != ENODEV )
+                fail("  xc_resource_op() for CPU%u failed: rc %d, errno %d - %s\n",
+                     cpu, rc, errno, strerror(errno));
+            continue;
+        }
+
+        if ( accessable )
+        {
+            if ( rc != 1 )
+            {
+                fail("  Expected 1 result, got %u\n", rc);
+                continue;
+            }
+            if ( ent.u.ret != 0 )
+            {
+                fail("  Expected ok, got %d\n", ent.u.ret);
+                continue;
+            }
+        }
+        else
+        {
+            if ( rc != 0 )
+                fail("  Expected 0 results, got %u\n", rc);
+            else if ( ent.u.ret != -EPERM )
+                fail("  Expected -EPERM, got %d\n", ent.u.ret);
+            continue;
+        }
+
+        if ( cpu == 0 )
+        {
+            cpu0_val = ent.val;
+            printf("  CPU0 val %#"PRIx64"\n", cpu0_val);
+        }
+        else if ( ent.val != cpu0_val )
+            fail("  CPU%u val %#"PRIx64" differes from CPU0 %#"PRIx64"\n",
+                 cpu, ent.val, cpu0_val);
+    }
+}
+
+/*
+ * Check all TSX MSRs, and in particular that their accessibility matches what
+ * is expressed in the host CPU policy.
+ */
+static void test_tsx_msrs(void)
+{
+    printf("Testing MSR_TSX_FORCE_ABORT consistency\n");
+    test_tsx_msr_consistency(
+        MSR_TSX_FORCE_ABORT, host.cpuid.feat.tsx_force_abort);
+
+    printf("Testing MSR_TSX_CTRL consistency\n");
+    test_tsx_msr_consistency(
+        MSR_TSX_CTRL, host.msr.arch_caps.tsx_ctrl);
+}
+
+/*
+ * Probe for how RTM behaves, deliberately not inspecting CPUID.
+ * Distinguishes between "no support at all" (i.e. XBEGIN suffers #UD),
+ * working ok, and appearing to always abort.
+ */
+static enum rtm_behaviour probe_rtm_behaviour(void)
+{
+    for ( int i = 0; i < 1000; ++i )
+    {
+        /*
+         * Opencoding the RTM infrastructure from immintrin.h, because we
+         * still support older versions of GCC.  ALso so we can include #UD
+         * detection logic.
+         */
+#define XBEGIN_STARTED -1
+#define XBEGIN_UD      -2
+        unsigned int status = XBEGIN_STARTED;
+
+        asm volatile (".Lxbegin: .byte 0xc7,0xf8,0,0,0,0" /* XBEGIN 1f; 1: */
+                      : "+a" (status) :: "memory");
+        if ( status == XBEGIN_STARTED )
+        {
+            asm volatile (".byte 0x0f,0x01,0xd5" ::: "memory"); /* XEND */
+            return RTM_OK;
+        }
+        else if ( status == XBEGIN_UD )
+            return RTM_UD;
+    }
+
+    return RTM_ABORT;
+}
+
+static struct sigaction old_sigill;
+
+static void sigill_handler(int signo, siginfo_t *info, void *extra)
+{
+    extern char xbegin_label[] asm(".Lxbegin");
+
+    if ( info->si_addr == xbegin_label ||
+         memcmp(info->si_addr, "\xc7\xf8\x00\x00\x00\x00", 6) == 0 )
+    {
+        ucontext_t *context = extra;
+
+        /*
+         * Found the XBEGIN instruction.  Step over it, and update `status` to
+         * signal #UD.
+         */
+#ifdef __x86_64__
+        context->uc_mcontext.gregs[REG_RIP] += 6;
+        context->uc_mcontext.gregs[REG_RAX] = XBEGIN_UD;
+#else
+        context->uc_mcontext.gregs[REG_EIP] += 6;
+        context->uc_mcontext.gregs[REG_EAX] = XBEGIN_UD;
+#endif
+    }
+    else
+    {
+        /*
+         * Not the SIGILL we're looking for...  Restore the old handler and
+         * try again.  Will likely coredump as a result.
+         */
+        sigaction(SIGILL, &old_sigill, NULL);
+    }
+}
+
+static void test_rtm_behaviour(void)
+{
+    struct sigaction new_sigill = {
+        .sa_flags = SA_SIGINFO,
+        .sa_sigaction = sigill_handler,
+    };
+    const char *str;
+
+    printf("Testing RTM behaviour\n");
+
+    /*
+     * Install a custom SIGILL handler while probing for RTM behaviour, as the
+     * XBEGIN instruction might suffer #UD.
+     */
+    sigaction(SIGILL, &new_sigill, &old_sigill);
+    rtm_behaviour = probe_rtm_behaviour();
+    sigaction(SIGILL, &old_sigill, NULL);
+
+    switch ( rtm_behaviour )
+    {
+    case RTM_UD:    str = "#UD";   break;
+    case RTM_OK:    str = "OK";    break;
+    case RTM_ABORT: str = "Abort"; break;
+    default:        str = NULL;    break;
+    }
+
+    if ( str )
+        printf("  Got %s\n", str);
+    else
+        return fail("  Got unexpected behaviour %d\n", rtm_behaviour);
+
+    if ( host.cpuid.feat.rtm )
+    {
+        if ( rtm_behaviour == RTM_UD )
+            fail("  Host reports RTM, but appears unavailable\n");
+    }
+    else
+    {
+        if ( rtm_behaviour != RTM_UD )
+            fail("  Host reports no RTM, but appears available\n");
+    }
+}
+
+static void dump_tsx_details(const struct xc_cpu_policy *p, const char *pref)
+{
+    printf("  %s RTM %u, HLE %u, TSX_FORCE_ABORT %u, RTM_ALWAYS_ABORT %u, TSX_CTRL %u\n",
+           pref,
+           p->cpuid.feat.rtm,
+           p->cpuid.feat.hle,
+           p->cpuid.feat.tsx_force_abort,
+           p->cpuid.feat.rtm_always_abort,
+           p->msr.arch_caps.tsx_ctrl);
+}
+
+/* Sanity test various invariants we expect in the default/max policies. */
+static void test_guest_policies(const struct xc_cpu_policy *max,
+                                const struct xc_cpu_policy *def)
+{
+    const struct cpuid_policy *cm = &max->cpuid;
+    const struct cpuid_policy *cd = &def->cpuid;
+    const struct msr_policy *mm = &max->msr;
+    const struct msr_policy *md = &def->msr;
+
+    dump_tsx_details(max, "Max:");
+    dump_tsx_details(def, "Def:");
+
+    if ( ((cm->feat.raw[0].d | cd->feat.raw[0].d) &
+          (bitmaskof(X86_FEATURE_TSX_FORCE_ABORT) |
+           bitmaskof(X86_FEATURE_RTM_ALWAYS_ABORT))) ||
+         ((mm->arch_caps.raw | md->arch_caps.raw) & ARCH_CAPS_TSX_CTRL) )
+        fail("  Xen-only TSX controls offered to guest\n");
+
+    switch ( rtm_behaviour )
+    {
+    case RTM_UD:
+        if ( (cm->feat.raw[0].b | cd->feat.raw[0].b) &
+             (bitmaskof(X86_FEATURE_HLE) | bitmaskof(X86_FEATURE_RTM)) )
+             fail("  HLE/RTM offered to guests despite not being available\n");
+        break;
+
+    case RTM_ABORT:
+        if ( cd->feat.raw[0].b &
+             (bitmaskof(X86_FEATURE_HLE) | bitmaskof(X86_FEATURE_RTM)) )
+             fail("  HLE/RTM offered to guests by default despite not being usable\n");
+        break;
+
+    case RTM_OK:
+        if ( !cm->feat.rtm || !cd->feat.rtm )
+             fail("  RTM not offered to guests despite being available\n");
+        break;
+    }
+
+    if ( cd->feat.hle )
+        fail("  Fail: HLE offered in default policy\n");
+}
+
+static void test_def_max_policies(void)
+{
+    if ( xen_has_pv )
+    {
+        printf("Testing PV default/max policies\n");
+        test_guest_policies(&pv_max, &pv_default);
+    }
+
+    if ( xen_has_hvm )
+    {
+        printf("Testing HVM default/max policies\n");
+        test_guest_policies(&hvm_max, &hvm_default);
+    }
+}
+
+static void test_guest(struct xen_domctl_createdomain *c)
+{
+    uint32_t domid = 0;
+    int rc;
+
+    rc = xc_domain_create(xch, &domid, c);
+    if ( rc )
+        return fail("  Domain create failure: %d - %s\n",
+                    errno, strerror(errno));
+
+    printf("  Created d%u\n", domid);
+
+    rc = xc_cpu_policy_get_domain(xch, domid, &guest_policy);
+    if ( rc )
+    {
+        fail("  Failed to obtain domain policy: %d - %s\n",
+             errno, strerror(errno));
+        goto out;
+    }
+
+    dump_tsx_details(&guest_policy, "Cur:");
+
+    /*
+     * Check defaults given to the guest.
+     */
+    if ( guest_policy.cpuid.feat.rtm != (rtm_behaviour == RTM_OK) )
+        fail("  RTM %u in guest, despite rtm behaviour\n",
+             guest_policy.cpuid.feat.rtm);
+
+    if ( guest_policy.cpuid.feat.hle ||
+         guest_policy.cpuid.feat.tsx_force_abort ||
+         guest_policy.cpuid.feat.rtm_always_abort ||
+         guest_policy.msr.arch_caps.tsx_ctrl )
+        fail("  Unexpected features advertised\n");
+
+    if ( host.cpuid.feat.rtm )
+    {
+        unsigned int _7b0;
+
+        /*
+         * If host RTM is available, all combinations of guest flags should be
+         * possible.  Flip both HLE/RTM to check non-default settings.
+         */
+        _7b0 = (guest_policy.cpuid.feat.raw[0].b ^=
+                (bitmaskof(X86_FEATURE_HLE) | bitmaskof(X86_FEATURE_RTM)));
+
+        /* Set the new policy. */
+        rc = xc_cpu_policy_set_domain(xch, domid, &guest_policy);
+        if ( rc )
+        {
+            fail("  Failed to set domain policy: %d - %s\n",
+                 errno, strerror(errno));
+            goto out;
+        }
+
+        /* Re-get the new policy. */
+        rc = xc_cpu_policy_get_domain(xch, domid, &guest_policy);
+        if ( rc )
+        {
+            fail("  Failed to obtain domain policy: %d - %s\n",
+                 errno, strerror(errno));
+            goto out;
+        }
+
+        dump_tsx_details(&guest_policy, "Cur:");
+
+        if ( guest_policy.cpuid.feat.raw[0].b != _7b0 )
+        {
+            fail("  Expected CPUID.7[1].b 0x%08x differes from actual 0x%08x\n",
+                 _7b0, guest_policy.cpuid.feat.raw[0].b);
+            goto out;
+        }
+    }
+
+ out:
+    rc = xc_domain_destroy(xch, domid);
+    if ( rc )
+        fail("  Failed to destroy domain: %d - %s\n",
+             errno, strerror(errno));
+}
+
+static void test_guests(void)
+{
+    if ( xen_has_pv )
+    {
+        struct xen_domctl_createdomain c = {
+            .max_vcpus = 1,
+            .max_grant_frames = 1,
+        };
+
+        printf("Testing PV guest\n");
+        test_guest(&c);
+    }
+
+    if ( xen_has_hvm )
+    {
+        struct xen_domctl_createdomain c = {
+            .flags = XEN_DOMCTL_CDF_hvm,
+            .max_vcpus = 1,
+            .max_grant_frames = 1,
+            .arch = {
+                .emulation_flags = XEN_X86_EMU_LAPIC,
+            },
+        };
+
+        if ( physinfo.capabilities & XEN_SYSCTL_PHYSCAP_hap )
+            c.flags |= XEN_DOMCTL_CDF_hap;
+        else if ( !(physinfo.capabilities & XEN_SYSCTL_PHYSCAP_shadow) )
+            return fail("  HVM available, but neither HAP nor Shadow\n");
+
+        printf("Testing HVM guest\n");
+        test_guest(&c);
+    }
+}
+
+/* Obtain some general data, then run the tests. */
+static void test_tsx(void)
+{
+    int rc;
+
+    /* Read all policies except raw. */
+    for ( int i = XEN_SYSCTL_cpu_policy_host;
+          i <= XEN_SYSCTL_cpu_policy_hvm_default; ++i )
+    {
+        rc = xc_cpu_policy_get_system(xch, i, &policies[i]);
+
+        if ( rc == -1 && errno == EOPNOTSUPP )
+        {
+            /*
+             * Use EOPNOTSUPP to spot Xen missing CONFIG_{PV,HVM}, and adjust
+             * later testing accordingly.
+             */
+            switch ( i )
+            {
+            case XEN_SYSCTL_cpu_policy_pv_max:
+            case XEN_SYSCTL_cpu_policy_pv_default:
+                if ( xen_has_pv )
+                    printf("  Xen doesn't support PV\n");
+                xen_has_pv = false;
+                continue;
+
+            case XEN_SYSCTL_cpu_policy_hvm_max:
+            case XEN_SYSCTL_cpu_policy_hvm_default:
+                if ( xen_has_hvm )
+                    printf("  Xen doesn't support HVM\n");
+                xen_has_hvm = false;
+                continue;
+            }
+        }
+        if ( rc )
+            return fail("Failed to obtain policy[%u]: %d - %s\n",
+                        i, errno, strerror(errno));
+    }
+
+    rc = xc_physinfo(xch, &physinfo);
+    if ( rc )
+        return fail("Failed to obtain physinfo: %d - %s\n",
+                    errno, strerror(errno));
+
+    printf("  Got %u CPUs\n", physinfo.max_cpu_id + 1);
+
+    test_tsx_msrs();
+    test_rtm_behaviour();
+    test_def_max_policies();
+    test_guests();
+}
+
+int main(int argc, char **argv)
+{
+    printf("TSX tests\n");
+
+    xch = xc_interface_open(NULL, NULL, 0);
+
+    if ( !xch )
+        err(1, "xc_interface_open");
+
+    test_tsx();
+
+    return !!nr_failures;
+}