@@ -22,6 +22,8 @@ typedef bool bool_t;
#define cpu_has_amd_erratum(nr) 0
#define mark_regs_dirty(r) ((void)(r))
+#define read_bndcfgu() 0
+#define xstate_set_init(what)
#define likely(x) __builtin_expect(!!(x), true)
#define unlikely(x) __builtin_expect(!!(x), false)
@@ -421,6 +421,8 @@ int vcpu_initialise(struct vcpu *v)
vmce_init_vcpu(v);
}
+ else if ( (rc = xstate_alloc_save_area(v)) != 0 )
+ return rc;
spin_lock_init(&v->arch.vpmu.vpmu_lock);
@@ -409,6 +409,9 @@ typedef union {
#define MSR_SYSENTER_CS 0x00000174
#define MSR_SYSENTER_ESP 0x00000175
#define MSR_SYSENTER_EIP 0x00000176
+#define MSR_BNDCFGS 0x00000d90
+#define BNDCFG_ENABLE (1 << 0)
+#define BNDCFG_PRESERVE (1 << 1)
#define MSR_EFER 0xc0000080
#define MSR_STAR 0xc0000081
#define MSR_LSTAR 0xc0000082
@@ -1172,6 +1175,7 @@ static bool_t vcpu_has(
#define vcpu_has_bmi1() vcpu_has(0x00000007, EBX, 3, ctxt, ops)
#define vcpu_has_hle() vcpu_has(0x00000007, EBX, 4, ctxt, ops)
#define vcpu_has_rtm() vcpu_has(0x00000007, EBX, 11, ctxt, ops)
+#define vcpu_has_mpx() vcpu_has(0x00000007, EBX, 14, ctxt, ops)
#define vcpu_must_have(leaf, reg, bit) \
generate_exception_if(!vcpu_has(leaf, reg, bit, ctxt, ops), EXC_UD, -1)
@@ -1616,6 +1620,34 @@ static int inject_swint(enum x86_swint_t
return ops->inject_hw_exception(fault_type, error_code, ctxt);
}
+/*
+ * Mirror hardware MPX behavior on an emulated branch: a legacy (i.e. not
+ * F2/BND-prefixed) branch moves the BNDn bounds registers to their init
+ * state when BNDCFG.ENABLE is set and BNDCFG.PRESERVE is clear.
+ */
+static void clear_bnd(struct x86_emulate_ctxt *ctxt,
+                      const struct x86_emulate_ops *ops, enum vex_pfx pfx)
+{
+    uint64_t bndcfg;
+    /*
+     * NOTE(review): rc and the done: label below look unused, but are
+     * presumably referenced from a macro expansion (likely mode_ring0()'s
+     * failure path) — confirm against the full emulator source.
+     */
+    int rc;
+
+    /* BND-prefixed branches preserve state; without MPX there is nothing
+     * to do.  (The stub #define of vcpu_has_mpx()/read_bndcfgu() keeps the
+     * test harness build working.) */
+    if ( pfx == vex_f2 || !vcpu_has_mpx() )
+        return;
+
+    /* Outside ring 0 the config comes from the XSAVE-managed BNDCFGU;
+     * in ring 0 it lives in MSR_BNDCFGS, which needs a read_msr hook. */
+    if ( !mode_ring0() )
+        bndcfg = read_bndcfgu();
+    else if ( !ops->read_msr ||
+              ops->read_msr(MSR_BNDCFGS, &bndcfg, ctxt) != X86EMUL_OKAY )
+        return;
+    if ( (bndcfg & BNDCFG_ENABLE) && !(bndcfg & BNDCFG_PRESERVE) )
+    {
+        /*
+         * Using BNDMK or any other MPX instruction here is pointless, as
+         * we run with MPX disabled ourselves, and hence they're all no-ops.
+         * Therefore we have two ways to clear BNDn: Enable MPX temporarily
+         * (in which case executing any suitable non-prefixed branch
+         * instruction would do), or use XRSTOR.
+         */
+        xstate_set_init(XSTATE_BNDREGS);
+    }
+ done:;
+}
+
int x86emul_unhandleable_rw(
enum x86_segment seg,
unsigned long offset,
@@ -2835,6 +2867,7 @@ x86_emulate(
case 0x70 ... 0x7f: /* jcc (short) */
if ( test_cc(b, _regs.eflags) )
jmp_rel((int32_t)src.val);
+ clear_bnd(ctxt, ops, vex.pfx);
break;
case 0x82: /* Grp1 (x86/32 only) */
@@ -3184,6 +3217,7 @@ x86_emulate(
(rc = ops->insn_fetch(x86_seg_cs, dst.val, NULL, 0, ctxt)) )
goto done;
_regs.eip = dst.val;
+ clear_bnd(ctxt, ops, vex.pfx);
break;
case 0xc4: /* les */ {
@@ -3910,12 +3944,15 @@ x86_emulate(
op_bytes = ((op_bytes == 4) && mode_64bit()) ? 8 : op_bytes;
src.val = _regs.eip;
jmp_rel(rel);
+ clear_bnd(ctxt, ops, vex.pfx);
goto push;
}
case 0xe9: /* jmp (near) */
case 0xeb: /* jmp (short) */
jmp_rel((int32_t)src.val);
+ if ( !(b & 2) )
+ clear_bnd(ctxt, ops, vex.pfx);
break;
case 0xea: /* jmp (far, absolute) */
@@ -4175,12 +4212,14 @@ x86_emulate(
goto done;
_regs.eip = src.val;
src.val = dst.val;
+ clear_bnd(ctxt, ops, vex.pfx);
goto push;
case 4: /* jmp (near) */
if ( (rc = ops->insn_fetch(x86_seg_cs, src.val, NULL, 0, ctxt)) )
goto done;
_regs.eip = src.val;
dst.type = OP_NONE;
+ clear_bnd(ctxt, ops, vex.pfx);
break;
case 3: /* call (far, absolute indirect) */
case 5: /* jmp (far, absolute indirect) */ {
@@ -4893,6 +4932,7 @@ x86_emulate(
case X86EMUL_OPC(0x0f, 0x80) ... X86EMUL_OPC(0x0f, 0x8f): /* jcc (near) */
if ( test_cc(b, _regs.eflags) )
jmp_rel((int32_t)src.val);
+ clear_bnd(ctxt, ops, vex.pfx);
break;
case X86EMUL_OPC(0x0f, 0x90) ... X86EMUL_OPC(0x0f, 0x9f): /* setcc */
@@ -473,15 +473,33 @@ bool_t xsave_enabled(const struct vcpu *
int xstate_alloc_save_area(struct vcpu *v)
{
struct xsave_struct *save_area;
+ unsigned int size;
- if ( !cpu_has_xsave || is_idle_vcpu(v) )
+ if ( !cpu_has_xsave )
return 0;
- BUG_ON(xsave_cntxt_size < XSTATE_AREA_MIN_SIZE);
+ if ( !is_idle_vcpu(v) || !cpu_has_xsavec )
+ {
+ size = xsave_cntxt_size;
+ BUG_ON(size < XSTATE_AREA_MIN_SIZE);
+ }
+ else
+ {
+ /*
+ * For idle vcpus on XSAVEC-capable CPUs allocate an area large
+ * enough to save any individual extended state.
+ */
+ unsigned int i;
+
+ for ( size = 0, i = 2; i < xstate_features; ++i )
+ if ( size < xstate_sizes[i] )
+ size = xstate_sizes[i];
+ size += XSTATE_AREA_MIN_SIZE;
+ }
/* XSAVE/XRSTOR requires the save area be 64-byte-boundary aligned. */
BUILD_BUG_ON(__alignof(*save_area) < 64);
- save_area = _xzalloc(xsave_cntxt_size, __alignof(*save_area));
+ save_area = _xzalloc(size, __alignof(*save_area));
if ( save_area == NULL )
return -ENOMEM;
@@ -700,6 +718,66 @@ int handle_xsetbv(u32 index, u64 new_bv)
return 0;
}
+/*
+ * Read the user-mode MPX configuration (BNDCFGU) of the current CPU.
+ *
+ * Saves just the BNDCSR state component into the local idle vCPU's XSAVE
+ * area and extracts the bndcfgu field from it.  Returns 0 when the BNDCSR
+ * component is in its init state (XSTATE_BV bit clear after the save).
+ */
+uint64_t read_bndcfgu(void)
+{
+    unsigned long cr0 = read_cr0();
+    struct xsave_struct *xstate
+        = idle_vcpu[smp_processor_id()]->arch.xsave_area;
+    const struct xstate_bndcsr *bndcsr;
+
+    ASSERT(cpu_has_mpx);
+    /* XSAVE* would fault with CR0.TS set; clear it, restore below. */
+    clts();
+
+    if ( cpu_has_xsavec )
+    {
+        /*
+         * Compacted format: with RFBM limited to XSTATE_BNDCSR, that
+         * component is the only one and hence lands directly after the
+         * 512+64 byte legacy+header region.
+         */
+        asm ( ".byte 0x0f,0xc7,0x27\n" /* xsavec */
+              : "=m" (*xstate)
+              : "a" (XSTATE_BNDCSR), "d" (0), "D" (xstate) );
+
+        bndcsr = (void *)(xstate + 1);
+    }
+    else
+    {
+        /* Standard format: the component sits at its architectural offset. */
+        alternative_io(".byte 0x0f,0xae,0x27\n", /* xsave */
+                       ".byte 0x0f,0xae,0x37\n", /* xsaveopt */
+                       X86_FEATURE_XSAVEOPT,
+                       "=m" (*xstate),
+                       "a" (XSTATE_BNDCSR), "d" (0), "D" (xstate));
+
+        bndcsr = (void *)xstate + xstate_offsets[_XSTATE_BNDCSR];
+    }
+
+    /* Restore CR0.TS if it was set on entry. */
+    if ( cr0 & X86_CR0_TS )
+        write_cr0(cr0);
+
+    /* If BNDCSR wasn't saved (init state), report BNDCFGU as zero. */
+    return xstate->xsave_hdr.xstate_bv & XSTATE_BNDCSR ? bndcsr->bndcfgu : 0;
+}
+
+/*
+ * Force the extended state components in @mask to their init state on the
+ * current CPU, by XRSTOR-ing from an all-zero XSAVE header (XSTATE_BV=0
+ * means "load init state" for every requested component).
+ *
+ * Silently does nothing if @mask contains unsupported features, or if a
+ * temporarily needed XCR0 extension cannot be installed.
+ */
+void xstate_set_init(uint64_t mask)
+{
+    unsigned long cr0 = read_cr0();
+    unsigned long xcr0 = this_cpu(xcr0);
+    struct vcpu *v = idle_vcpu[smp_processor_id()];
+    struct xsave_struct *xstate = v->arch.xsave_area;
+
+    /* Bail if any requested component isn't supported by the platform. */
+    if ( ~xfeature_mask & mask )
+        return;
+
+    /* Temporarily enable components not currently active in XCR0. */
+    if ( (~xcr0 & mask) && !set_xcr0(xcr0 | mask) )
+        return;
+
+    /* XRSTOR would fault with CR0.TS set; clear it, restore below. */
+    clts();
+
+    /* Zeroed header => XRSTOR loads the init state of every @mask bit. */
+    memset(&xstate->xsave_hdr, 0, sizeof(xstate->xsave_hdr));
+    xrstor(v, mask);
+
+    if ( cr0 & X86_CR0_TS )
+        write_cr0(cr0);
+
+    /* Undo the temporary XCR0 extension, if one was made above. */
+    if ( ~xcr0 & mask )
+        xsetbv(XCR_XFEATURE_ENABLED_MASK, xcr0);
+}
+
/*
* Local variables:
* mode: C
@@ -99,13 +99,20 @@ struct __attribute__((aligned (64))) xsa
char data[]; /* Variable layout states */
};
+/* Layout of the XSAVE BNDCSR state component (MPX config + status). */
+struct xstate_bndcsr {
+    uint64_t bndcfgu;   /* user-mode bounds configuration (BNDCFGU) */
+    uint64_t bndstatus; /* bound violation status (BNDSTATUS) */
+};
+
/* extended state operations */
bool_t __must_check set_xcr0(u64 xfeatures);
uint64_t get_xcr0(void);
void set_msr_xss(u64 xss);
uint64_t get_msr_xss(void);
+uint64_t read_bndcfgu(void);
void xsave(struct vcpu *v, uint64_t mask);
void xrstor(struct vcpu *v, uint64_t mask);
+void xstate_set_init(uint64_t mask);
bool_t xsave_enabled(const struct vcpu *v);
int __must_check validate_xstate(u64 xcr0, u64 xcr0_accum,
const struct xsave_hdr *);