@@ -74,6 +74,8 @@ static inline void do_trace_rdpmc(unsigned int msr, u64 val, int failed) {}
#endif
#ifdef CONFIG_CC_IS_GCC
+#define ASM_RDMSR_IMM \
+ " .insn VEX.128.F2.M7.W0 0xf6 /0, %[msr]%{:u32}, %[val]\n\t"
#define ASM_WRMSRNS_IMM \
" .insn VEX.128.F3.M7.W0 0xf6 /0, %[val], %[msr]%{:u32}\n\t"
#endif
@@ -85,10 +87,17 @@ static inline void do_trace_rdpmc(unsigned int msr, u64 val, int failed) {}
* The register operand is encoded as %rax because all uses of the immediate
* form MSR access instructions reference %rax as the register operand.
*/
+#define ASM_RDMSR_IMM \
+ " .byte 0xc4,0xe7,0x7b,0xf6,0xc0; .long %c[msr]"
#define ASM_WRMSRNS_IMM \
" .byte 0xc4,0xe7,0x7a,0xf6,0xc0; .long %c[msr]"
#endif
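+/*
+ * RDMSR returns the MSR value split across EDX:EAX; merge the two halves
+ * into a single 64-bit result in the register bound to %[val] (%rax).
+ */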
+#define RDMSR_AND_SAVE_RESULT \
+ "rdmsr\n\t" \
+ "shl $0x20, %%rdx\n\t" \
+ "or %%rdx, %[val]\n\t"
+
#define PREPARE_RDX_FOR_WRMSR \
"mov %%rax, %%rdx\n\t" \
"shr $0x20, %%rdx\n\t"
@@ -142,6 +151,7 @@ static __always_inline enum pv_msr_action get_pv_msr_action(const u32 msr)
}
}
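+/* Xen PV MSR access stubs, implemented in arch/x86/xen/xen-asm.S. */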
+extern void asm_xen_read_msr(void);
extern void asm_xen_write_msr(void);
#else
static __always_inline enum pv_msr_action get_pv_msr_action(const u32 msr)
@@ -150,35 +160,95 @@ static __always_inline enum pv_msr_action get_pv_msr_action(const u32 msr)
}
#endif
-static __always_inline unsigned long long __rdmsr(unsigned int msr)
+static __always_inline bool __native_rdmsr_variable(const u32 msr, u64 *val, const int type)
{
- DECLARE_ARGS(val, low, high);
+#ifdef CONFIG_X86_64
+ BUILD_BUG_ON(__builtin_constant_p(msr));
- asm volatile("1: rdmsr\n"
- "2:\n"
- _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_RDMSR)
- : EAX_EDX_RET(val, low, high) : "c" (msr));
+ asm_inline volatile goto(
+ "1:\n"
+ RDMSR_AND_SAVE_RESULT
+ _ASM_EXTABLE_TYPE(1b, %l[badmsr], %c[type]) /* For RDMSR */
- return EAX_EDX_VAL(val, low, high);
+ : [val] "=a" (*val)
+ : "c" (msr), [type] "i" (type)
+ : "memory", "rdx"
+ : badmsr);
+#else
+ asm_inline volatile goto(
+ "1: rdmsr\n\t"
+ _ASM_EXTABLE_TYPE(1b, %l[badmsr], %c[type]) /* For RDMSR */
+
+ : "=A" (*val)
+ : "c" (msr), [type] "i" (type)
+ : "memory"
+ : badmsr);
+#endif
+
+ return false;
+
+badmsr:
+ return true;
+}
+
+#ifdef CONFIG_X86_64
+static __always_inline bool __native_rdmsr_constant(const u32 msr, u64 *val, const int type)
+{
+ BUILD_BUG_ON(!__builtin_constant_p(msr));
+
+ asm_inline volatile goto(
+ "1:\n"
+ ALTERNATIVE("mov %[msr], %%ecx\n\t"
+ "2:\n"
+ RDMSR_AND_SAVE_RESULT,
+ ASM_RDMSR_IMM,
+ X86_FEATURE_MSR_IMM)
+ _ASM_EXTABLE_TYPE(1b, %l[badmsr], %c[type]) /* For RDMSR immediate */
+ _ASM_EXTABLE_TYPE(2b, %l[badmsr], %c[type]) /* For RDMSR */
+
+ : [val] "=a" (*val)
+ : [msr] "i" (msr), [type] "i" (type)
+ : "memory", "ecx", "rdx"
+ : badmsr);
+
+ return false;
+
+badmsr:
+ return true;
+}
+#endif
+
+static __always_inline bool __native_rdmsr(const u32 msr, u64 *val, const int type)
+{
+#ifdef CONFIG_X86_64
+ if (__builtin_constant_p(msr))
+ return __native_rdmsr_constant(msr, val, type);
+#endif
+
+ return __native_rdmsr_variable(msr, val, type);
}
-#define native_rdmsr(msr, val1, val2) \
+#define native_rdmsr(msr, low, high) \
do { \
- u64 __val = __rdmsr((msr)); \
- (void)((val1) = (u32)__val); \
- (void)((val2) = (u32)(__val >> 32)); \
+ u64 __val = 0; \
+ __native_rdmsr((msr), &__val, EX_TYPE_RDMSR); \
+ (void)((low) = (u32)__val); \
+ (void)((high) = (u32)(__val >> 32)); \
} while (0)
static __always_inline u64 native_rdmsrl(const u32 msr)
{
- return __rdmsr(msr);
+ u64 val = 0;
+
+ __native_rdmsr(msr, &val, EX_TYPE_RDMSR);
+ return val;
}
-static inline unsigned long long native_read_msr(unsigned int msr)
+static inline u64 native_read_msr(const u32 msr)
{
- unsigned long long val;
+ u64 val = 0;
- val = __rdmsr(msr);
+ __native_rdmsr(msr, &val, EX_TYPE_RDMSR);
if (tracepoint_enabled(read_msr))
do_trace_read_msr(msr, val, 0);
@@ -186,19 +256,139 @@ static inline unsigned long long native_read_msr(unsigned int msr)
return val;
}
-static inline unsigned long long native_read_msr_safe(unsigned int msr,
- int *err)
+static inline u64 native_read_msr_safe(const u32 msr, int *err)
{
- DECLARE_ARGS(val, low, high);
+ u64 val = 0;
+
+ *err = __native_rdmsr(msr, &val, EX_TYPE_RDMSR_SAFE) ? -EIO : 0;
- asm volatile("1: rdmsr ; xor %[err],%[err]\n"
- "2:\n\t"
- _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_RDMSR_SAFE, %[err])
- : [err] "=r" (*err), EAX_EDX_RET(val, low, high)
- : "c" (msr));
if (tracepoint_enabled(read_msr))
- do_trace_read_msr(msr, EAX_EDX_VAL(val, low, high), *err);
- return EAX_EDX_VAL(val, low, high);
+ do_trace_read_msr(msr, val, *err);
+
+ return val;
+}
+
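+/*
+ * On Xen PV the RDMSR sequence below is patched into a call to
+ * asm_xen_read_msr(), which expects the MSR index in %ecx and returns the
+ * combined 64-bit value in %rax (%rdx is clobbered).
+ */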
+static __always_inline bool __rdmsr_variable(const u32 msr, u64 *val, const int type)
+{
+#ifdef CONFIG_X86_64
+ BUILD_BUG_ON(__builtin_constant_p(msr));
+
+ asm_inline volatile goto(
+ "1:\n"
+ ALTERNATIVE(RDMSR_AND_SAVE_RESULT,
+ "call asm_xen_read_msr\n\t",
+ X86_FEATURE_XENPV)
+ _ASM_EXTABLE_TYPE(1b, %l[badmsr], %c[type]) /* For RDMSR and CALL */
+
+ : [val] "=a" (*val), ASM_CALL_CONSTRAINT
+ : "c" (msr), [type] "i" (type)
+ : "memory", "rdx"
+ : badmsr);
+
+ return false;
+
+badmsr:
+ return true;
+#else
+ return __native_rdmsr_variable(msr, val, type);
+#endif
+}
+
+static __always_inline bool __rdmsr_variable_all(const u32 msr, u64 *val, const int type)
+{
+ const enum pv_msr_action action = get_pv_msr_action(msr);
+
+ if (action == PV_MSR_PV) {
+ return __rdmsr_variable(msr, val, type);
+ } else if (action == PV_MSR_IGNORE) {
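+ /*
+ * Reads of ignored MSRs are skipped on Xen PV: report success and
+ * leave *val as initialized by the caller.
+ */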
+ if (cpu_feature_enabled(X86_FEATURE_XENPV))
+ return false;
+ }
+
+ return __native_rdmsr_variable(msr, val, type);
+}
+
+#ifdef CONFIG_X86_64
+static __always_inline bool __rdmsr_constant(const u32 msr, u64 *val, const int type)
+{
+ BUILD_BUG_ON(!__builtin_constant_p(msr));
+
+ asm_inline volatile goto(
+ "1:\n"
+ ALTERNATIVE_2("mov %[msr], %%ecx\n\t"
+ "2:\n"
+ RDMSR_AND_SAVE_RESULT,
+ ASM_RDMSR_IMM,
+ X86_FEATURE_MSR_IMM,
+ "mov %[msr], %%ecx\n\t"
+ "call asm_xen_read_msr\n\t",
+ X86_FEATURE_XENPV)
+ _ASM_EXTABLE_TYPE(1b, %l[badmsr], %c[type]) /* For RDMSR immediate */
+ _ASM_EXTABLE_TYPE(2b, %l[badmsr], %c[type]) /* For RDMSR and CALL */
+
+ : [val] "=a" (*val), ASM_CALL_CONSTRAINT
+ : [msr] "i" (msr), [type] "i" (type)
+ : "memory", "ecx", "rdx"
+ : badmsr);
+
+ return false;
+
+badmsr:
+ return true;
+}
+
+static __always_inline bool __rdmsr_constant_all(const u32 msr, u64 *val, const int type)
+{
+ const enum pv_msr_action action = get_pv_msr_action(msr);
+
+ if (action == PV_MSR_PV) {
+ return __rdmsr_constant(msr, val, type);
+ } else if (action == PV_MSR_IGNORE) {
+ if (cpu_feature_enabled(X86_FEATURE_XENPV))
+ return false;
+ }
+
+ return __native_rdmsr_constant(msr, val, type);
+}
+#endif
+
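+/*
+ * Dispatch to the immediate-form-capable constant path when the MSR index
+ * is a compile-time constant (64-bit only), otherwise use the variable path.
+ */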
+static __always_inline bool __rdmsr(const u32 msr, u64 *val, const int type)
+{
+#ifdef CONFIG_X86_64
+ if (__builtin_constant_p(msr))
+ return __rdmsr_constant_all(msr, val, type);
+#endif
+
+ return __rdmsr_variable_all(msr, val, type);
+}
+
+#define rdmsr(msr, low, high) \
+do { \
+ u64 __val = 0; \
+ __rdmsr((msr), &__val, EX_TYPE_RDMSR); \
+ (void)((low) = (u32)__val); \
+ (void)((high) = (u32)(__val >> 32)); \
+} while (0)
+
+#define rdmsrl(msr, val) \
+do { \
+ u64 __val = 0; \
+ __rdmsr((msr), &__val, EX_TYPE_RDMSR); \
+ (val) = __val; \
+} while (0)
+
+#define rdmsr_safe(msr, low, high) \
+({ \
+ u64 __val = 0; \
+ int __err = __rdmsr((msr), &__val, EX_TYPE_RDMSR_SAFE) ? -EIO : 0; \
+ (*low) = (u32)__val; \
+ (*high) = (u32)(__val >> 32); \
+ __err; \
+})
+
+static __always_inline int rdmsrl_safe(const u32 msr, u64 *val)
+{
+ return __rdmsr(msr, val, EX_TYPE_RDMSR_SAFE) ? -EIO : 0;
}
/*
@@ -503,40 +693,6 @@ static inline unsigned long long native_read_pmc(int counter)
* Note: the rd* operations modify the parameters directly (without using
* pointer indirection), this allows gcc to optimize better
*/
-
-#define rdmsr(msr, low, high) \
-do { \
- u64 __val = native_read_msr((msr)); \
- (void)((low) = (u32)__val); \
- (void)((high) = (u32)(__val >> 32)); \
-} while (0)
-
-#define rdmsrl(msr, val) \
- ((val) = native_read_msr((msr)))
-
-static inline void wrmsrl(u32 msr, u64 val)
-{
- native_write_msr(msr, val);
-}
-
-/* rdmsr with exception handling */
-#define rdmsr_safe(msr, low, high) \
-({ \
- int __err; \
- u64 __val = native_read_msr_safe((msr), &__err); \
- (*low) = (u32)__val; \
- (*high) = (u32)(__val >> 32); \
- __err; \
-})
-
-static inline int rdmsrl_safe(unsigned int msr, unsigned long long *p)
-{
- int err;
-
- *p = native_read_msr_safe(msr, &err);
- return err;
-}
-
#define rdpmc(counter, low, high) \
do { \
u64 _l = native_read_pmc((counter)); \
@@ -175,46 +175,6 @@ static inline void __write_cr4(unsigned long x)
PVOP_VCALL1(cpu.write_cr4, x);
}
-static inline u64 paravirt_read_msr(unsigned msr)
-{
- return PVOP_CALL1(u64, cpu.read_msr, msr);
-}
-
-static inline u64 paravirt_read_msr_safe(unsigned msr, int *err)
-{
- return PVOP_CALL2(u64, cpu.read_msr_safe, msr, err);
-}
-
-#define rdmsr(msr, val1, val2) \
-do { \
- u64 _l = paravirt_read_msr(msr); \
- val1 = (u32)_l; \
- val2 = _l >> 32; \
-} while (0)
-
-#define rdmsrl(msr, val) \
-do { \
- val = paravirt_read_msr(msr); \
-} while (0)
-
-/* rdmsr with exception handling */
-#define rdmsr_safe(msr, a, b) \
-({ \
- int _err; \
- u64 _l = paravirt_read_msr_safe(msr, &_err); \
- (*a) = (u32)_l; \
- (*b) = _l >> 32; \
- _err; \
-})
-
-static inline int rdmsrl_safe(unsigned msr, unsigned long long *p)
-{
- int err;
-
- *p = paravirt_read_msr_safe(msr, &err);
- return err;
-}
-
static inline unsigned long long paravirt_read_pmc(int counter)
{
return PVOP_CALL1(u64, cpu.read_pmc, counter);
@@ -90,15 +90,6 @@ struct pv_cpu_ops {
void (*cpuid)(unsigned int *eax, unsigned int *ebx,
unsigned int *ecx, unsigned int *edx);
- /* Unsafe MSR operations. These will warn or panic on failure. */
- u64 (*read_msr)(unsigned int msr);
-
- /*
- * Safe MSR operations.
- * read sets err to 0 or -EIO. write returns 0 or -EIO.
- */
- u64 (*read_msr_safe)(unsigned int msr, int *err);
-
u64 (*read_pmc)(int counter);
void (*start_context_switch)(struct task_struct *prev);
@@ -128,8 +128,6 @@ struct paravirt_patch_template pv_ops = {
.cpu.read_cr0 = native_read_cr0,
.cpu.write_cr0 = native_write_cr0,
.cpu.write_cr4 = native_write_cr4,
- .cpu.read_msr = native_read_msr,
- .cpu.read_msr_safe = native_read_msr_safe,
.cpu.read_pmc = native_read_pmc,
.cpu.load_tr_desc = native_load_tr_desc,
.cpu.set_ldt = native_set_ldt,
@@ -1087,19 +1087,26 @@ static void xen_write_cr4(unsigned long cr4)
native_write_cr4(cr4);
}
-static u64 xen_do_read_msr(unsigned int msr, int *err)
+/*
+ * Set 'done' to true in the returned xen_rdmsr_ret_type to indicate that the
+ * requested MSR read has already been handled, i.e., no hardware RDMSR is
+ * needed.
+ */
+struct xen_rdmsr_ret_type xen_do_read_msr(u32 msr)
{
- u64 val = 0; /* Avoid uninitialized value for safe variant. */
- bool emulated;
+ struct xen_rdmsr_ret_type ret;
- if (pmu_msr_chk_emulated(msr, &val, true, &emulated) && emulated)
- return val;
+ ret.done = true;
- if (err)
- val = native_read_msr_safe(msr, err);
- else
- val = native_read_msr(msr);
+ if (pmu_msr_chk_emulated(msr, &ret.val, true, &ret.done) && ret.done)
+ return ret;
+
+ ret.val = 0;
+ ret.done = false;
+ return ret;
+}

+u64 xen_do_read_msr_fixup(u32 msr, u64 val)
+{
switch (msr) {
case MSR_IA32_APICBASE:
val &= ~X2APIC_ENABLE;
@@ -1108,7 +1115,11 @@ static u64 xen_do_read_msr(unsigned int msr, int *err)
else
val &= ~MSR_IA32_APICBASE_BSP;
break;
+
+ default:
+ break;
}
+
return val;
}
@@ -1160,18 +1171,6 @@ bool xen_do_write_msr(u32 msr, u64 val)
}
}
-static u64 xen_read_msr_safe(unsigned int msr, int *err)
-{
- return xen_do_read_msr(msr, err);
-}
-
-static u64 xen_read_msr(unsigned int msr)
-{
- int err;
-
- return xen_do_read_msr(msr, xen_msr_safe ? &err : NULL);
-}
-
/* This is called once we have the cpu_possible_mask */
void __init xen_setup_vcpu_info_placement(void)
{
@@ -1206,10 +1205,6 @@ static const typeof(pv_ops) xen_cpu_ops __initconst = {
.write_cr4 = xen_write_cr4,
- .read_msr = xen_read_msr,
-
- .read_msr_safe = xen_read_msr_safe,
-
.read_pmc = xen_read_pmc,
.load_tr_desc = paravirt_nop,
@@ -399,3 +399,37 @@ SYM_CODE_END(xen_entry_SYSCALL_compat)
RET
SYM_FUNC_END(asm_xen_write_msr)
EXPORT_SYMBOL_GPL(asm_xen_write_msr)
+
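+/*
+ * MSR read entry point for Xen PV: let the C helper xen_do_read_msr() handle
+ * emulated MSRs first; if it does not, execute the hardware RDMSR and
+ * post-process the raw value with xen_do_read_msr_fixup().
+ */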
+/*
+ * The prototype of the Xen C code:
+ * struct xen_rdmsr_ret_type { u64 val; bool done; } xen_do_read_msr(u32 msr)
+ */
+SYM_FUNC_START(asm_xen_read_msr)
+ ENDBR
+ FRAME_BEGIN
+ XEN_SAVE_CALLEE_REGS_FOR_MSR
+ mov %ecx, %edi /* MSR number */
+ call xen_do_read_msr
+ test %dl, %dl /* %dl=1, i.e., ZF=0, meaning successfully done */
+ XEN_RESTORE_CALLEE_REGS_FOR_MSR
+ jnz 2f
+
+1: rdmsr
+ /*
+ * The top of the stack points directly at the return address;
+ * back up by 5 bytes from the return address.
+ */
+ _ASM_EXTABLE_FUNC_REWIND(1b, -5, FRAME_OFFSET / (BITS_PER_LONG / 8))
+ shl $0x20, %rdx
+ or %rdx, %rax
+
+ XEN_SAVE_CALLEE_REGS_FOR_MSR
+ mov %ecx, %edi
+ mov %rax, %rsi
+ call xen_do_read_msr_fixup
+ XEN_RESTORE_CALLEE_REGS_FOR_MSR
+
+2: FRAME_END
+ RET
+SYM_FUNC_END(asm_xen_read_msr)
+EXPORT_SYMBOL_GPL(asm_xen_read_msr)
@@ -146,7 +146,14 @@ __visible unsigned long xen_read_cr2_direct(void);
/* These are not functions, and cannot be called normally */
__visible void xen_iret(void);
+struct xen_rdmsr_ret_type {
+ u64 val;
+ bool done;
+};
+
extern bool xen_do_write_msr(u32 msr, u64 val);
+extern struct xen_rdmsr_ret_type xen_do_read_msr(u32 msr);
+extern u64 xen_do_read_msr_fixup(u32 msr, u64 val);
extern int xen_panic_handler_init(void);

Also add support for the immediate form of MSR reads.

Suggested-by: H. Peter Anvin (Intel) <hpa@zytor.com>
Signed-off-by: Xin Li (Intel) <xin@zytor.com>
---
 arch/x86/include/asm/msr.h            | 274 ++++++++++++++++++++------
 arch/x86/include/asm/paravirt.h       |  40 ----
 arch/x86/include/asm/paravirt_types.h |   9 -
 arch/x86/kernel/paravirt.c            |   2 -
 arch/x86/xen/enlighten_pv.c           |  45 ++---
 arch/x86/xen/xen-asm.S                |  34 ++++
 arch/x86/xen/xen-ops.h                |   7 +
 7 files changed, 276 insertions(+), 135 deletions(-)