@@ -765,6 +765,12 @@ static void __init calculate_hvm_max_pol
* situations until someone has cross-checked the behaviour for safety.
*/
__clear_bit(X86_FEATURE_PKS, fs);
+
+ /*
+ * Don't expose USER_MSR until it is known how (if at all) it is
+ * virtualized on SVM.
+ */
+ __clear_bit(X86_FEATURE_USER_MSR, fs);
}
if ( !cpu_has_vmx_msrlist )
@@ -447,6 +447,10 @@ void domain_cpu_policy_changed(struct do
}
}
+ /* Nested doesn't have the necessary processing, yet. */
+ if ( nestedhvm_enabled(d) && p->feat.user_msr )
+ return /* -EINVAL */;
+
for_each_vcpu ( d, v )
{
cpu_policy_updated(v);
@@ -1374,6 +1374,7 @@ static int cf_check hvm_load_cpu_xsave_s
#define HVM_CPU_MSR_SIZE(cnt) offsetof(struct hvm_msr, msr[cnt])
static const uint32_t msrs_to_send[] = {
+ MSR_USER_MSR_CTL,
MSR_SPEC_CTRL,
MSR_INTEL_MISC_FEATURES_ENABLES,
MSR_PKRS,
@@ -1528,6 +1529,7 @@ static int cf_check hvm_load_cpu_msrs(st
{
int rc;
+ case MSR_USER_MSR_CTL:
case MSR_SPEC_CTRL:
case MSR_INTEL_MISC_FEATURES_ENABLES:
case MSR_PKRS:
@@ -676,13 +676,18 @@ static void cf_check vmx_vcpu_destroy(st
}
/*
- * To avoid MSR save/restore at every VM exit/entry time, we restore
- * the x86_64 specific MSRs at domain switch time. Since these MSRs
- * are not modified once set for para domains, we don't save them,
- * but simply reset them to values set in percpu_traps_init().
+ * To avoid MSR save/restore at every VM exit/entry time, we restore the
+ * x86_64 specific MSRs at vcpu switch time. Since these MSRs are not
+ * modified once set for para domains, we don't save them, but simply clear
+ * them or reset them to values set in percpu_traps_init().
*/
-static void vmx_restore_host_msrs(void)
+static void vmx_restore_host_msrs(const struct vcpu *v)
{
+ const struct vcpu_msrs *msrs = v->arch.msrs;
+
+ if ( msrs->user_msr_ctl.enable )
+ wrmsrl(MSR_USER_MSR_CTL, 0);
+
/* No PV guests? No need to restore host SYSCALL infrastructure. */
if ( !IS_ENABLED(CONFIG_PV) )
return;
@@ -736,6 +741,9 @@ static void vmx_restore_guest_msrs(struc
if ( cp->feat.pks )
wrpkrs(msrs->pkrs);
+
+ if ( msrs->user_msr_ctl.enable )
+ wrmsrl(MSR_USER_MSR_CTL, msrs->user_msr_ctl.raw);
}
void vmx_update_cpu_exec_control(struct vcpu *v)
@@ -1178,7 +1186,7 @@ static void cf_check vmx_ctxt_switch_fro
if ( !v->arch.fully_eager_fpu )
vmx_fpu_leave(v);
vmx_save_guest_msrs(v);
- vmx_restore_host_msrs();
+ vmx_restore_host_msrs(v);
vmx_save_dr(v);
if ( v->domain->arch.hvm.pi_ops.flags & PI_CSW_FROM )
@@ -4076,6 +4084,14 @@ static int vmx_handle_apic_write(void)
return vlapic_apicv_write(current, exit_qualification & 0xfff);
}
+static unsigned int user_msr_gpr(void)
+{
+ user_msr_instr_info_t info;
+
+ __vmread(VMX_INSTRUCTION_INFO, &info.raw);
+ return info.gpr;
+}
+
static void undo_nmis_unblocked_by_iret(void)
{
unsigned long guest_info;
@@ -4576,6 +4592,41 @@ void asmlinkage vmx_vmexit_handler(struc
hvm_inject_hw_exception(X86_EXC_GP, 0);
break;
+ case EXIT_REASON_URDMSR:
+ {
+ uint64_t msr_content = 0;
+
+ __vmread(EXIT_QUALIFICATION, &exit_qualification);
+ switch ( hvm_msr_read_intercept(exit_qualification, &msr_content) )
+ {
+ case X86EMUL_OKAY:
+ *decode_gpr(regs, user_msr_gpr()) = msr_content;
+ update_guest_eip(); /* Safe: URDMSR */
+ break;
+
+ case X86EMUL_EXCEPTION:
+ hvm_inject_hw_exception(X86_EXC_GP, 0);
+ break;
+ }
+ break;
+ }
+
+ case EXIT_REASON_UWRMSR:
+ __vmread(EXIT_QUALIFICATION, &exit_qualification);
+ switch ( hvm_msr_write_intercept(exit_qualification,
+ *decode_gpr(regs, user_msr_gpr()),
+ true) )
+ {
+ case X86EMUL_OKAY:
+ update_guest_eip(); /* Safe: UWRMSR */
+ break;
+
+ case X86EMUL_EXCEPTION:
+ hvm_inject_hw_exception(X86_EXC_GP, 0);
+ break;
+ }
+ break;
+
case EXIT_REASON_VMXOFF:
case EXIT_REASON_VMXON:
case EXIT_REASON_VMCLEAR:
@@ -203,6 +203,8 @@ static inline void pi_clear_sn(struct pi
#define EXIT_REASON_NOTIFY 75
#define EXIT_REASON_RDMSRLIST 78
#define EXIT_REASON_WRMSRLIST 79
+#define EXIT_REASON_URDMSR 80
+#define EXIT_REASON_UWRMSR 81
/* Remember to also update VMX_PERF_EXIT_REASON_SIZE! */
/*
@@ -674,8 +676,18 @@ typedef union ldt_or_tr_instr_info {
base_reg_invalid :1, /* bit 27 - Base register invalid */
instr_identity :1, /* bit 28 - 0:LDT, 1:TR */
instr_write :1, /* bit 29 - 0:store, 1:load */
- :34; /* bits 31:63 - Undefined */
+ :34; /* bits 30:63 - Undefined */
};
} ldt_or_tr_instr_info_t;
+/* VM-Exit instruction info for URDMSR and UWRMSR */
+typedef union user_msr_instr_info {
+ unsigned long raw;
+ struct {
+        unsigned int            :3,  /* bits 0:2 - Undefined */
+                     gpr       :4,  /* bits 3:6 - Source/Destination register */
+ :25; /* bits 7:31 - Undefined */
+ };
+} user_msr_instr_info_t;
+
#endif /* __ASM_X86_HVM_VMX_VMX_H__ */
@@ -301,6 +301,20 @@ uint64_t msr_spec_ctrl_valid_bits(const
struct vcpu_msrs
{
/*
+ * 0x0000001c - MSR_USER_MSR_CTL
+ *
+ * Value is guest chosen, and always loaded in vcpu context.
+ */
+ union {
+ uint64_t raw;
+ struct {
+ bool enable:1;
+ unsigned int :11;
+ unsigned long bitmap:52;
+ };
+ } user_msr_ctl;
+
+ /*
* 0x00000048 - MSR_SPEC_CTRL
* 0xc001011f - MSR_VIRT_SPEC_CTRL (if X86_FEATURE_AMD_SSBD)
*
@@ -6,7 +6,7 @@ PERFCOUNTER_ARRAY(exceptions,
#ifdef CONFIG_HVM
-#define VMX_PERF_EXIT_REASON_SIZE 80
+#define VMX_PERF_EXIT_REASON_SIZE 82
#define VMEXIT_NPF_PERFC 143
#define SVM_PERF_EXIT_REASON_SIZE (VMEXIT_NPF_PERFC + 1)
PERFCOUNTER_ARRAY(vmexits, "vmexits",
@@ -206,6 +206,12 @@ int guest_rdmsr(struct vcpu *v, uint32_t
*val = msrs->xss.raw;
break;
+ case MSR_USER_MSR_CTL:
+ if ( !cp->feat.user_msr )
+ goto gp_fault;
+ *val = msrs->user_msr_ctl.raw;
+ break;
+
case 0x40000000 ... 0x400001ff:
if ( is_viridian_domain(d) )
{
@@ -536,6 +542,19 @@ int guest_wrmsr(struct vcpu *v, uint32_t
msrs->xss.raw = val;
break;
+ case MSR_USER_MSR_CTL:
+ if ( !cp->feat.user_msr )
+ goto gp_fault;
+
+ if ( (val & ~(USER_MSR_ENABLE | USER_MSR_ADDR_MASK)) ||
+ !is_canonical_address(val) )
+ goto gp_fault;
+
+ msrs->user_msr_ctl.raw = val;
+ if ( v == curr )
+ wrmsrl(MSR_USER_MSR_CTL, val);
+ break;
+
case 0x40000000 ... 0x400001ff:
if ( is_viridian_domain(d) )
{
@@ -350,7 +350,7 @@ XEN_CPUFEATURE(AVX_NE_CONVERT, 15*32
XEN_CPUFEATURE(AMX_COMPLEX, 15*32+ 8) /* AMX Complex Instructions */
XEN_CPUFEATURE(AVX_VNNI_INT16, 15*32+10) /*A AVX-VNNI-INT16 Instructions */
XEN_CPUFEATURE(PREFETCHI, 15*32+14) /*A PREFETCHIT{0,1} Instructions */
-XEN_CPUFEATURE(USER_MSR, 15*32+15) /* U{RD,WR}MSR Instructions */
+XEN_CPUFEATURE(USER_MSR, 15*32+15) /*s U{RD,WR}MSR Instructions */
XEN_CPUFEATURE(CET_SSS, 15*32+18) /* CET Supervisor Shadow Stacks safe to use */
/* Intel-defined CPU features, MSR_ARCH_CAPS 0x10a.eax, word 16 */
Hook up the new VM exit codes and handle guest accesses, context switch,
and save/restore.  At least for now don't allow the guest direct access
to the control MSR; this may need changing if guests were to access it
frequently (e.g. on their own context switch path).

While there, also correct an off-by-one in union ldt_or_tr_instr_info's
comment.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
Needing to change two places in hvm.c continues to be unhelpful; I recall
I already did forget to also adjust hvm_load_cpu_msrs() for XFD.
Considering that MSRs typically arrive in the order the table has them,
couldn't we incrementally look up the incoming MSR index there, falling
back to a full lookup only when the incremental lookup fails (and thus not
normally re-iterating through the initial part of the array)?

Said comment in union ldt_or_tr_instr_info is further odd (same for union
gdt_or_idt_instr_info's) in that Instruction Information is only a 32-bit
field.  Hence bits 32-63 aren't undefined, but simply don't exist.

RFC: The wee attempt to "deal" with nested is likely wrong, but I'm afraid
I simply don't know where such enforcement would be done properly.
Returning an error there is also commented out, for
domain_cpu_policy_changed() returning void without "x86/xstate: re-size
save area when CPUID policy changes" in place.
---
v5: Introduce user_msr_gpr().
v4: New.
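
For the incremental-lookup idea raised above, a rough sketch of what such a
helper could look like (not part of the patch; it assumes the existing
msrs_to_send[] table and Xen's ARRAY_SIZE(), while the helper name and the
caller-held cursor are invented purely for illustration):

/*
 * Illustrative sketch only.  MSRs in an incoming context typically appear
 * in the same order as msrs_to_send[], so resume scanning from just past
 * the previous match and only fall back to rescanning the earlier part of
 * the table when that fails.
 */
static bool msr_is_sendable(uint32_t idx, unsigned int *cursor)
{
    unsigned int i;

    /* Fast path: continue from where the previous lookup left off. */
    for ( i = *cursor; i < ARRAY_SIZE(msrs_to_send); i++ )
        if ( msrs_to_send[i] == idx )
        {
            *cursor = i + 1;
            return true;
        }

    /* Slow path: MSR arrived out of order - rescan the skipped prefix. */
    for ( i = 0; i < *cursor; i++ )
        if ( msrs_to_send[i] == idx )
        {
            *cursor = i + 1;
            return true;
        }

    return false;
}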