@@ -125,7 +125,8 @@
| X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \
| X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \
| X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_VMXE \
- | X86_CR4_SMAP | X86_CR4_PKE | X86_CR4_UMIP))
+ | X86_CR4_SMAP | X86_CR4_PKE | X86_CR4_UMIP \
+ | X86_CR4_CET))
#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
@@ -1078,6 +1078,7 @@
#define VMX_BASIC_MEM_TYPE_MASK 0x003c000000000000LLU
#define VMX_BASIC_MEM_TYPE_WB 6LLU
#define VMX_BASIC_INOUT 0x0040000000000000LLU
+#define VMX_BASIC_NO_HW_ERROR_CODE 0x0100000000000000LLU
/* Resctrl MSRs: */
/* - Intel: */
@@ -624,7 +624,7 @@ void kvm_set_cpu_caps(void)
F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |
F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/ |
- F(SGX_LC) | F(BUS_LOCK_DETECT)
+ F(SGX_LC) | F(BUS_LOCK_DETECT) | F(SHSTK)
);
/* Set LA57 based on hardware capability. */
if (cpuid_ecx(7) & F(LA57))
@@ -642,7 +642,8 @@ void kvm_set_cpu_caps(void)
F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) |
F(MD_CLEAR) | F(AVX512_VP2INTERSECT) | F(FSRM) |
F(SERIALIZE) | F(TSXLDTRK) | F(AVX512_FP16) |
- F(AMX_TILE) | F(AMX_INT8) | F(AMX_BF16) | F(FLUSH_L1D)
+ F(AMX_TILE) | F(AMX_INT8) | F(AMX_BF16) | F(FLUSH_L1D) |
+ F(IBT)
);
/* TSC_ADJUST and ARCH_CAPABILITIES are emulated in software. */
@@ -655,6 +656,13 @@ void kvm_set_cpu_caps(void)
kvm_cpu_cap_set(X86_FEATURE_INTEL_STIBP);
if (boot_cpu_has(X86_FEATURE_AMD_SSBD))
kvm_cpu_cap_set(X86_FEATURE_SPEC_CTRL_SSBD);
+ /*
+ * The feature bit in boot_cpu_data.x86_capability could have been
+ * cleared due to ibt=off cmdline option, then add it back if CPU
+ * supports IBT.
+ */
+ if (cpuid_edx(7) & F(IBT))
+ kvm_cpu_cap_set(X86_FEATURE_IBT);
kvm_cpu_cap_mask(CPUID_7_1_EAX,
F(AVX_VNNI) | F(AVX512_BF16) | F(CMPCCXADD) |
@@ -79,6 +79,12 @@ static inline bool cpu_has_vmx_basic_inout(void)
return (((u64)vmcs_config.basic_cap << 32) & VMX_BASIC_INOUT);
}
+static inline bool cpu_has_vmx_basic_no_hw_errcode(void)
+{
+ return ((u64)vmcs_config.basic_cap << 32) &
+ VMX_BASIC_NO_HW_ERROR_CODE;
+}
+
static inline bool cpu_has_virtual_nmis(void)
{
return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS &&
@@ -2641,6 +2641,7 @@ static int setup_vmcs_config(struct vmcs_config *vmcs_conf,
{ VM_ENTRY_LOAD_IA32_EFER, VM_EXIT_LOAD_IA32_EFER },
{ VM_ENTRY_LOAD_BNDCFGS, VM_EXIT_CLEAR_BNDCFGS },
{ VM_ENTRY_LOAD_IA32_RTIT_CTL, VM_EXIT_CLEAR_IA32_RTIT_CTL },
+ { VM_ENTRY_LOAD_CET_STATE, VM_EXIT_LOAD_CET_STATE },
};
memset(vmcs_conf, 0, sizeof(*vmcs_conf));
@@ -2761,7 +2762,7 @@ static int setup_vmcs_config(struct vmcs_config *vmcs_conf,
rdmsrl(MSR_IA32_VMX_MISC, misc_msr);
vmcs_conf->size = vmx_msr_high & 0x1fff;
- vmcs_conf->basic_cap = vmx_msr_high & ~0x1fff;
+ vmcs_conf->basic_cap = vmx_msr_high & ~0x7fff;
vmcs_conf->revision_id = vmx_msr_low;
@@ -6359,6 +6360,12 @@ void dump_vmcs(struct kvm_vcpu *vcpu)
if (vmcs_read32(VM_EXIT_MSR_STORE_COUNT) > 0)
vmx_dump_msrs("guest autostore", &vmx->msr_autostore.guest);
+ if (vmentry_ctl & VM_ENTRY_LOAD_CET_STATE) {
+ pr_err("S_CET = 0x%016lx\n", vmcs_readl(GUEST_S_CET));
+ pr_err("SSP = 0x%016lx\n", vmcs_readl(GUEST_SSP));
+ pr_err("INTR SSP TABLE = 0x%016lx\n",
+ vmcs_readl(GUEST_INTR_SSP_TABLE));
+ }
pr_err("*** Host State ***\n");
pr_err("RIP = 0x%016lx RSP = 0x%016lx\n",
vmcs_readl(HOST_RIP), vmcs_readl(HOST_RSP));
@@ -6436,6 +6443,12 @@ void dump_vmcs(struct kvm_vcpu *vcpu)
if (secondary_exec_control & SECONDARY_EXEC_ENABLE_VPID)
pr_err("Virtual processor ID = 0x%04x\n",
vmcs_read16(VIRTUAL_PROCESSOR_ID));
+ if (vmexit_ctl & VM_EXIT_LOAD_CET_STATE) {
+ pr_err("S_CET = 0x%016lx\n", vmcs_readl(HOST_S_CET));
+ pr_err("SSP = 0x%016lx\n", vmcs_readl(HOST_SSP));
+ pr_err("INTR SSP TABLE = 0x%016lx\n",
+ vmcs_readl(HOST_INTR_SSP_TABLE));
+ }
}
/*
@@ -7966,6 +7979,13 @@ static __init void vmx_set_cpu_caps(void)
if (cpu_has_vmx_waitpkg())
kvm_cpu_cap_check_and_set(X86_FEATURE_WAITPKG);
+
+ if (!cpu_has_load_cet_ctrl() || !enable_unrestricted_guest ||
+ !cpu_has_vmx_basic_no_hw_errcode()) {
+ kvm_cpu_cap_clear(X86_FEATURE_SHSTK);
+ kvm_cpu_cap_clear(X86_FEATURE_IBT);
+ kvm_caps.supported_xss &= ~CET_XSTATE_MASK;
+ }
}
static void vmx_request_immediate_exit(struct kvm_vcpu *vcpu)
@@ -481,7 +481,8 @@ static inline u8 vmx_get_rvi(void)
VM_ENTRY_LOAD_IA32_EFER | \
VM_ENTRY_LOAD_BNDCFGS | \
VM_ENTRY_PT_CONCEAL_PIP | \
- VM_ENTRY_LOAD_IA32_RTIT_CTL)
+ VM_ENTRY_LOAD_IA32_RTIT_CTL | \
+ VM_ENTRY_LOAD_CET_STATE)
#define __KVM_REQUIRED_VMX_VM_EXIT_CONTROLS \
(VM_EXIT_SAVE_DEBUG_CONTROLS | \
@@ -503,7 +504,8 @@ static inline u8 vmx_get_rvi(void)
VM_EXIT_LOAD_IA32_EFER | \
VM_EXIT_CLEAR_BNDCFGS | \
VM_EXIT_PT_CONCEAL_PIP | \
- VM_EXIT_CLEAR_IA32_RTIT_CTL)
+ VM_EXIT_CLEAR_IA32_RTIT_CTL | \
+ VM_EXIT_LOAD_CET_STATE)
#define KVM_REQUIRED_VMX_PIN_BASED_VM_EXEC_CONTROL \
(PIN_BASED_EXT_INTR_MASK | \
@@ -228,7 +228,7 @@ static struct kvm_user_return_msrs __percpu *user_return_msrs;
| XFEATURE_MASK_BNDCSR | XFEATURE_MASK_AVX512 \
| XFEATURE_MASK_PKRU | XFEATURE_MASK_XTILE)
-#define KVM_SUPPORTED_XSS 0
+#define KVM_SUPPORTED_XSS (XFEATURE_MASK_CET_USER)
u64 __read_mostly host_efer;
EXPORT_SYMBOL_GPL(host_efer);
@@ -9648,6 +9648,20 @@ static int __kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
kvm_ops_update(ops);
+ if (!kvm_is_cet_supported()) {
+ kvm_cpu_cap_clear(X86_FEATURE_SHSTK);
+ kvm_cpu_cap_clear(X86_FEATURE_IBT);
+ }
+
+ /*
+ * If SHSTK and IBT are not available in KVM, clear CET user bit in
+ * kvm_caps.supported_xss so that kvm_is_cet__supported() returns
+ * false when called.
+ */
+ if (!kvm_cpu_cap_has(X86_FEATURE_SHSTK) &&
+ !kvm_cpu_cap_has(X86_FEATURE_IBT))
+ kvm_caps.supported_xss &= ~CET_XSTATE_MASK;
+
for_each_online_cpu(cpu) {
smp_call_function_single(cpu, kvm_x86_check_cpu_compat, &r, 1);
if (r < 0)
@@ -538,6 +538,9 @@ bool kvm_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u32 type);
__reserved_bits |= X86_CR4_VMXE; \
if (!__cpu_has(__c, X86_FEATURE_PCID)) \
__reserved_bits |= X86_CR4_PCIDE; \
+ if (!__cpu_has(__c, X86_FEATURE_SHSTK) && \
+ !__cpu_has(__c, X86_FEATURE_IBT)) \
+ __reserved_bits |= X86_CR4_CET; \
__reserved_bits; \
})
Enable CET related feature bits in KVM capabilities array and make X86_CR4_CET available to guest. Remove the feature bits if host side dependencies cannot be met. Set the feature bits so that CET features are available in guest CPUID. Add CR4.CET bit support in order to allow guest set CET master control bit(CR4.CET). Disable KVM CET feature if unrestricted_guest is unsupported/disabled as KVM does not support emulating CET. Don't expose CET feature if dependent CET bit(U_CET) is cleared in host XSS or if XSAVES isn't supported. The CET bits in VM_ENTRY/VM_EXIT control fields should be set to make guest CET states isolated from host side. CET is only available on platforms that enumerate VMX_BASIC[bit 56] as 1. Signed-off-by: Yang Weijiang <weijiang.yang@intel.com> --- arch/x86/include/asm/kvm_host.h | 3 ++- arch/x86/include/asm/msr-index.h | 1 + arch/x86/kvm/cpuid.c | 12 ++++++++++-- arch/x86/kvm/vmx/capabilities.h | 6 ++++++ arch/x86/kvm/vmx/vmx.c | 22 +++++++++++++++++++++- arch/x86/kvm/vmx/vmx.h | 6 ++++-- arch/x86/kvm/x86.c | 16 +++++++++++++++- arch/x86/kvm/x86.h | 3 +++ 8 files changed, 62 insertions(+), 7 deletions(-)