diff mbox series

[v5,14/19] KVM:VMX: Set host constant supervisor states to VMCS fields

Message ID 20230803042732.88515-15-weijiang.yang@intel.com (mailing list archive)
State New, archived
Headers show
Series Enable CET Virtualization | expand

Commit Message

Yang, Weijiang Aug. 3, 2023, 4:27 a.m. UTC
Set constant values in the HOST_{S_CET,SSP,INTR_SSP_TABLE} VMCS
fields explicitly. Kernel IBT is supported and the setting in
MSR_IA32_S_CET is static after boot (except in the BIOS call case,
but a vCPU thread never runs across one), i.e. KVM doesn't need to
refresh the HOST_S_CET field before every VM-Enter/VM-Exit
sequence.

Host supervisor shadow stack is not enabled now and SSP is not
accessible to kernel mode, thus it's safe to set the host
IA32_INT_SSP_TAB/SSP VMCS fields to 0s. When shadow stack is enabled
for CPL3, SSP is reloaded from IA32_PL3_SSP before execution returns
to userspace. Check SDM Vol 2A/B Chapter 3/4 for SYSCALL/SYSRET/
SYSENTER/SYSEXIT/RDSSP/CALL etc.

Prevent KVM module loading if host supervisor shadow stack
(SHSTK_EN) is set in MSR_IA32_S_CET, as KVM cannot co-exist with it
correctly.

Suggested-by: Sean Christopherson <seanjc@google.com>
Suggested-by: Chao Gao <chao.gao@intel.com>
Signed-off-by: Yang Weijiang <weijiang.yang@intel.com>
---
 arch/x86/kvm/vmx/capabilities.h |  4 ++++
 arch/x86/kvm/vmx/vmx.c          | 15 +++++++++++++++
 arch/x86/kvm/x86.c              | 14 ++++++++++++++
 arch/x86/kvm/x86.h              |  1 +
 4 files changed, 34 insertions(+)

Comments

Chao Gao Aug. 4, 2023, 8:23 a.m. UTC | #1
On Thu, Aug 03, 2023 at 12:27:27AM -0400, Yang Weijiang wrote:
>Set constant values to HOST_{S_CET,SSP,INTR_SSP_TABLE} VMCS
>fields explicitly. Kernel IBT is supported and the setting in
>MSR_IA32_S_CET is static after post-boot(except is BIOS call
>case but vCPU thread never across it.), i.e. KVM doesn't need
>to refresh HOST_S_CET field before every VM-Enter/VM-Exit
>sequence.
>
>Host supervisor shadow stack is not enabled now and SSP is not
>accessible to kernel mode, thus it's safe to set host IA32_INT_
>SSP_TAB/SSP VMCS fields to 0s. When shadow stack is enabled for
>CPL3, SSP is reloaded from IA32_PL3_SSP before it exits to userspace.
>Check SDM Vol 2A/B Chapter 3/4 for SYSCALL/SYSRET/SYSENTER SYSEXIT/
>RDSSP/CALL etc.
>
>Prevent KVM module loading and if host supervisor shadow stack
>SHSTK_EN is set in MSR_IA32_S_CET as KVM cannot co-exit with it
>correctly.
>
>Suggested-by: Sean Christopherson <seanjc@google.com>
>Suggested-by: Chao Gao <chao.gao@intel.com>
>Signed-off-by: Yang Weijiang <weijiang.yang@intel.com>

Reviewed-by: Chao Gao <chao.gao@intel.com>
diff mbox series

Patch

diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
index d0abee35d7ba..b1883f6c08eb 100644
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@ -106,6 +106,10 @@  static inline bool cpu_has_load_perf_global_ctrl(void)
 	return vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
 }
 
+static inline bool cpu_has_load_cet_ctrl(void)
+{
+	return (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_CET_STATE);
+}
 static inline bool cpu_has_vmx_mpx(void)
 {
 	return vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_BNDCFGS;
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 6779b8a63789..99bf63b2a779 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -4341,6 +4341,21 @@  void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
 
 	if (cpu_has_load_ia32_efer())
 		vmcs_write64(HOST_IA32_EFER, host_efer);
+
+	/*
+	 * Supervisor shadow stack is not enabled on the host side, i.e.
+	 * the host IA32_S_CET.SHSTK_EN bit is guaranteed to be 0 now. Per
+	 * the SDM description of the RDSSP instruction, SSP is not readable
+	 * in CPL0, so resetting the two registers to 0s at VM-Exit does no
+	 * harm to kernel execution. When execution flow exits to userspace,
+	 * SSP is reloaded from IA32_PL3_SSP. Check SDM Vol. 2A/B Chapters
+	 * 3 and 4 for details.
+	 */
+	if (cpu_has_load_cet_ctrl()) {
+		vmcs_writel(HOST_S_CET, host_s_cet);
+		vmcs_writel(HOST_SSP, 0);
+		vmcs_writel(HOST_INTR_SSP_TABLE, 0);
+	}
 }
 
 void set_cr4_guest_host_mask(struct vcpu_vmx *vmx)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 56aa5a3d3913..01b4f10fa8ab 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -113,6 +113,8 @@  static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE);
 #endif
 
 static u64 __read_mostly cr4_reserved_bits = CR4_RESERVED_BITS;
+u64 __read_mostly host_s_cet;
+EXPORT_SYMBOL_GPL(host_s_cet);
 
 #define KVM_EXIT_HYPERCALL_VALID_MASK (1 << KVM_HC_MAP_GPA_RANGE)
 
@@ -9615,6 +9617,18 @@  static int __kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
 		return -EIO;
 	}
 
+	if (boot_cpu_has(X86_FEATURE_SHSTK)) {
+		rdmsrl(MSR_IA32_S_CET, host_s_cet);
+		/*
+		 * Linux doesn't yet support supervisor shadow stacks (SSS), so
+		 * KVM doesn't save/restore the associated MSRs, i.e. KVM may
+		 * clobber the host values.  Yell and refuse to load if SSS is
+		 * unexpectedly enabled, e.g. to avoid crashing the host.
+		 */
+		if (WARN_ON_ONCE(host_s_cet & CET_SHSTK_EN))
+			return -EIO;
+	}
+
 	x86_emulator_cache = kvm_alloc_emulator_cache();
 	if (!x86_emulator_cache) {
 		pr_err("failed to allocate cache for x86 emulator\n");
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 3b79d6db2f83..e42e5263fcf7 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -323,6 +323,7 @@  fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu);
 
 extern u64 host_xcr0;
 extern u64 host_xss;
+extern u64 host_s_cet;
 
 extern struct kvm_caps kvm_caps;