Message ID | 20191101085222.27997-4-weijiang.yang@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Introduce support for guest CET feature | expand |
On Fri, Nov 01, 2019 at 04:52:18PM +0800, Yang Weijiang wrote: > CET MSRs pass through Guest directly to enhance performance. > CET runtime control settings are stored in MSR_IA32_{U,S}_CET, > Shadow Stack Pointer(SSP) are stored in MSR_IA32_PL{0,1,2,3}_SSP, > SSP table base address is stored in MSR_IA32_INT_SSP_TAB, > these MSRs are defined in kernel and re-used here. > > MSR_IA32_U_CET and MSR_IA32_PL3_SSP are used for user mode protection, > the contents could differ from process to process, therefore, > kernel needs to save/restore them during context switch, it makes > sense to pass through them so that the guest kernel can > use xsaves/xrstors to operate them efficiently. Other MSRs are used > for non-user mode protection. See CET spec for detailed info. > > The difference between CET VMCS state fields and xsave components is that, > the former used for CET state storage during VMEnter/VMExit, > whereas the latter used for state retention between Guest task/process > switch. > > Co-developed-by: Zhang Yi Z <yi.z.zhang@linux.intel.com> > Signed-off-by: Zhang Yi Z <yi.z.zhang@linux.intel.com> > Signed-off-by: Yang Weijiang <weijiang.yang@intel.com> > --- > arch/x86/kvm/cpuid.c | 4 +-- > arch/x86/kvm/cpuid.h | 2 ++ > arch/x86/kvm/vmx/vmx.c | 65 ++++++++++++++++++++++++++++++++++++++++++ > 3 files changed, 69 insertions(+), 2 deletions(-) > > diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c > index dd387a785c1e..4166c4fcad1e 100644 > --- a/arch/x86/kvm/cpuid.c > +++ b/arch/x86/kvm/cpuid.c > @@ -371,13 +371,13 @@ static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index) > F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ | > F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) | > F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) | > - F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B); > + F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | F(SHSTK); > > /* cpuid 7.0.edx*/ > const u32 kvm_cpuid_7_0_edx_x86_features = > F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) | > F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) | > - F(MD_CLEAR); > + F(MD_CLEAR) | F(IBT); Advertising CET to userspace/guest needs to be done at the end of the series, or at least after CR4.CET is no longer reserved, e.g. KVM_SET_SREGS will fail and the guest will get a #GP when trying to set CR4.CET. I'm pretty sure I've said this at least twice in previous versions of this series... > > /* cpuid 7.1.eax */ > const u32 kvm_cpuid_7_1_eax_x86_features = > diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h > index d78a61408243..1d77b880084d 100644 > --- a/arch/x86/kvm/cpuid.h > +++ b/arch/x86/kvm/cpuid.h > @@ -27,6 +27,8 @@ bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, > > int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu); > > +u64 kvm_supported_xss(void); > + > static inline int cpuid_maxphyaddr(struct kvm_vcpu *vcpu) > { > return vcpu->arch.maxphyaddr; > diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c > index a84198cff397..db03d9dc1297 100644 > --- a/arch/x86/kvm/vmx/vmx.c > +++ b/arch/x86/kvm/vmx/vmx.c > @@ -2918,6 +2918,24 @@ void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) > vmcs_writel(GUEST_CR3, guest_cr3); > } > > +static bool guest_cet_allowed(struct kvm_vcpu *vcpu, u32 feature, u32 mode) > +{ > + u64 kvm_xss = kvm_supported_xss(); > + > + /* > + * Sanity check for guest CET dependencies, guest_cpu_has(SHSTK|IBT) has > + * implied corresponding host CET status check. > + */ > + if (feature == X86_FEATURE_SHSTK) > + return guest_cpuid_has(vcpu, X86_FEATURE_SHSTK) && > + (kvm_xss & mode); > + else if (feature == X86_FEATURE_IBT) > + return guest_cpuid_has(vcpu, X86_FEATURE_IBT) && > + (kvm_xss & mode); > + > + return false; > +} > + > int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) > { > struct vcpu_vmx *vmx = to_vmx(vcpu); > @@ -7001,6 +7019,50 @@ static void update_intel_pt_cfg(struct kvm_vcpu *vcpu) > vmx->pt_desc.ctl_bitmask &= ~(0xfULL << (32 + i * 4)); > } > > +static void vmx_pass_cet_msrs(struct kvm_vcpu *vcpu) "pass" isn't accurate, this function also does the opposite. Maybe vmx_update_intercept_for_cet_msr()? Or reuse the PT naming and go with cet_update_intercept_for_msr()? > +{ > + struct vcpu_vmx *vmx = to_vmx(vcpu); > + unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap; > + > + /* > + * U_CET is required for USER CET, per CET spec., meanwhile U_CET and > + * PL3_SPP are a bundle for USER CET xsaves. > + */ > + if (guest_cet_allowed(vcpu, X86_FEATURE_SHSTK, XFEATURE_MASK_CET_USER) || > + guest_cet_allowed(vcpu, X86_FEATURE_IBT, XFEATURE_MASK_CET_USER)) { IMO, the guest_cet_allowed() wrappers do more harm than good, e.g. I find this easier to understand because it doesn't require digging into a random helper. if ((kvm_supported_xss() & XFEATURE_MASK_CET_USER) && (guest_cpuid_has(vcpu, X86_FEATURE_SHSTK) || guest_cpuid_has(vcpu, X86_FEATURE_IBT))) > + vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_U_CET, MSR_TYPE_RW); > + vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_PL3_SSP, MSR_TYPE_RW); > + } else { > + vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_U_CET, MSR_TYPE_RW, true); > + vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_PL3_SSP, MSR_TYPE_RW, true); > + } > + /* > + * S_CET is required for KERNEL CET, meanwhile PL0_SSP ... PL2_SSP are a bundle > + * for CET KERNEL xsaves. > + */ > + if (guest_cet_allowed(vcpu, X86_FEATURE_SHSTK, XFEATURE_MASK_CET_KERNEL) || > + guest_cet_allowed(vcpu, X86_FEATURE_IBT, XFEATURE_MASK_CET_KERNEL)) { > + vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_S_CET, MSR_TYPE_RW); > + vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_PL0_SSP, MSR_TYPE_RW); > + vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_PL1_SSP, MSR_TYPE_RW); > + vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_PL2_SSP, MSR_TYPE_RW); > + > + /* SSP_TAB only available for KERNEL SHSTK.*/ > + if (guest_cpuid_has(vcpu, X86_FEATURE_SHSTK)) > + vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_INT_SSP_TAB, > + MSR_TYPE_RW); > + else > + vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_INT_SSP_TAB, > + MSR_TYPE_RW, true); > + } else { > + vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_S_CET, MSR_TYPE_RW, true); > + vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_PL0_SSP, MSR_TYPE_RW, true); > + vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_PL1_SSP, MSR_TYPE_RW, true); > + vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_PL2_SSP, MSR_TYPE_RW, true); > + vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_INT_SSP_TAB, MSR_TYPE_RW, true); > + } > +} > + > static void vmx_cpuid_update(struct kvm_vcpu *vcpu) > { > struct vcpu_vmx *vmx = to_vmx(vcpu); > @@ -7025,6 +7087,9 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu) > if (boot_cpu_has(X86_FEATURE_INTEL_PT) && > guest_cpuid_has(vcpu, X86_FEATURE_INTEL_PT)) > update_intel_pt_cfg(vcpu); > + > + if (!is_guest_mode(vcpu)) > + vmx_pass_cet_msrs(vcpu); Hmm, this looks insufficent, e.g. deliberately toggling CET from on->off while in guest mode would put KVM in a weird state as the msr bitmap for L1 would still allow L1 to access the CET MSRs. Allowing KVM_SET_CPUID{2} while running a nested guest seems bogus, can we kill that path entirely with -EINVAL? > } > > static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) > -- > 2.17.2 >
On Tue, Dec 10, 2019 at 01:18:21PM -0800, Sean Christopherson wrote: > On Fri, Nov 01, 2019 at 04:52:18PM +0800, Yang Weijiang wrote: > > CET MSRs pass through Guest directly to enhance performance. > > CET runtime control settings are stored in MSR_IA32_{U,S}_CET, > > Shadow Stack Pointer(SSP) are stored in MSR_IA32_PL{0,1,2,3}_SSP, > > SSP table base address is stored in MSR_IA32_INT_SSP_TAB, > > these MSRs are defined in kernel and re-used here. > > > > diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c > > index dd387a785c1e..4166c4fcad1e 100644 > > --- a/arch/x86/kvm/cpuid.c > > +++ b/arch/x86/kvm/cpuid.c > > @@ -371,13 +371,13 @@ static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index) > > F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ | > > F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) | > > F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) | > > - F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B); > > + F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | F(SHSTK); > > > > /* cpuid 7.0.edx*/ > > const u32 kvm_cpuid_7_0_edx_x86_features = > > F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) | > > F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) | > > - F(MD_CLEAR); > > + F(MD_CLEAR) | F(IBT); > > Advertising CET to userspace/guest needs to be done at the end of the > series, or at least after CR4.CET is no longer reserved, e.g. KVM_SET_SREGS > will fail and the guest will get a #GP when trying to set CR4.CET. > > I'm pretty sure I've said this at least twice in previous versions of > this series... Thanks Sean for picking these up! The reason is, starting from this patch, I'm using guest_cpuid_has(CET) to check the availability of guest CET CPUID, so logically I would like to let the readers understand CET related CPUID word is defined as above. But no problem, I can move these definitions to a latter patch as the patchset only meaningful as a whole. > > > > > /* cpuid 7.1.eax */ > > const u32 kvm_cpuid_7_1_eax_x86_features = > > diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h > > index d78a61408243..1d77b880084d 100644 > > --- a/arch/x86/kvm/cpuid.h > > +++ b/arch/x86/kvm/cpuid.h > > @@ -27,6 +27,8 @@ bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, > > > > +static void vmx_pass_cet_msrs(struct kvm_vcpu *vcpu) > > "pass" isn't accurate, this function also does the opposite. Maybe > vmx_update_intercept_for_cet_msr()? Or reuse the PT naming and go with > cet_update_intercept_for_msr()? > Sure, will change it. > > +{ > > + struct vcpu_vmx *vmx = to_vmx(vcpu); > > + unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap; > > + > > + /* > > + * U_CET is required for USER CET, per CET spec., meanwhile U_CET and > > + * PL3_SPP are a bundle for USER CET xsaves. > > + */ > > + if (guest_cet_allowed(vcpu, X86_FEATURE_SHSTK, XFEATURE_MASK_CET_USER) || > > + guest_cet_allowed(vcpu, X86_FEATURE_IBT, XFEATURE_MASK_CET_USER)) { > > IMO, the guest_cet_allowed() wrappers do more harm than good, e.g. I find > this easier to understand because it doesn't require digging into a random > helper. > > if ((kvm_supported_xss() & XFEATURE_MASK_CET_USER) && > (guest_cpuid_has(vcpu, X86_FEATURE_SHSTK) || > guest_cpuid_has(vcpu, X86_FEATURE_IBT))) > Hmm, sounds like it's an unnecessary wrapper, will remove it, thanks! > > + vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_U_CET, MSR_TYPE_RW); > > + vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_PL3_SSP, MSR_TYPE_RW); > > + } else { > > + vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_U_CET, MSR_TYPE_RW, true); > > + vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_PL3_SSP, MSR_TYPE_RW, true); > > + } > > + /* > > + * S_CET is required for KERNEL CET, meanwhile PL0_SSP ... PL2_SSP are a bundle > > + * for CET KERNEL xsaves. > > + */ > > + if (guest_cet_allowed(vcpu, X86_FEATURE_SHSTK, XFEATURE_MASK_CET_KERNEL) || > > + guest_cet_allowed(vcpu, X86_FEATURE_IBT, XFEATURE_MASK_CET_KERNEL)) { > > + vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_S_CET, MSR_TYPE_RW); > > + vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_PL0_SSP, MSR_TYPE_RW); > > + vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_PL1_SSP, MSR_TYPE_RW); > > + vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_PL2_SSP, MSR_TYPE_RW); > > + > > + /* SSP_TAB only available for KERNEL SHSTK.*/ > > + if (guest_cpuid_has(vcpu, X86_FEATURE_SHSTK)) > > + vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_INT_SSP_TAB, > > + MSR_TYPE_RW); > > + else > > + vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_INT_SSP_TAB, > > + MSR_TYPE_RW, true); > > + } else { > > + vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_S_CET, MSR_TYPE_RW, true); > > + vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_PL0_SSP, MSR_TYPE_RW, true); > > + vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_PL1_SSP, MSR_TYPE_RW, true); > > + vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_PL2_SSP, MSR_TYPE_RW, true); > > + vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_INT_SSP_TAB, MSR_TYPE_RW, true); > > + } > > +} > > + > > static void vmx_cpuid_update(struct kvm_vcpu *vcpu) > > { > > struct vcpu_vmx *vmx = to_vmx(vcpu); > > @@ -7025,6 +7087,9 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu) > > if (boot_cpu_has(X86_FEATURE_INTEL_PT) && > > guest_cpuid_has(vcpu, X86_FEATURE_INTEL_PT)) > > update_intel_pt_cfg(vcpu); > > + > > + if (!is_guest_mode(vcpu)) > > + vmx_pass_cet_msrs(vcpu); > > Hmm, this looks insufficent, e.g. deliberately toggling CET from on->off > while in guest mode would put KVM in a weird state as the msr bitmap for > L1 would still allow L1 to access the CET MSRs. Not sure I understand correctly, guest_cpu_has(CET) implies the check of host CET status, if CET is off in host, CET MSRs won't exposed to L1 guest. > > Allowing KVM_SET_CPUID{2} while running a nested guest seems bogus, can we > kill that path entirely with -EINVAL? > Do you mean prevent L1 using KVM_SET_CPUID{2} to expose CET feature bits to L2? > > } > > > > static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) > > -- > > 2.17.2 > >
On Wed, Dec 11, 2019 at 09:32:07AM +0800, Yang Weijiang wrote: > On Tue, Dec 10, 2019 at 01:18:21PM -0800, Sean Christopherson wrote: > > On Fri, Nov 01, 2019 at 04:52:18PM +0800, Yang Weijiang wrote: > > > CET MSRs pass through Guest directly to enhance performance. > > > CET runtime control settings are stored in MSR_IA32_{U,S}_CET, > > > Shadow Stack Pointer(SSP) are stored in MSR_IA32_PL{0,1,2,3}_SSP, > > > SSP table base address is stored in MSR_IA32_INT_SSP_TAB, > > > these MSRs are defined in kernel and re-used here. > > > > > > diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c > > > index dd387a785c1e..4166c4fcad1e 100644 > > > --- a/arch/x86/kvm/cpuid.c > > > +++ b/arch/x86/kvm/cpuid.c > > > @@ -371,13 +371,13 @@ static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index) > > > F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ | > > > F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) | > > > F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) | > > > - F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B); > > > + F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | F(SHSTK); > > > > > > /* cpuid 7.0.edx*/ > > > const u32 kvm_cpuid_7_0_edx_x86_features = > > > F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) | > > > F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) | > > > - F(MD_CLEAR); > > > + F(MD_CLEAR) | F(IBT); > > > > Advertising CET to userspace/guest needs to be done at the end of the > > series, or at least after CR4.CET is no longer reserved, e.g. KVM_SET_SREGS > > will fail and the guest will get a #GP when trying to set CR4.CET. > > > > I'm pretty sure I've said this at least twice in previous versions of > > this series... > > Thanks Sean for picking these up! > The reason is, starting from this patch, I'm using guest_cpuid_has(CET) > to check the availability of guest CET CPUID, so logically I would like to let > the readers understand CET related CPUID word is > defined as above. But no problem, I can move these definitions to a > latter patch as the patchset only meaningful as a whole. Adding usage of guest_cpuid_has(CET) without advertising CET is perfectly ok from a functionality perspective. Having a user without a consumer isn't ideal, but it's better than having one gigantic patch. The problem with advertising CET when it's not fully supported is that it will break bisection, e.g. trying to boot a CET-enabled guest would get a #GP during boot and likely crash. Whether or not a series is useful when taken as a whole is orthogonal to the integrity of each invidiual patch.
On Tue, Dec 10, 2019 at 05:50:52PM -0800, Sean Christopherson wrote: > On Wed, Dec 11, 2019 at 09:32:07AM +0800, Yang Weijiang wrote: > > On Tue, Dec 10, 2019 at 01:18:21PM -0800, Sean Christopherson wrote: > > > On Fri, Nov 01, 2019 at 04:52:18PM +0800, Yang Weijiang wrote: > > > > CET MSRs pass through Guest directly to enhance performance. > > > > CET runtime control settings are stored in MSR_IA32_{U,S}_CET, > > > > Shadow Stack Pointer(SSP) are stored in MSR_IA32_PL{0,1,2,3}_SSP, > > > > SSP table base address is stored in MSR_IA32_INT_SSP_TAB, > > > > these MSRs are defined in kernel and re-used here. > > > > > > > > diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c > > > > index dd387a785c1e..4166c4fcad1e 100644 > > > > --- a/arch/x86/kvm/cpuid.c > > > > +++ b/arch/x86/kvm/cpuid.c > > > > @@ -371,13 +371,13 @@ static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index) > > > > F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ | > > > > F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) | > > > > F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) | > > > > - F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B); > > > > + F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | F(SHSTK); > > > > > > > > /* cpuid 7.0.edx*/ > > > > const u32 kvm_cpuid_7_0_edx_x86_features = > > > > F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) | > > > > F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) | > > > > - F(MD_CLEAR); > > > > + F(MD_CLEAR) | F(IBT); > > > > > > Advertising CET to userspace/guest needs to be done at the end of the > > > series, or at least after CR4.CET is no longer reserved, e.g. KVM_SET_SREGS > > > will fail and the guest will get a #GP when trying to set CR4.CET. > > > > > > I'm pretty sure I've said this at least twice in previous versions of > > > this series... > > > > Thanks Sean for picking these up! > > The reason is, starting from this patch, I'm using guest_cpuid_has(CET) > > to check the availability of guest CET CPUID, so logically I would like to let > > the readers understand CET related CPUID word is > > defined as above. But no problem, I can move these definitions to a > > latter patch as the patchset only meaningful as a whole. > > Adding usage of guest_cpuid_has(CET) without advertising CET is perfectly > ok from a functionality perspective. Having a user without a consumer > isn't ideal, but it's better than having one gigantic patch. > > The problem with advertising CET when it's not fully supported is that it > will break bisection, e.g. trying to boot a CET-enabled guest would get a > #GP during boot and likely crash. Whether or not a series is useful when > taken as a whole is orthogonal to the integrity of each invidiual patch. Oh, I omitted case likes bisection, you're right, I'll change it, thanks a lot!
On Tue, Dec 10, 2019 at 01:18:21PM -0800, Sean Christopherson wrote: > On Fri, Nov 01, 2019 at 04:52:18PM +0800, Yang Weijiang wrote: > > CET MSRs pass through Guest directly to enhance performance. > > CET runtime control settings are stored in MSR_IA32_{U,S}_CET, > > Shadow Stack Pointer(SSP) are stored in MSR_IA32_PL{0,1,2,3}_SSP, > > SSP table base address is stored in MSR_IA32_INT_SSP_TAB, > > these MSRs are defined in kernel and re-used here. > > > + > > static void vmx_cpuid_update(struct kvm_vcpu *vcpu) > > { > > struct vcpu_vmx *vmx = to_vmx(vcpu); > > @@ -7025,6 +7087,9 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu) > > if (boot_cpu_has(X86_FEATURE_INTEL_PT) && > > guest_cpuid_has(vcpu, X86_FEATURE_INTEL_PT)) > > update_intel_pt_cfg(vcpu); > > + > > + if (!is_guest_mode(vcpu)) > > + vmx_pass_cet_msrs(vcpu); > > Hmm, this looks insufficent, e.g. deliberately toggling CET from on->off > while in guest mode would put KVM in a weird state as the msr bitmap for > L1 would still allow L1 to access the CET MSRs. > Hi, Sean, I don't get you, there's guest mode check before access CET msrs, it'll fail if it's in guest mode. > Allowing KVM_SET_CPUID{2} while running a nested guest seems bogus, can we > kill that path entirely with -EINVAL? > Do you mean don't expose CET cpuids to L2 guest? thanks! > > } > > > > static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) > > -- > > 2.17.2 > >
On Mon, Dec 16, 2019 at 10:18:16AM +0800, Yang Weijiang wrote: > On Tue, Dec 10, 2019 at 01:18:21PM -0800, Sean Christopherson wrote: > > On Fri, Nov 01, 2019 at 04:52:18PM +0800, Yang Weijiang wrote: > > > CET MSRs pass through Guest directly to enhance performance. > > > CET runtime control settings are stored in MSR_IA32_{U,S}_CET, > > > Shadow Stack Pointer(SSP) are stored in MSR_IA32_PL{0,1,2,3}_SSP, > > > SSP table base address is stored in MSR_IA32_INT_SSP_TAB, > > > these MSRs are defined in kernel and re-used here. > > > > > + > > > static void vmx_cpuid_update(struct kvm_vcpu *vcpu) > > > { > > > struct vcpu_vmx *vmx = to_vmx(vcpu); > > > @@ -7025,6 +7087,9 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu) > > > if (boot_cpu_has(X86_FEATURE_INTEL_PT) && > > > guest_cpuid_has(vcpu, X86_FEATURE_INTEL_PT)) > > > update_intel_pt_cfg(vcpu); > > > + > > > + if (!is_guest_mode(vcpu)) > > > + vmx_pass_cet_msrs(vcpu); > > > > Hmm, this looks insufficent, e.g. deliberately toggling CET from on->off > > while in guest mode would put KVM in a weird state as the msr bitmap for > > L1 would still allow L1 to access the CET MSRs. > > > Hi, Sean, > I don't get you, there's guest mode check before access CET msrs, it'll > fail if it's in guest mode. KVM can exit to userspae while L2 is active. If userspace then did a KVM_SET_CPUID2, e.g. instead of KVM_RUN, vmx_cpuid_update() would skip vmx_pass_cet_msrs() and KVM would never update L1's MSR bitmaps. > > Allowing KVM_SET_CPUID{2} while running a nested guest seems bogus, can we > > kill that path entirely with -EINVAL? > > > Do you mean don't expose CET cpuids to L2 guest? I mean completely disallow KVM_SET_CPUID and KVM_SET_CPUID2 if is_guest_mode() is true. My question is mostly directed at Paolo and anyone else that has an opinion on whether we can massage the ABI to retroactively change KVM_SET_CPUID{2} behavior.
On Tue, Dec 17, 2019 at 04:34:55PM -0800, Sean Christopherson wrote: > On Mon, Dec 16, 2019 at 10:18:16AM +0800, Yang Weijiang wrote: > > On Tue, Dec 10, 2019 at 01:18:21PM -0800, Sean Christopherson wrote: > > > On Fri, Nov 01, 2019 at 04:52:18PM +0800, Yang Weijiang wrote: > > > > CET MSRs pass through Guest directly to enhance performance. > > > > CET runtime control settings are stored in MSR_IA32_{U,S}_CET, > > > > Shadow Stack Pointer(SSP) are stored in MSR_IA32_PL{0,1,2,3}_SSP, > > > > SSP table base address is stored in MSR_IA32_INT_SSP_TAB, > > > > these MSRs are defined in kernel and re-used here. > > > > > > > + > > > > static void vmx_cpuid_update(struct kvm_vcpu *vcpu) > > > > { > > > > struct vcpu_vmx *vmx = to_vmx(vcpu); > > > > @@ -7025,6 +7087,9 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu) > > > > if (boot_cpu_has(X86_FEATURE_INTEL_PT) && > > > > guest_cpuid_has(vcpu, X86_FEATURE_INTEL_PT)) > > > > update_intel_pt_cfg(vcpu); > > > > + > > > > + if (!is_guest_mode(vcpu)) > > > > + vmx_pass_cet_msrs(vcpu); > > > > > > Hmm, this looks insufficent, e.g. deliberately toggling CET from on->off > > > while in guest mode would put KVM in a weird state as the msr bitmap for > > > L1 would still allow L1 to access the CET MSRs. > > > > > Hi, Sean, > > I don't get you, there's guest mode check before access CET msrs, it'll > > fail if it's in guest mode. > > KVM can exit to userspae while L2 is active. If userspace then did a > KVM_SET_CPUID2, e.g. instead of KVM_RUN, vmx_cpuid_update() would skip > vmx_pass_cet_msrs() and KVM would never update L1's MSR bitmaps. > Thanks, it makes sense to me. Given current implementation, how about removing above check and adding it in CET CPUID enumeration for L2 so that no CET msrs passed through to L2 when is_guest_mode() is true? > > > Allowing KVM_SET_CPUID{2} while running a nested guest seems bogus, can we > > > kill that path entirely with -EINVAL? > > > > > Do you mean don't expose CET cpuids to L2 guest? > > I mean completely disallow KVM_SET_CPUID and KVM_SET_CPUID2 if > is_guest_mode() is true. My question is mostly directed at Paolo and > anyone else that has an opinion on whether we can massage the ABI to > retroactively change KVM_SET_CPUID{2} behavior. This sounds like something deserving an individual patch after get agreement in community. I'll put it aside right now.
On Wed, Dec 18, 2019 at 09:55:13PM +0800, Yang Weijiang wrote: > On Tue, Dec 17, 2019 at 04:34:55PM -0800, Sean Christopherson wrote: > > On Mon, Dec 16, 2019 at 10:18:16AM +0800, Yang Weijiang wrote: > > > On Tue, Dec 10, 2019 at 01:18:21PM -0800, Sean Christopherson wrote: > > > > On Fri, Nov 01, 2019 at 04:52:18PM +0800, Yang Weijiang wrote: > > > > > CET MSRs pass through Guest directly to enhance performance. > > > > > CET runtime control settings are stored in MSR_IA32_{U,S}_CET, > > > > > Shadow Stack Pointer(SSP) are stored in MSR_IA32_PL{0,1,2,3}_SSP, > > > > > SSP table base address is stored in MSR_IA32_INT_SSP_TAB, > > > > > these MSRs are defined in kernel and re-used here. > > > > > > > > > + > > > > > static void vmx_cpuid_update(struct kvm_vcpu *vcpu) > > > > > { > > > > > struct vcpu_vmx *vmx = to_vmx(vcpu); > > > > > @@ -7025,6 +7087,9 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu) > > > > > if (boot_cpu_has(X86_FEATURE_INTEL_PT) && > > > > > guest_cpuid_has(vcpu, X86_FEATURE_INTEL_PT)) > > > > > update_intel_pt_cfg(vcpu); > > > > > + > > > > > + if (!is_guest_mode(vcpu)) > > > > > + vmx_pass_cet_msrs(vcpu); > > > > > > > > Hmm, this looks insufficent, e.g. deliberately toggling CET from on->off > > > > while in guest mode would put KVM in a weird state as the msr bitmap for > > > > L1 would still allow L1 to access the CET MSRs. > > > > > > > Hi, Sean, > > > I don't get you, there's guest mode check before access CET msrs, it'll > > > fail if it's in guest mode. > > > > KVM can exit to userspae while L2 is active. If userspace then did a > > KVM_SET_CPUID2, e.g. instead of KVM_RUN, vmx_cpuid_update() would skip > > vmx_pass_cet_msrs() and KVM would never update L1's MSR bitmaps. > > > Thanks, it makes sense to me. Given current implementation, how about > removing above check and adding it in CET CPUID > enumeration for L2 so that no CET msrs passed through to L2 when > is_guest_mode() is true? But you still need to update L1's MSR bitmaps. That can obviously be done all at once, but it's annoying and IMO unnecessarily complex. > > > > Allowing KVM_SET_CPUID{2} while running a nested guest seems bogus, can we > > > > kill that path entirely with -EINVAL? > > > > > > > Do you mean don't expose CET cpuids to L2 guest? > > > > I mean completely disallow KVM_SET_CPUID and KVM_SET_CPUID2 if > > is_guest_mode() is true. My question is mostly directed at Paolo and > > anyone else that has an opinion on whether we can massage the ABI to > > retroactively change KVM_SET_CPUID{2} behavior. > > This sounds like something deserving an individual patch after get > agreement in community. I'll put it aside right now. Normally I would agree, but I think in this case it would significantly reduce the complexity and implementation cost of CET support. I'll send a patch to kickstart the conversation, it's a tiny change in terms of code.
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index dd387a785c1e..4166c4fcad1e 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -371,13 +371,13 @@ static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index) F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ | F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) | F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) | - F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B); + F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | F(SHSTK); /* cpuid 7.0.edx*/ const u32 kvm_cpuid_7_0_edx_x86_features = F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) | F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) | - F(MD_CLEAR); + F(MD_CLEAR) | F(IBT); /* cpuid 7.1.eax */ const u32 kvm_cpuid_7_1_eax_x86_features = diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index d78a61408243..1d77b880084d 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -27,6 +27,8 @@ bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu); +u64 kvm_supported_xss(void); + static inline int cpuid_maxphyaddr(struct kvm_vcpu *vcpu) { return vcpu->arch.maxphyaddr; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index a84198cff397..db03d9dc1297 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -2918,6 +2918,24 @@ void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) vmcs_writel(GUEST_CR3, guest_cr3); } +static bool guest_cet_allowed(struct kvm_vcpu *vcpu, u32 feature, u32 mode) +{ + u64 kvm_xss = kvm_supported_xss(); + + /* + * Sanity check for guest CET dependencies, guest_cpu_has(SHSTK|IBT) has + * implied corresponding host CET status check. + */ + if (feature == X86_FEATURE_SHSTK) + return guest_cpuid_has(vcpu, X86_FEATURE_SHSTK) && + (kvm_xss & mode); + else if (feature == X86_FEATURE_IBT) + return guest_cpuid_has(vcpu, X86_FEATURE_IBT) && + (kvm_xss & mode); + + return false; +} + int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -7001,6 +7019,50 @@ static void update_intel_pt_cfg(struct kvm_vcpu *vcpu) vmx->pt_desc.ctl_bitmask &= ~(0xfULL << (32 + i * 4)); } +static void vmx_pass_cet_msrs(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap; + + /* + * U_CET is required for USER CET, per CET spec., meanwhile U_CET and + * PL3_SPP are a bundle for USER CET xsaves. + */ + if (guest_cet_allowed(vcpu, X86_FEATURE_SHSTK, XFEATURE_MASK_CET_USER) || + guest_cet_allowed(vcpu, X86_FEATURE_IBT, XFEATURE_MASK_CET_USER)) { + vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_U_CET, MSR_TYPE_RW); + vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_PL3_SSP, MSR_TYPE_RW); + } else { + vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_U_CET, MSR_TYPE_RW, true); + vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_PL3_SSP, MSR_TYPE_RW, true); + } + /* + * S_CET is required for KERNEL CET, meanwhile PL0_SSP ... PL2_SSP are a bundle + * for CET KERNEL xsaves. + */ + if (guest_cet_allowed(vcpu, X86_FEATURE_SHSTK, XFEATURE_MASK_CET_KERNEL) || + guest_cet_allowed(vcpu, X86_FEATURE_IBT, XFEATURE_MASK_CET_KERNEL)) { + vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_S_CET, MSR_TYPE_RW); + vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_PL0_SSP, MSR_TYPE_RW); + vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_PL1_SSP, MSR_TYPE_RW); + vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_PL2_SSP, MSR_TYPE_RW); + + /* SSP_TAB only available for KERNEL SHSTK.*/ + if (guest_cpuid_has(vcpu, X86_FEATURE_SHSTK)) + vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_INT_SSP_TAB, + MSR_TYPE_RW); + else + vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_INT_SSP_TAB, + MSR_TYPE_RW, true); + } else { + vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_S_CET, MSR_TYPE_RW, true); + vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_PL0_SSP, MSR_TYPE_RW, true); + vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_PL1_SSP, MSR_TYPE_RW, true); + vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_PL2_SSP, MSR_TYPE_RW, true); + vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_INT_SSP_TAB, MSR_TYPE_RW, true); + } +} + static void vmx_cpuid_update(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -7025,6 +7087,9 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu) if (boot_cpu_has(X86_FEATURE_INTEL_PT) && guest_cpuid_has(vcpu, X86_FEATURE_INTEL_PT)) update_intel_pt_cfg(vcpu); + + if (!is_guest_mode(vcpu)) + vmx_pass_cet_msrs(vcpu); } static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)