Message ID | 20230914063325.85503-26-weijiang.yang@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Enable CET Virtualization | expand |
On Thu, 2023-09-14 at 02:33 -0400, Yang Weijiang wrote: > Set up CET MSRs, related VM_ENTRY/EXIT control bits and fixed CR4 setting > to enable CET for nested VM. > > Signed-off-by: Yang Weijiang <weijiang.yang@intel.com> > --- > arch/x86/kvm/vmx/nested.c | 27 +++++++++++++++++++++++++-- > arch/x86/kvm/vmx/vmcs12.c | 6 ++++++ > arch/x86/kvm/vmx/vmcs12.h | 14 +++++++++++++- > arch/x86/kvm/vmx/vmx.c | 2 ++ > 4 files changed, 46 insertions(+), 3 deletions(-) > > diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c > index 78a3be394d00..2c4ff13fddb0 100644 > --- a/arch/x86/kvm/vmx/nested.c > +++ b/arch/x86/kvm/vmx/nested.c > @@ -660,6 +660,28 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu, > nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, > MSR_IA32_FLUSH_CMD, MSR_TYPE_W); > > + /* Pass CET MSRs to nested VM if L0 and L1 are set to pass-through. */ > + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, > + MSR_IA32_U_CET, MSR_TYPE_RW); > + > + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, > + MSR_IA32_S_CET, MSR_TYPE_RW); > + > + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, > + MSR_IA32_PL0_SSP, MSR_TYPE_RW); > + > + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, > + MSR_IA32_PL1_SSP, MSR_TYPE_RW); > + > + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, > + MSR_IA32_PL2_SSP, MSR_TYPE_RW); > + > + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, > + MSR_IA32_PL3_SSP, MSR_TYPE_RW); > + > + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, > + MSR_IA32_INT_SSP_TAB, MSR_TYPE_RW); > + > kvm_vcpu_unmap(vcpu, &vmx->nested.msr_bitmap_map, false); > > vmx->nested.force_msr_bitmap_recalc = false; > @@ -6794,7 +6816,7 @@ static void nested_vmx_setup_exit_ctls(struct vmcs_config *vmcs_conf, > VM_EXIT_HOST_ADDR_SPACE_SIZE | > #endif > VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT | > - VM_EXIT_CLEAR_BNDCFGS; > + VM_EXIT_CLEAR_BNDCFGS | VM_EXIT_LOAD_CET_STATE; > msrs->exit_ctls_high |= > VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | > VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER | > @@ -6816,7 +6838,8 @@ static void nested_vmx_setup_entry_ctls(struct vmcs_config *vmcs_conf, > #ifdef CONFIG_X86_64 > VM_ENTRY_IA32E_MODE | > #endif > - VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_BNDCFGS; > + VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_BNDCFGS | > + VM_ENTRY_LOAD_CET_STATE; > msrs->entry_ctls_high |= > (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | VM_ENTRY_LOAD_IA32_EFER | > VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL); > diff --git a/arch/x86/kvm/vmx/vmcs12.c b/arch/x86/kvm/vmx/vmcs12.c > index 106a72c923ca..4233b5ca9461 100644 > --- a/arch/x86/kvm/vmx/vmcs12.c > +++ b/arch/x86/kvm/vmx/vmcs12.c > @@ -139,6 +139,9 @@ const unsigned short vmcs12_field_offsets[] = { > FIELD(GUEST_PENDING_DBG_EXCEPTIONS, guest_pending_dbg_exceptions), > FIELD(GUEST_SYSENTER_ESP, guest_sysenter_esp), > FIELD(GUEST_SYSENTER_EIP, guest_sysenter_eip), > + FIELD(GUEST_S_CET, guest_s_cet), > + FIELD(GUEST_SSP, guest_ssp), > + FIELD(GUEST_INTR_SSP_TABLE, guest_ssp_tbl), > FIELD(HOST_CR0, host_cr0), > FIELD(HOST_CR3, host_cr3), > FIELD(HOST_CR4, host_cr4), > @@ -151,5 +154,8 @@ const unsigned short vmcs12_field_offsets[] = { > FIELD(HOST_IA32_SYSENTER_EIP, host_ia32_sysenter_eip), > FIELD(HOST_RSP, host_rsp), > FIELD(HOST_RIP, host_rip), > + FIELD(HOST_S_CET, host_s_cet), > + FIELD(HOST_SSP, host_ssp), > + FIELD(HOST_INTR_SSP_TABLE, host_ssp_tbl), > }; > const unsigned int nr_vmcs12_fields = ARRAY_SIZE(vmcs12_field_offsets); > diff --git a/arch/x86/kvm/vmx/vmcs12.h b/arch/x86/kvm/vmx/vmcs12.h > index 01936013428b..3884489e7f7e 100644 > --- a/arch/x86/kvm/vmx/vmcs12.h > +++ b/arch/x86/kvm/vmx/vmcs12.h > @@ -117,7 +117,13 @@ struct __packed vmcs12 { > natural_width host_ia32_sysenter_eip; > natural_width host_rsp; > natural_width host_rip; > - natural_width paddingl[8]; /* room for future expansion */ > + natural_width host_s_cet; > + natural_width host_ssp; > + natural_width host_ssp_tbl; > + natural_width guest_s_cet; > + natural_width guest_ssp; > + natural_width guest_ssp_tbl; > + natural_width paddingl[2]; /* room for future expansion */ > u32 pin_based_vm_exec_control; > u32 cpu_based_vm_exec_control; > u32 exception_bitmap; > @@ -292,6 +298,12 @@ static inline void vmx_check_vmcs12_offsets(void) > CHECK_OFFSET(host_ia32_sysenter_eip, 656); > CHECK_OFFSET(host_rsp, 664); > CHECK_OFFSET(host_rip, 672); > + CHECK_OFFSET(host_s_cet, 680); > + CHECK_OFFSET(host_ssp, 688); > + CHECK_OFFSET(host_ssp_tbl, 696); > + CHECK_OFFSET(guest_s_cet, 704); > + CHECK_OFFSET(guest_ssp, 712); > + CHECK_OFFSET(guest_ssp_tbl, 720); > CHECK_OFFSET(pin_based_vm_exec_control, 744); > CHECK_OFFSET(cpu_based_vm_exec_control, 748); > CHECK_OFFSET(exception_bitmap, 752); > diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c > index f0dea8ecd0c6..2c43f1088d77 100644 > --- a/arch/x86/kvm/vmx/vmx.c > +++ b/arch/x86/kvm/vmx/vmx.c > @@ -7731,6 +7731,8 @@ static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu) > cr4_fixed1_update(X86_CR4_PKE, ecx, feature_bit(PKU)); > cr4_fixed1_update(X86_CR4_UMIP, ecx, feature_bit(UMIP)); > cr4_fixed1_update(X86_CR4_LA57, ecx, feature_bit(LA57)); > + cr4_fixed1_update(X86_CR4_CET, ecx, feature_bit(SHSTK)); > + cr4_fixed1_update(X86_CR4_CET, edx, feature_bit(IBT)); > > #undef cr4_fixed1_update > } It is surprising how little needs to be done to support the nested mode, but it does look correct. I might have missed something though, can't be 100% sure in this case. Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com> Best regards, Maxim Levitsky
On Thu, Sep 14, 2023 at 02:33:25AM -0400, Yang Weijiang wrote: >Set up CET MSRs, related VM_ENTRY/EXIT control bits and fixed CR4 setting >to enable CET for nested VM. > >Signed-off-by: Yang Weijiang <weijiang.yang@intel.com> >--- > arch/x86/kvm/vmx/nested.c | 27 +++++++++++++++++++++++++-- > arch/x86/kvm/vmx/vmcs12.c | 6 ++++++ > arch/x86/kvm/vmx/vmcs12.h | 14 +++++++++++++- > arch/x86/kvm/vmx/vmx.c | 2 ++ > 4 files changed, 46 insertions(+), 3 deletions(-) > >diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c >index 78a3be394d00..2c4ff13fddb0 100644 >--- a/arch/x86/kvm/vmx/nested.c >+++ b/arch/x86/kvm/vmx/nested.c >@@ -660,6 +660,28 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu, > nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, > MSR_IA32_FLUSH_CMD, MSR_TYPE_W); > >+ /* Pass CET MSRs to nested VM if L0 and L1 are set to pass-through. */ >+ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, >+ MSR_IA32_U_CET, MSR_TYPE_RW); >+ >+ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, >+ MSR_IA32_S_CET, MSR_TYPE_RW); >+ >+ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, >+ MSR_IA32_PL0_SSP, MSR_TYPE_RW); >+ >+ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, >+ MSR_IA32_PL1_SSP, MSR_TYPE_RW); >+ >+ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, >+ MSR_IA32_PL2_SSP, MSR_TYPE_RW); >+ >+ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, >+ MSR_IA32_PL3_SSP, MSR_TYPE_RW); >+ >+ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, >+ MSR_IA32_INT_SSP_TAB, MSR_TYPE_RW); >+ > kvm_vcpu_unmap(vcpu, &vmx->nested.msr_bitmap_map, false); > > vmx->nested.force_msr_bitmap_recalc = false; >@@ -6794,7 +6816,7 @@ static void nested_vmx_setup_exit_ctls(struct vmcs_config *vmcs_conf, > VM_EXIT_HOST_ADDR_SPACE_SIZE | > #endif > VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT | >- VM_EXIT_CLEAR_BNDCFGS; >+ VM_EXIT_CLEAR_BNDCFGS | VM_EXIT_LOAD_CET_STATE; > msrs->exit_ctls_high |= > VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | > VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER | >@@ -6816,7 +6838,8 @@ static void nested_vmx_setup_entry_ctls(struct vmcs_config *vmcs_conf, > #ifdef CONFIG_X86_64 > VM_ENTRY_IA32E_MODE | > #endif >- VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_BNDCFGS; >+ VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_BNDCFGS | >+ VM_ENTRY_LOAD_CET_STATE; > msrs->entry_ctls_high |= > (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | VM_ENTRY_LOAD_IA32_EFER | > VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL); >diff --git a/arch/x86/kvm/vmx/vmcs12.c b/arch/x86/kvm/vmx/vmcs12.c >index 106a72c923ca..4233b5ca9461 100644 >--- a/arch/x86/kvm/vmx/vmcs12.c >+++ b/arch/x86/kvm/vmx/vmcs12.c >@@ -139,6 +139,9 @@ const unsigned short vmcs12_field_offsets[] = { > FIELD(GUEST_PENDING_DBG_EXCEPTIONS, guest_pending_dbg_exceptions), > FIELD(GUEST_SYSENTER_ESP, guest_sysenter_esp), > FIELD(GUEST_SYSENTER_EIP, guest_sysenter_eip), >+ FIELD(GUEST_S_CET, guest_s_cet), >+ FIELD(GUEST_SSP, guest_ssp), >+ FIELD(GUEST_INTR_SSP_TABLE, guest_ssp_tbl), I think we need to sync guest states, e.g., guest_s_cet/guest_ssp/guest_ssp_tbl, between vmcs02 and vmcs12 on nested VM entry/exit, probably in sync_vmcs02_to_vmcs12() and prepare_vmcs12() or "_rare" variants of them.
On 11/1/2023 10:09 AM, Chao Gao wrote: > On Thu, Sep 14, 2023 at 02:33:25AM -0400, Yang Weijiang wrote: >> Set up CET MSRs, related VM_ENTRY/EXIT control bits and fixed CR4 setting >> to enable CET for nested VM. >> >> Signed-off-by: Yang Weijiang <weijiang.yang@intel.com> >> --- >> arch/x86/kvm/vmx/nested.c | 27 +++++++++++++++++++++++++-- >> arch/x86/kvm/vmx/vmcs12.c | 6 ++++++ >> arch/x86/kvm/vmx/vmcs12.h | 14 +++++++++++++- >> arch/x86/kvm/vmx/vmx.c | 2 ++ >> 4 files changed, 46 insertions(+), 3 deletions(-) >> >> diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c >> index 78a3be394d00..2c4ff13fddb0 100644 >> --- a/arch/x86/kvm/vmx/nested.c >> +++ b/arch/x86/kvm/vmx/nested.c >> @@ -660,6 +660,28 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu, >> nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, >> MSR_IA32_FLUSH_CMD, MSR_TYPE_W); >> >> + /* Pass CET MSRs to nested VM if L0 and L1 are set to pass-through. */ >> + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, >> + MSR_IA32_U_CET, MSR_TYPE_RW); >> + >> + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, >> + MSR_IA32_S_CET, MSR_TYPE_RW); >> + >> + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, >> + MSR_IA32_PL0_SSP, MSR_TYPE_RW); >> + >> + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, >> + MSR_IA32_PL1_SSP, MSR_TYPE_RW); >> + >> + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, >> + MSR_IA32_PL2_SSP, MSR_TYPE_RW); >> + >> + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, >> + MSR_IA32_PL3_SSP, MSR_TYPE_RW); >> + >> + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, >> + MSR_IA32_INT_SSP_TAB, MSR_TYPE_RW); >> + >> kvm_vcpu_unmap(vcpu, &vmx->nested.msr_bitmap_map, false); >> >> vmx->nested.force_msr_bitmap_recalc = false; >> @@ -6794,7 +6816,7 @@ static void nested_vmx_setup_exit_ctls(struct vmcs_config *vmcs_conf, >> VM_EXIT_HOST_ADDR_SPACE_SIZE | >> #endif >> VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT | >> - VM_EXIT_CLEAR_BNDCFGS; >> + VM_EXIT_CLEAR_BNDCFGS | VM_EXIT_LOAD_CET_STATE; >> msrs->exit_ctls_high |= >> VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | >> VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER | >> @@ -6816,7 +6838,8 @@ static void nested_vmx_setup_entry_ctls(struct vmcs_config *vmcs_conf, >> #ifdef CONFIG_X86_64 >> VM_ENTRY_IA32E_MODE | >> #endif >> - VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_BNDCFGS; >> + VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_BNDCFGS | >> + VM_ENTRY_LOAD_CET_STATE; >> msrs->entry_ctls_high |= >> (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | VM_ENTRY_LOAD_IA32_EFER | >> VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL); >> diff --git a/arch/x86/kvm/vmx/vmcs12.c b/arch/x86/kvm/vmx/vmcs12.c >> index 106a72c923ca..4233b5ca9461 100644 >> --- a/arch/x86/kvm/vmx/vmcs12.c >> +++ b/arch/x86/kvm/vmx/vmcs12.c >> @@ -139,6 +139,9 @@ const unsigned short vmcs12_field_offsets[] = { >> FIELD(GUEST_PENDING_DBG_EXCEPTIONS, guest_pending_dbg_exceptions), >> FIELD(GUEST_SYSENTER_ESP, guest_sysenter_esp), >> FIELD(GUEST_SYSENTER_EIP, guest_sysenter_eip), >> + FIELD(GUEST_S_CET, guest_s_cet), >> + FIELD(GUEST_SSP, guest_ssp), >> + FIELD(GUEST_INTR_SSP_TABLE, guest_ssp_tbl), > I think we need to sync guest states, e.g., guest_s_cet/guest_ssp/guest_ssp_tbl, > between vmcs02 and vmcs12 on nested VM entry/exit, probably in > sync_vmcs02_to_vmcs12() and prepare_vmcs12() or "_rare" variants of them. Thanks Chao! Let me double check the nested code part and reply.
On Wed, 2023-11-01 at 10:09 +0800, Chao Gao wrote: > On Thu, Sep 14, 2023 at 02:33:25AM -0400, Yang Weijiang wrote: > > Set up CET MSRs, related VM_ENTRY/EXIT control bits and fixed CR4 setting > > to enable CET for nested VM. > > > > Signed-off-by: Yang Weijiang <weijiang.yang@intel.com> > > --- > > arch/x86/kvm/vmx/nested.c | 27 +++++++++++++++++++++++++-- > > arch/x86/kvm/vmx/vmcs12.c | 6 ++++++ > > arch/x86/kvm/vmx/vmcs12.h | 14 +++++++++++++- > > arch/x86/kvm/vmx/vmx.c | 2 ++ > > 4 files changed, 46 insertions(+), 3 deletions(-) > > > > diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c > > index 78a3be394d00..2c4ff13fddb0 100644 > > --- a/arch/x86/kvm/vmx/nested.c > > +++ b/arch/x86/kvm/vmx/nested.c > > @@ -660,6 +660,28 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu, > > nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, > > MSR_IA32_FLUSH_CMD, MSR_TYPE_W); > > > > + /* Pass CET MSRs to nested VM if L0 and L1 are set to pass-through. */ > > + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, > > + MSR_IA32_U_CET, MSR_TYPE_RW); > > + > > + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, > > + MSR_IA32_S_CET, MSR_TYPE_RW); > > + > > + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, > > + MSR_IA32_PL0_SSP, MSR_TYPE_RW); > > + > > + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, > > + MSR_IA32_PL1_SSP, MSR_TYPE_RW); > > + > > + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, > > + MSR_IA32_PL2_SSP, MSR_TYPE_RW); > > + > > + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, > > + MSR_IA32_PL3_SSP, MSR_TYPE_RW); > > + > > + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, > > + MSR_IA32_INT_SSP_TAB, MSR_TYPE_RW); > > + > > kvm_vcpu_unmap(vcpu, &vmx->nested.msr_bitmap_map, false); > > > > vmx->nested.force_msr_bitmap_recalc = false; > > @@ -6794,7 +6816,7 @@ static void nested_vmx_setup_exit_ctls(struct vmcs_config *vmcs_conf, > > VM_EXIT_HOST_ADDR_SPACE_SIZE | > > #endif > > VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT | > > - VM_EXIT_CLEAR_BNDCFGS; > > + VM_EXIT_CLEAR_BNDCFGS | VM_EXIT_LOAD_CET_STATE; > > msrs->exit_ctls_high |= > > VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | > > VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER | > > @@ -6816,7 +6838,8 @@ static void nested_vmx_setup_entry_ctls(struct vmcs_config *vmcs_conf, > > #ifdef CONFIG_X86_64 > > VM_ENTRY_IA32E_MODE | > > #endif > > - VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_BNDCFGS; > > + VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_BNDCFGS | > > + VM_ENTRY_LOAD_CET_STATE; > > msrs->entry_ctls_high |= > > (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | VM_ENTRY_LOAD_IA32_EFER | > > VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL); > > diff --git a/arch/x86/kvm/vmx/vmcs12.c b/arch/x86/kvm/vmx/vmcs12.c > > index 106a72c923ca..4233b5ca9461 100644 > > --- a/arch/x86/kvm/vmx/vmcs12.c > > +++ b/arch/x86/kvm/vmx/vmcs12.c > > @@ -139,6 +139,9 @@ const unsigned short vmcs12_field_offsets[] = { > > FIELD(GUEST_PENDING_DBG_EXCEPTIONS, guest_pending_dbg_exceptions), > > FIELD(GUEST_SYSENTER_ESP, guest_sysenter_esp), > > FIELD(GUEST_SYSENTER_EIP, guest_sysenter_eip), > > + FIELD(GUEST_S_CET, guest_s_cet), > > + FIELD(GUEST_SSP, guest_ssp), > > + FIELD(GUEST_INTR_SSP_TABLE, guest_ssp_tbl), > > I think we need to sync guest states, e.g., guest_s_cet/guest_ssp/guest_ssp_tbl, > between vmcs02 and vmcs12 on nested VM entry/exit, probably in > sync_vmcs02_to_vmcs12() and prepare_vmcs12() or "_rare" variants of them. > Aha, this is why I suspected that nested support is incomplete, 100% agree. In particular, looking at Intel's SDM I see that: HOST_S_CET, HOST_SSP, HOST_INTR_SSP_TABLE needs to be copied from vmcb12 to vmcb02 but not vise versa because the CPU doesn't touch them. GUEST_S_CET, GUEST_SSP, GUEST_INTR_SSP_TABLE should be copied bi-directionally. This of course depends on the corresponding vm entry and vm exit controls being set. That means that it is legal in theory to do VM entry/exit with CET enabled but not use VM_ENTRY_LOAD_CET_STATE and/or VM_EXIT_LOAD_CET_STATE, because for example nested hypervisor in theory can opt to save/load these itself. I think that this is all, but I also can't be 100% sure. This thing has to be tested well before we can be sure that it works. Best regards, Maxim Levitsky
On 11/1/2023 10:09 AM, Chao Gao wrote: > On Thu, Sep 14, 2023 at 02:33:25AM -0400, Yang Weijiang wrote: >> Set up CET MSRs, related VM_ENTRY/EXIT control bits and fixed CR4 setting >> to enable CET for nested VM. >> >> Signed-off-by: Yang Weijiang <weijiang.yang@intel.com> >> --- >> arch/x86/kvm/vmx/nested.c | 27 +++++++++++++++++++++++++-- >> arch/x86/kvm/vmx/vmcs12.c | 6 ++++++ >> arch/x86/kvm/vmx/vmcs12.h | 14 +++++++++++++- >> arch/x86/kvm/vmx/vmx.c | 2 ++ >> 4 files changed, 46 insertions(+), 3 deletions(-) >> >> diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c >> index 78a3be394d00..2c4ff13fddb0 100644 >> --- a/arch/x86/kvm/vmx/nested.c >> +++ b/arch/x86/kvm/vmx/nested.c >> @@ -660,6 +660,28 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu, >> nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, >> MSR_IA32_FLUSH_CMD, MSR_TYPE_W); >> >> + /* Pass CET MSRs to nested VM if L0 and L1 are set to pass-through. */ >> + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, >> + MSR_IA32_U_CET, MSR_TYPE_RW); >> + >> + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, >> + MSR_IA32_S_CET, MSR_TYPE_RW); >> + >> + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, >> + MSR_IA32_PL0_SSP, MSR_TYPE_RW); >> + >> + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, >> + MSR_IA32_PL1_SSP, MSR_TYPE_RW); >> + >> + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, >> + MSR_IA32_PL2_SSP, MSR_TYPE_RW); >> + >> + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, >> + MSR_IA32_PL3_SSP, MSR_TYPE_RW); >> + >> + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, >> + MSR_IA32_INT_SSP_TAB, MSR_TYPE_RW); >> + >> kvm_vcpu_unmap(vcpu, &vmx->nested.msr_bitmap_map, false); >> >> vmx->nested.force_msr_bitmap_recalc = false; >> @@ -6794,7 +6816,7 @@ static void nested_vmx_setup_exit_ctls(struct vmcs_config *vmcs_conf, >> VM_EXIT_HOST_ADDR_SPACE_SIZE | >> #endif >> VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT | >> - VM_EXIT_CLEAR_BNDCFGS; >> + VM_EXIT_CLEAR_BNDCFGS | VM_EXIT_LOAD_CET_STATE; >> msrs->exit_ctls_high |= >> VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | >> VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER | >> @@ -6816,7 +6838,8 @@ static void nested_vmx_setup_entry_ctls(struct vmcs_config *vmcs_conf, >> #ifdef CONFIG_X86_64 >> VM_ENTRY_IA32E_MODE | >> #endif >> - VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_BNDCFGS; >> + VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_BNDCFGS | >> + VM_ENTRY_LOAD_CET_STATE; >> msrs->entry_ctls_high |= >> (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | VM_ENTRY_LOAD_IA32_EFER | >> VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL); >> diff --git a/arch/x86/kvm/vmx/vmcs12.c b/arch/x86/kvm/vmx/vmcs12.c >> index 106a72c923ca..4233b5ca9461 100644 >> --- a/arch/x86/kvm/vmx/vmcs12.c >> +++ b/arch/x86/kvm/vmx/vmcs12.c >> @@ -139,6 +139,9 @@ const unsigned short vmcs12_field_offsets[] = { >> FIELD(GUEST_PENDING_DBG_EXCEPTIONS, guest_pending_dbg_exceptions), >> FIELD(GUEST_SYSENTER_ESP, guest_sysenter_esp), >> FIELD(GUEST_SYSENTER_EIP, guest_sysenter_eip), >> + FIELD(GUEST_S_CET, guest_s_cet), >> + FIELD(GUEST_SSP, guest_ssp), >> + FIELD(GUEST_INTR_SSP_TABLE, guest_ssp_tbl), > I think we need to sync guest states, e.g., guest_s_cet/guest_ssp/guest_ssp_tbl, > between vmcs02 and vmcs12 on nested VM entry/exit, probably in > sync_vmcs02_to_vmcs12() and prepare_vmcs12() or "_rare" variants of them. After checked around the code, it's necessary to sync related fields from vmcs02 to vmcs12 at nested VM exit so that L1 or userspace can access correct values. I'll add this part, thanks!
On 11/1/2023 5:54 PM, Maxim Levitsky wrote: > On Wed, 2023-11-01 at 10:09 +0800, Chao Gao wrote: >> On Thu, Sep 14, 2023 at 02:33:25AM -0400, Yang Weijiang wrote: >>> Set up CET MSRs, related VM_ENTRY/EXIT control bits and fixed CR4 setting >>> to enable CET for nested VM. >>> >>> Signed-off-by: Yang Weijiang <weijiang.yang@intel.com> >>> --- >>> arch/x86/kvm/vmx/nested.c | 27 +++++++++++++++++++++++++-- >>> arch/x86/kvm/vmx/vmcs12.c | 6 ++++++ >>> arch/x86/kvm/vmx/vmcs12.h | 14 +++++++++++++- >>> arch/x86/kvm/vmx/vmx.c | 2 ++ >>> 4 files changed, 46 insertions(+), 3 deletions(-) >>> >>> diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c >>> index 78a3be394d00..2c4ff13fddb0 100644 >>> --- a/arch/x86/kvm/vmx/nested.c >>> +++ b/arch/x86/kvm/vmx/nested.c >>> @@ -660,6 +660,28 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu, >>> nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, >>> MSR_IA32_FLUSH_CMD, MSR_TYPE_W); >>> >>> + /* Pass CET MSRs to nested VM if L0 and L1 are set to pass-through. */ >>> + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, >>> + MSR_IA32_U_CET, MSR_TYPE_RW); >>> + >>> + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, >>> + MSR_IA32_S_CET, MSR_TYPE_RW); >>> + >>> + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, >>> + MSR_IA32_PL0_SSP, MSR_TYPE_RW); >>> + >>> + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, >>> + MSR_IA32_PL1_SSP, MSR_TYPE_RW); >>> + >>> + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, >>> + MSR_IA32_PL2_SSP, MSR_TYPE_RW); >>> + >>> + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, >>> + MSR_IA32_PL3_SSP, MSR_TYPE_RW); >>> + >>> + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, >>> + MSR_IA32_INT_SSP_TAB, MSR_TYPE_RW); >>> + >>> kvm_vcpu_unmap(vcpu, &vmx->nested.msr_bitmap_map, false); >>> >>> vmx->nested.force_msr_bitmap_recalc = false; >>> @@ -6794,7 +6816,7 @@ static void nested_vmx_setup_exit_ctls(struct vmcs_config *vmcs_conf, >>> VM_EXIT_HOST_ADDR_SPACE_SIZE | >>> #endif >>> VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT | >>> - VM_EXIT_CLEAR_BNDCFGS; >>> + VM_EXIT_CLEAR_BNDCFGS | VM_EXIT_LOAD_CET_STATE; >>> msrs->exit_ctls_high |= >>> VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | >>> VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER | >>> @@ -6816,7 +6838,8 @@ static void nested_vmx_setup_entry_ctls(struct vmcs_config *vmcs_conf, >>> #ifdef CONFIG_X86_64 >>> VM_ENTRY_IA32E_MODE | >>> #endif >>> - VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_BNDCFGS; >>> + VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_BNDCFGS | >>> + VM_ENTRY_LOAD_CET_STATE; >>> msrs->entry_ctls_high |= >>> (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | VM_ENTRY_LOAD_IA32_EFER | >>> VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL); >>> diff --git a/arch/x86/kvm/vmx/vmcs12.c b/arch/x86/kvm/vmx/vmcs12.c >>> index 106a72c923ca..4233b5ca9461 100644 >>> --- a/arch/x86/kvm/vmx/vmcs12.c >>> +++ b/arch/x86/kvm/vmx/vmcs12.c >>> @@ -139,6 +139,9 @@ const unsigned short vmcs12_field_offsets[] = { >>> FIELD(GUEST_PENDING_DBG_EXCEPTIONS, guest_pending_dbg_exceptions), >>> FIELD(GUEST_SYSENTER_ESP, guest_sysenter_esp), >>> FIELD(GUEST_SYSENTER_EIP, guest_sysenter_eip), >>> + FIELD(GUEST_S_CET, guest_s_cet), >>> + FIELD(GUEST_SSP, guest_ssp), >>> + FIELD(GUEST_INTR_SSP_TABLE, guest_ssp_tbl), >> I think we need to sync guest states, e.g., guest_s_cet/guest_ssp/guest_ssp_tbl, >> between vmcs02 and vmcs12 on nested VM entry/exit, probably in >> sync_vmcs02_to_vmcs12() and prepare_vmcs12() or "_rare" variants of them. >> > Aha, this is why I suspected that nested support is incomplete, > 100% agree. > > In particular, looking at Intel's SDM I see that: > > HOST_S_CET, HOST_SSP, HOST_INTR_SSP_TABLE needs to be copied from vmcb12 to vmcb02 but not vise versa > because the CPU doesn't touch them. > > GUEST_S_CET, GUEST_SSP, GUEST_INTR_SSP_TABLE should be copied bi-directionally. Yes, I'll make this part of code complete in next version, thanks! > This of course depends on the corresponding vm entry and vm exit controls being set. > That means that it is legal in theory to do VM entry/exit with CET enabled but not use > VM_ENTRY_LOAD_CET_STATE and/or VM_EXIT_LOAD_CET_STATE, > because for example nested hypervisor in theory can opt to save/load these itself. > > I think that this is all, but I also can't be 100% sure. This thing has to be tested well before > we can be sure that it works. > > Best regards, > Maxim Levitsky >
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 78a3be394d00..2c4ff13fddb0 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -660,6 +660,28 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu, nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, MSR_IA32_FLUSH_CMD, MSR_TYPE_W); + /* Pass CET MSRs to nested VM if L0 and L1 are set to pass-through. */ + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, + MSR_IA32_U_CET, MSR_TYPE_RW); + + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, + MSR_IA32_S_CET, MSR_TYPE_RW); + + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, + MSR_IA32_PL0_SSP, MSR_TYPE_RW); + + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, + MSR_IA32_PL1_SSP, MSR_TYPE_RW); + + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, + MSR_IA32_PL2_SSP, MSR_TYPE_RW); + + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, + MSR_IA32_PL3_SSP, MSR_TYPE_RW); + + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, + MSR_IA32_INT_SSP_TAB, MSR_TYPE_RW); + kvm_vcpu_unmap(vcpu, &vmx->nested.msr_bitmap_map, false); vmx->nested.force_msr_bitmap_recalc = false; @@ -6794,7 +6816,7 @@ static void nested_vmx_setup_exit_ctls(struct vmcs_config *vmcs_conf, VM_EXIT_HOST_ADDR_SPACE_SIZE | #endif VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT | - VM_EXIT_CLEAR_BNDCFGS; + VM_EXIT_CLEAR_BNDCFGS | VM_EXIT_LOAD_CET_STATE; msrs->exit_ctls_high |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER | @@ -6816,7 +6838,8 @@ static void nested_vmx_setup_entry_ctls(struct vmcs_config *vmcs_conf, #ifdef CONFIG_X86_64 VM_ENTRY_IA32E_MODE | #endif - VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_BNDCFGS; + VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_BNDCFGS | + VM_ENTRY_LOAD_CET_STATE; msrs->entry_ctls_high |= (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | VM_ENTRY_LOAD_IA32_EFER | VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL); diff --git a/arch/x86/kvm/vmx/vmcs12.c b/arch/x86/kvm/vmx/vmcs12.c index 106a72c923ca..4233b5ca9461 100644 --- a/arch/x86/kvm/vmx/vmcs12.c +++ b/arch/x86/kvm/vmx/vmcs12.c @@ -139,6 +139,9 @@ const unsigned short vmcs12_field_offsets[] = { FIELD(GUEST_PENDING_DBG_EXCEPTIONS, guest_pending_dbg_exceptions), FIELD(GUEST_SYSENTER_ESP, guest_sysenter_esp), FIELD(GUEST_SYSENTER_EIP, guest_sysenter_eip), + FIELD(GUEST_S_CET, guest_s_cet), + FIELD(GUEST_SSP, guest_ssp), + FIELD(GUEST_INTR_SSP_TABLE, guest_ssp_tbl), FIELD(HOST_CR0, host_cr0), FIELD(HOST_CR3, host_cr3), FIELD(HOST_CR4, host_cr4), @@ -151,5 +154,8 @@ const unsigned short vmcs12_field_offsets[] = { FIELD(HOST_IA32_SYSENTER_EIP, host_ia32_sysenter_eip), FIELD(HOST_RSP, host_rsp), FIELD(HOST_RIP, host_rip), + FIELD(HOST_S_CET, host_s_cet), + FIELD(HOST_SSP, host_ssp), + FIELD(HOST_INTR_SSP_TABLE, host_ssp_tbl), }; const unsigned int nr_vmcs12_fields = ARRAY_SIZE(vmcs12_field_offsets); diff --git a/arch/x86/kvm/vmx/vmcs12.h b/arch/x86/kvm/vmx/vmcs12.h index 01936013428b..3884489e7f7e 100644 --- a/arch/x86/kvm/vmx/vmcs12.h +++ b/arch/x86/kvm/vmx/vmcs12.h @@ -117,7 +117,13 @@ struct __packed vmcs12 { natural_width host_ia32_sysenter_eip; natural_width host_rsp; natural_width host_rip; - natural_width paddingl[8]; /* room for future expansion */ + natural_width host_s_cet; + natural_width host_ssp; + natural_width host_ssp_tbl; + natural_width guest_s_cet; + natural_width guest_ssp; + natural_width guest_ssp_tbl; + natural_width paddingl[2]; /* room for future expansion */ u32 pin_based_vm_exec_control; u32 cpu_based_vm_exec_control; u32 exception_bitmap; @@ -292,6 +298,12 @@ static inline void vmx_check_vmcs12_offsets(void) CHECK_OFFSET(host_ia32_sysenter_eip, 656); CHECK_OFFSET(host_rsp, 664); CHECK_OFFSET(host_rip, 672); + CHECK_OFFSET(host_s_cet, 680); + CHECK_OFFSET(host_ssp, 688); + CHECK_OFFSET(host_ssp_tbl, 696); + CHECK_OFFSET(guest_s_cet, 704); + CHECK_OFFSET(guest_ssp, 712); + CHECK_OFFSET(guest_ssp_tbl, 720); CHECK_OFFSET(pin_based_vm_exec_control, 744); CHECK_OFFSET(cpu_based_vm_exec_control, 748); CHECK_OFFSET(exception_bitmap, 752); diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index f0dea8ecd0c6..2c43f1088d77 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7731,6 +7731,8 @@ static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu) cr4_fixed1_update(X86_CR4_PKE, ecx, feature_bit(PKU)); cr4_fixed1_update(X86_CR4_UMIP, ecx, feature_bit(UMIP)); cr4_fixed1_update(X86_CR4_LA57, ecx, feature_bit(LA57)); + cr4_fixed1_update(X86_CR4_CET, ecx, feature_bit(SHSTK)); + cr4_fixed1_update(X86_CR4_CET, edx, feature_bit(IBT)); #undef cr4_fixed1_update }
Set up CET MSRs, related VM_ENTRY/EXIT control bits and fixed CR4 setting to enable CET for nested VM. Signed-off-by: Yang Weijiang <weijiang.yang@intel.com> --- arch/x86/kvm/vmx/nested.c | 27 +++++++++++++++++++++++++-- arch/x86/kvm/vmx/vmcs12.c | 6 ++++++ arch/x86/kvm/vmx/vmcs12.h | 14 +++++++++++++- arch/x86/kvm/vmx/vmx.c | 2 ++ 4 files changed, 46 insertions(+), 3 deletions(-)