diff mbox series

[v14,10/13] KVM: x86: Enable CET virtualization for VMX and advertise CET to userspace

Message ID 20201106011637.14289-11-weijiang.yang@intel.com (mailing list archive)
State New, archived
Headers show
Series Introduce support for guest CET feature | expand

Commit Message

Yang, Weijiang Nov. 6, 2020, 1:16 a.m. UTC
Set the feature bits so that CET capabilities can be seen in guest via
CPUID enumeration. Add CR4.CET bit support in order to allow guest set CET
master control bit(CR4.CET).

Disable KVM CET feature if unrestricted_guest is unsupported/disabled as
KVM does not support emulating CET. Reset guest CET states in vmcs so as
to avoid vmentry failure when guest toggles CR4.CET bit, e.g. during guest
reboot.

Don't expose CET feature if dependent CET bits are cleared in host XSS,
or if XSAVES isn't supported.  Updating the CET features in common x86 is
a little ugly, but there is on clean solution without risking breakage of
SVM if SVM hardware ever gains support for CET, e.g. moving everything to
common x86 would prematurely expose CET on SVM.  The alternative is to
put all the logic in VMX, but that means rereading host_xss in VMX and
duplicating the XSAVES check across VMX and SVM.

Co-developed-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Yang Weijiang <weijiang.yang@intel.com>
---
 arch/x86/include/asm/kvm_host.h |  3 +-
 arch/x86/kvm/cpuid.c            |  5 +--
 arch/x86/kvm/vmx/capabilities.h |  5 +++
 arch/x86/kvm/vmx/vmx.c          | 64 +++++++++++++++++++++++++++++++--
 arch/x86/kvm/x86.c              |  8 +++++
 5 files changed, 79 insertions(+), 6 deletions(-)

Comments

kernel test robot Nov. 9, 2020, 7:23 a.m. UTC | #1
Hi Yang,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on tip/master]
[also build test WARNING on linus/master v5.10-rc3 next-20201106]
[cannot apply to vhost/linux-next kvm/linux-next linux/master]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Yang-Weijiang/Introduce-support-for-guest-CET-feature/20201106-090915
base:   https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git 6f72faf4a32303c8bdc6491186b79391e9cf0c7e
config: i386-randconfig-r022-20201109 (attached as .config)
compiler: gcc-9 (Debian 9.3.0-15) 9.3.0
reproduce (this is a W=1 build):
        # https://github.com/0day-ci/linux/commit/34e06718bac59b9ecb835d2c4a04ae9378067819
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Yang-Weijiang/Introduce-support-for-guest-CET-feature/20201106-090915
        git checkout 34e06718bac59b9ecb835d2c4a04ae9378067819
        # save the attached .config to linux build tree
        make W=1 ARCH=i386 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

   In file included from arch/x86/kvm/cpuid.h:5,
                    from arch/x86/kvm/mmu.h:7,
                    from arch/x86/kvm/x86.c:22:
   arch/x86/kvm/x86.h: In function 'kvm_cet_supported':
   arch/x86/kvm/x86.h:291:25: error: 'XFEATURE_MASK_CET_USER' undeclared (first use in this function); did you mean 'XFEATURE_MASK_SSE'?
     291 |  return supported_xss & XFEATURE_MASK_CET_USER;
         |                         ^~~~~~~~~~~~~~~~~~~~~~
         |                         XFEATURE_MASK_SSE
   arch/x86/kvm/x86.h:291:25: note: each undeclared identifier is reported only once for each function it appears in
   In file included from include/linux/kvm_host.h:36,
                    from arch/x86/kvm/x86.c:19:
   arch/x86/kvm/x86.c: At top level:
   arch/x86/include/asm/kvm_host.h:104:8: error: 'X86_CR4_CET' undeclared here (not in a function); did you mean 'X86_CR4_DE'?
     104 |      | X86_CR4_CET))
         |        ^~~~~~~~~~~
   arch/x86/kvm/x86.c:101:46: note: in expansion of macro 'CR4_RESERVED_BITS'
     101 | static u64 __read_mostly cr4_reserved_bits = CR4_RESERVED_BITS;
         |                                              ^~~~~~~~~~~~~~~~~
   arch/x86/kvm/x86.c:1251:2: error: 'MSR_IA32_U_CET' undeclared here (not in a function); did you mean 'MSR_IA32_PMC0'?
    1251 |  MSR_IA32_U_CET, MSR_IA32_S_CET, MSR_IA32_INT_SSP_TAB, MSR_KVM_GUEST_SSP,
         |  ^~~~~~~~~~~~~~
         |  MSR_IA32_PMC0
   arch/x86/kvm/x86.c:1251:18: error: 'MSR_IA32_S_CET' undeclared here (not in a function); did you mean 'MSR_IA32_PMC0'?
    1251 |  MSR_IA32_U_CET, MSR_IA32_S_CET, MSR_IA32_INT_SSP_TAB, MSR_KVM_GUEST_SSP,
         |                  ^~~~~~~~~~~~~~
         |                  MSR_IA32_PMC0
   arch/x86/kvm/x86.c:1251:34: error: 'MSR_IA32_INT_SSP_TAB' undeclared here (not in a function)
    1251 |  MSR_IA32_U_CET, MSR_IA32_S_CET, MSR_IA32_INT_SSP_TAB, MSR_KVM_GUEST_SSP,
         |                                  ^~~~~~~~~~~~~~~~~~~~
   arch/x86/kvm/x86.c:1252:2: error: 'MSR_IA32_PL0_SSP' undeclared here (not in a function); did you mean 'MSR_IA32_MCG_ESP'?
    1252 |  MSR_IA32_PL0_SSP, MSR_IA32_PL1_SSP, MSR_IA32_PL2_SSP, MSR_IA32_PL3_SSP,
         |  ^~~~~~~~~~~~~~~~
         |  MSR_IA32_MCG_ESP
   arch/x86/kvm/x86.c:1252:20: error: 'MSR_IA32_PL1_SSP' undeclared here (not in a function); did you mean 'MSR_IA32_MCG_ESP'?
    1252 |  MSR_IA32_PL0_SSP, MSR_IA32_PL1_SSP, MSR_IA32_PL2_SSP, MSR_IA32_PL3_SSP,
         |                    ^~~~~~~~~~~~~~~~
         |                    MSR_IA32_MCG_ESP
   arch/x86/kvm/x86.c:1252:38: error: 'MSR_IA32_PL2_SSP' undeclared here (not in a function); did you mean 'MSR_IA32_MCG_ESP'?
    1252 |  MSR_IA32_PL0_SSP, MSR_IA32_PL1_SSP, MSR_IA32_PL2_SSP, MSR_IA32_PL3_SSP,
         |                                      ^~~~~~~~~~~~~~~~
         |                                      MSR_IA32_MCG_ESP
   arch/x86/kvm/x86.c:1252:56: error: 'MSR_IA32_PL3_SSP' undeclared here (not in a function); did you mean 'MSR_IA32_MCG_ESP'?
    1252 |  MSR_IA32_PL0_SSP, MSR_IA32_PL1_SSP, MSR_IA32_PL2_SSP, MSR_IA32_PL3_SSP,
         |                                                        ^~~~~~~~~~~~~~~~
         |                                                        MSR_IA32_MCG_ESP
   arch/x86/kvm/x86.c: In function 'is_xsaves_msr':
   arch/x86/kvm/x86.c:3591:15: warning: comparison between pointer and integer
    3591 |  return index == MSR_IA32_U_CET ||
         |               ^~
   arch/x86/kvm/x86.c:3592:16: warning: comparison between pointer and integer
    3592 |         (index >= MSR_IA32_PL0_SSP && index <= MSR_IA32_PL3_SSP);
         |                ^~
   arch/x86/kvm/x86.c:3592:45: warning: comparison between pointer and integer
    3592 |         (index >= MSR_IA32_PL0_SSP && index <= MSR_IA32_PL3_SSP);
         |                                             ^~
   arch/x86/kvm/x86.c: In function 'kvm_arch_hardware_setup':
   arch/x86/kvm/x86.c:10205:21: error: 'X86_FEATURE_SHSTK' undeclared (first use in this function); did you mean 'X86_FEATURE_EST'?
   10205 |   kvm_cpu_cap_clear(X86_FEATURE_SHSTK);
         |                     ^~~~~~~~~~~~~~~~~
         |                     X86_FEATURE_EST
>> arch/x86/kvm/x86.c:10205:21: warning: passing argument 1 of 'kvm_cpu_cap_clear' makes integer from pointer without a cast [-Wint-conversion]
   In file included from arch/x86/kvm/mmu.h:7,
                    from arch/x86/kvm/x86.c:22:
   arch/x86/kvm/cpuid.h:277:60: note: expected 'unsigned int' but argument is of type 'const u32 *' {aka 'const unsigned int *'}
     277 | static __always_inline void kvm_cpu_cap_clear(unsigned int x86_feature)
         |                                               ~~~~~~~~~~~~~^~~~~~~~~~~
   arch/x86/kvm/x86.c:10206:21: error: 'X86_FEATURE_IBT' undeclared (first use in this function); did you mean 'X86_FEATURE_IBS'?
   10206 |   kvm_cpu_cap_clear(X86_FEATURE_IBT);
         |                     ^~~~~~~~~~~~~~~
         |                     X86_FEATURE_IBS
   arch/x86/kvm/x86.c:10206:21: warning: passing argument 1 of 'kvm_cpu_cap_clear' makes integer from pointer without a cast [-Wint-conversion]
   In file included from arch/x86/kvm/mmu.h:7,
                    from arch/x86/kvm/x86.c:22:
   arch/x86/kvm/cpuid.h:277:60: note: expected 'unsigned int' but argument is of type 'const u32 *' {aka 'const unsigned int *'}
     277 | static __always_inline void kvm_cpu_cap_clear(unsigned int x86_feature)
         |                                               ~~~~~~~~~~~~~^~~~~~~~~~~
   In file included from include/linux/kvm_host.h:36,
                    from arch/x86/kvm/x86.c:19:
   arch/x86/include/asm/kvm_host.h:104:6: error: invalid operands to binary | (have 'long unsigned int' and 'const u32 *' {aka 'const unsigned int *'})
     104 |      | X86_CR4_CET))
         |      ^
         |      |
         |      const u32 * {aka const unsigned int *}
   arch/x86/kvm/x86.h:388:24: note: in expansion of macro 'CR4_RESERVED_BITS'
     388 |  u64 __reserved_bits = CR4_RESERVED_BITS;        \
         |                        ^~~~~~~~~~~~~~~~~
   arch/x86/kvm/x86.c:10210:22: note: in expansion of macro '__cr4_reserved_bits'
   10210 |  cr4_reserved_bits = __cr4_reserved_bits(__kvm_cpu_cap_has, UNUSED_);
         |                      ^~~~~~~~~~~~~~~~~~~
   arch/x86/kvm/x86.h:406:22: warning: passing argument 1 of 'kvm_cpu_cap_has' makes integer from pointer without a cast [-Wint-conversion]
     406 |  if (!__cpu_has(__c, X86_FEATURE_SHSTK) && \
         |                      ^~~~~~~~~~~~~~~~~
         |                      |
         |                      const u32 * {aka const unsigned int *}
   arch/x86/kvm/x86.c:10209:55: note: in definition of macro '__kvm_cpu_cap_has'
   10209 | #define __kvm_cpu_cap_has(UNUSED_, f) kvm_cpu_cap_has(f)
         |                                                       ^
   arch/x86/kvm/x86.c:10210:22: note: in expansion of macro '__cr4_reserved_bits'
   10210 |  cr4_reserved_bits = __cr4_reserved_bits(__kvm_cpu_cap_has, UNUSED_);
         |                      ^~~~~~~~~~~~~~~~~~~
   In file included from arch/x86/kvm/mmu.h:7,
                    from arch/x86/kvm/x86.c:22:
   arch/x86/kvm/cpuid.h:301:58: note: expected 'unsigned int' but argument is of type 'const u32 *' {aka 'const unsigned int *'}
     301 | static __always_inline bool kvm_cpu_cap_has(unsigned int x86_feature)
         |                                             ~~~~~~~~~~~~~^~~~~~~~~~~
   arch/x86/kvm/x86.h:407:22: warning: passing argument 1 of 'kvm_cpu_cap_has' makes integer from pointer without a cast [-Wint-conversion]
     407 |      !__cpu_has(__c, X86_FEATURE_IBT))  \
         |                      ^~~~~~~~~~~~~~~
         |                      |
         |                      const u32 * {aka const unsigned int *}
   arch/x86/kvm/x86.c:10209:55: note: in definition of macro '__kvm_cpu_cap_has'
   10209 | #define __kvm_cpu_cap_has(UNUSED_, f) kvm_cpu_cap_has(f)
         |                                                       ^
   arch/x86/kvm/x86.c:10210:22: note: in expansion of macro '__cr4_reserved_bits'
   10210 |  cr4_reserved_bits = __cr4_reserved_bits(__kvm_cpu_cap_has, UNUSED_);
         |                      ^~~~~~~~~~~~~~~~~~~
   In file included from arch/x86/kvm/mmu.h:7,
                    from arch/x86/kvm/x86.c:22:
   arch/x86/kvm/cpuid.h:301:58: note: expected 'unsigned int' but argument is of type 'const u32 *' {aka 'const unsigned int *'}
     301 | static __always_inline bool kvm_cpu_cap_has(unsigned int x86_feature)
         |                                             ~~~~~~~~~~~~~^~~~~~~~~~~
   In file included from arch/x86/kvm/cpuid.h:5,
                    from arch/x86/kvm/mmu.h:7,
                    from arch/x86/kvm/x86.c:22:
   arch/x86/kvm/x86.h:408:19: error: invalid operands to binary | (have 'u64' {aka 'long long unsigned int'} and 'const u32 *' {aka 'const unsigned int *'})
     408 |   __reserved_bits |= X86_CR4_CET;  \
         |                   ^~ ~~~~~~~~~~~
         |                      |
         |                      const u32 * {aka const unsigned int *}
   arch/x86/kvm/x86.c:10210:22: note: in expansion of macro '__cr4_reserved_bits'
   10210 |  cr4_reserved_bits = __cr4_reserved_bits(__kvm_cpu_cap_has, UNUSED_);
         |                      ^~~~~~~~~~~~~~~~~~~
   arch/x86/kvm/x86.h:408:3: warning: statement with no effect [-Wunused-value]
     408 |   __reserved_bits |= X86_CR4_CET;  \
         |   ^~~~~~~~~~~~~~~
   arch/x86/kvm/x86.c:10210:22: note: in expansion of macro '__cr4_reserved_bits'
   10210 |  cr4_reserved_bits = __cr4_reserved_bits(__kvm_cpu_cap_has, UNUSED_);
         |                      ^~~~~~~~~~~~~~~~~~~
   In file included from include/linux/kvm_host.h:36,
                    from arch/x86/kvm/x86.c:19:
   arch/x86/kvm/x86.c: In function 'kvm_arch_check_processor_compat':
   arch/x86/include/asm/kvm_host.h:104:6: error: invalid operands to binary | (have 'long unsigned int' and 'const u32 *' {aka 'const unsigned int *'})
     104 |      | X86_CR4_CET))
         |      ^
         |      |
         |      const u32 * {aka const unsigned int *}
   arch/x86/kvm/x86.h:388:24: note: in expansion of macro 'CR4_RESERVED_BITS'
     388 |  u64 __reserved_bits = CR4_RESERVED_BITS;        \
         |                        ^~~~~~~~~~~~~~~~~
   arch/x86/kvm/x86.c:10243:6: note: in expansion of macro '__cr4_reserved_bits'
   10243 |  if (__cr4_reserved_bits(cpu_has, c) !=
         |      ^~~~~~~~~~~~~~~~~~~
   In file included from arch/x86/include/asm/thread_info.h:53,
                    from include/linux/thread_info.h:38,
                    from arch/x86/include/asm/preempt.h:7,
                    from include/linux/preempt.h:78,
                    from include/linux/percpu.h:6,
                    from include/linux/context_tracking_state.h:5,
                    from include/linux/hardirq.h:5,
                    from include/linux/kvm_host.h:7,
                    from arch/x86/kvm/x86.c:19:

vim +/kvm_cpu_cap_clear +10205 arch/x86/kvm/x86.c

 10181	
 10182	int kvm_arch_hardware_setup(void *opaque)
 10183	{
 10184		struct kvm_x86_init_ops *ops = opaque;
 10185		int r;
 10186	
 10187		rdmsrl_safe(MSR_EFER, &host_efer);
 10188	
 10189		if (boot_cpu_has(X86_FEATURE_XSAVES))
 10190			rdmsrl(MSR_IA32_XSS, host_xss);
 10191	
 10192		r = ops->hardware_setup();
 10193		if (r != 0)
 10194			return r;
 10195	
 10196		memcpy(&kvm_x86_ops, ops->runtime_ops, sizeof(kvm_x86_ops));
 10197	
 10198		if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES))
 10199			supported_xss = 0;
 10200		else
 10201			supported_xss &= host_xss;
 10202	
 10203		/* Update CET features now that supported_xss is finalized. */
 10204		if (!kvm_cet_supported()) {
 10205			kvm_cpu_cap_clear(X86_FEATURE_SHSTK);
 10206			kvm_cpu_cap_clear(X86_FEATURE_IBT);
 10207		}
 10208	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
Paolo Bonzini Jan. 28, 2021, 5:53 p.m. UTC | #2
On 06/11/20 02:16, Yang Weijiang wrote:
> 
> +
> +	if (((cr4 ^ old_cr4) & X86_CR4_CET) && kvm_cet_supported()) {
> +		vmcs_writel(GUEST_SSP, 0);
> +		vmcs_writel(GUEST_S_CET, 0);
> +		vmcs_writel(GUEST_INTR_SSP_TABLE, 0);
> +	}
> +

Is this behavior documented for bare metal?  I suspect it is at least 
not true for S_CET and INTR_SSP_TABLE, because SMM entry does not save 
those to SMRAM (and clears CR4.CET).

Also, you need to save/restore GUEST_SSP to SMRAM.

Paolo
Yang, Weijiang Jan. 30, 2021, 6:32 a.m. UTC | #3
On Fri, Jan 29, 2021 at 03:38:52PM +0100, Paolo Bonzini wrote:
> On 29/01/21 13:17, Yang Weijiang wrote:
> > > > It's specific to VM case, during VM reboot, memory mode reset but VM_ENTRY_LOAD_CET_STATE
> > > > is still set, and VMCS contains stale GUEST_SSP, this hits vm-entry failure
> > > > documented in 10.7 VM Entry at:
> > > > https://software.intel.com/sites/default/files/managed/4d/2a/control-flow-enforcement-technology-preview.pdf
> > > > Since CR4.CET is also reset during VM reboot, to take the change to clear the stale data.
> > > > Maybe I need to find a better place to do the things.
> > > Then you must use a field of struct vmx_vcpu instead of the VMCS to hold
> > > GUEST_SSP (while GUEST_S_CET and GUEST_INTR_SSP_TABLE should not be an
> > > issue).
> > > 
> > Sorry, I don't get your point, can I just clear the GUEST_SSP field in this case?
> > Anyway save/restore GUEST_SSP via VMCS is an efficient way.
> 
> You cannot clear it, because it is preserved when CR4.CET is modified.
> 
> However, I checked the latest SDM and the GUEST_SSP rules are changed to
> just this:
> 
> SSP. The following checks are performed if the “load CET state” VM-entry
> control is 1
> — Bits 1:0 must be 0.
> — If the processor supports the Intel 64 architecture, bits 63:N must be
> identical, where N is the CPU’s maximum linear-address width. (This check
> does not apply if the processor supports 64 linear-address bits.) The guest
> SSP value is not required to be canonical; the value of bit N-1 may differ
> from that of bit N.
> 
> In particular it doesn't mention the "IA-32e mode guest" VM-entry control or
> the CS.L bit anymore, so it should not be necessary anymore to even reset
> SSP to 0, and you can keep GUEST_SSP in the VMCS.
>
There could be some gaps between the two specs, I tested VM reboot with these patches,
if I don't clear GUEST_SSP field while CR4.CET is changed, then it always encounters the
vm-entry failure issue, the VMCS dump is like below:

[341485.265277] *** Guest State ***
[341485.265280] CR0: actual=0x0000000000000030,
shadow=0x0000000060000010, gh_mask=fffffffffffffff7
[341485.265281] CR4: actual=0x0000000000002040,
shadow=0x0000000000000000, gh_mask=fffffffffffef871
[341485.265282] CR3 = 0x0000000000000000
[341485.265283] RSP = 0x0000000000000000  RIP = 0x000000000000fff0
[341485.265283] RFLAGS=0x00000002         DR7 = 0x0000000000000400
[341485.265284] Sysenter RSP=0000000000000000
CS:RIP=0000:0000000000000000
[341485.265285] CS:   sel=0xf000, attr=0x0009b, limit=0x0000ffff,
base=0x00000000ffff0000
[341485.265286] DS:   sel=0x0000, attr=0x00093, limit=0x0000ffff,
base=0x0000000000000000
[341485.265287] SS:   sel=0x0000, attr=0x00093, limit=0x0000ffff,
base=0x0000000000000000
[341485.265288] ES:   sel=0x0000, attr=0x00093, limit=0x0000ffff,
base=0x0000000000000000
[341485.265288] FS:   sel=0x0000, attr=0x00093, limit=0x0000ffff,
base=0x0000000000000000
[341485.265289] GS:   sel=0x0000, attr=0x00093, limit=0x0000ffff,
base=0x0000000000000000
[341485.265289] GDTR:                           limit=0x0000ffff,
base=0x0000000000000000
[341485.265290] LDTR: sel=0x0000, attr=0x00082, limit=0x0000ffff,
base=0x0000000000000000
[341485.265291] IDTR:                           limit=0x0000ffff,
base=0x0000000000000000
[341485.265291] TR:   sel=0x0000, attr=0x0008b, limit=0x0000ffff,
base=0x0000000000000000
[341485.265292] EFER =     0x0000000000000000  PAT = 0x0007040600070406
[341485.265292] DebugCtl = 0x0000000000000000  DebugExceptions =
0x0000000000000000
[341485.265293] Interruptibility = 00000000  ActivityState = 00000000
[341485.265294] InterruptStatus = 0000
[341485.265294] S_CET = 0x0000000000000000
[341485.265295] SSP = 0x00007fc1727fdfd0
[341485.265295] SSP TABLE = 0x0000000000000000
[341485.265296] *** Host State ***
[341485.265296] RIP = 0xffffffffc1235900  RSP = 0xffffaed7c150bd68
[341485.265297] CS=0010 SS=0018 DS=0000 ES=0000 FS=0000 GS=0000 TR=0040
[341485.265298] FSBase=00007f6851d36700 GSBase=ffff9de2e0980000
TRBase=fffffe0000141000
[341485.265298] GDTBase=fffffe000013f000 IDTBase=fffffe0000000000
[341485.265299] CR0=0000000080050033 CR3=00000001135ae001
CR4=0000000000f72ee0
[341485.265300] Sysenter RSP=fffffe0000141000
CS:RIP=0010:ffffffff8dc015e0
[341485.265301] EFER = 0x0000000000000d01  PAT = 0x0407050600070106
[341485.265301] *** Control State ***
[341485.265302] PinBased=000000ff CPUBased=b5a06dfa
SecondaryExec=021237eb
[341485.265303] EntryControls=0010d1ff ExitControls=102befff
[341485.265303] ExceptionBitmap=00060042 PFECmask=00000000
PFECmatch=00000000
[341485.265304] VMEntry: intr_info=00000000 errcode=00000000
ilen=00000000
[341485.265305] VMExit: intr_info=00000000 errcode=00000000
ilen=00000002
[341485.265306]         reason=80000021 qualification=0000000000000000
[341485.265306] IDTVectoring: info=00000000 errcode=00000000
[341485.265307] TSC Offset = 0xfffd10acb111a2a0
[341485.265307] TSC Multiplier = 0x0001000000000000
[341485.265308] SVI|RVI = 00|00 TPR Threshold = 0x00
[341485.265309] APIC-access addr = 0x0000000499d63000 virt-APIC addr =
0x000000011ade6000
[341485.265310] PostedIntrVec = 0xf2
[341485.265310] EPT pointer = 0x000000011fffc05e
[341485.265310] PLE Gap=00000080 Window=00010000
[341485.265311] Virtual processor ID = 0x0001
[341485.265312] S_CET = 0x0000000000000000
[341485.265312] SSP = 0x0000000000000000
[341485.265312] SSP TABLE = 0x0000000000000000

The alternative way is to clear it in exit_lmode(), it also works.  

> Paolo
Yang, Weijiang Feb. 1, 2021, 4:56 a.m. UTC | #4
On Fri, Jan 29, 2021 at 03:38:52PM +0100, Paolo Bonzini wrote:
> On 29/01/21 13:17, Yang Weijiang wrote:
> > > > It's specific to VM case, during VM reboot, memory mode reset but VM_ENTRY_LOAD_CET_STATE
> > > > is still set, and VMCS contains stale GUEST_SSP, this hits vm-entry failure
> > > > documented in 10.7 VM Entry at:
> > > > https://software.intel.com/sites/default/files/managed/4d/2a/control-flow-enforcement-technology-preview.pdf
> > > > Since CR4.CET is also reset during VM reboot, to take the change to clear the stale data.
> > > > Maybe I need to find a better place to do the things.
> > > Then you must use a field of struct vmx_vcpu instead of the VMCS to hold
> > > GUEST_SSP (while GUEST_S_CET and GUEST_INTR_SSP_TABLE should not be an
> > > issue).
> > > 
> > Sorry, I don't get your point, can I just clear the GUEST_SSP field in this case?
> > Anyway save/restore GUEST_SSP via VMCS is an efficient way.
> 
> You cannot clear it, because it is preserved when CR4.CET is modified.
> 
> However, I checked the latest SDM and the GUEST_SSP rules are changed to
> just this:
> 
> SSP. The following checks are performed if the “load CET state” VM-entry
> control is 1
> — Bits 1:0 must be 0.
> — If the processor supports the Intel 64 architecture, bits 63:N must be
> identical, where N is the CPU’s maximum linear-address width. (This check
> does not apply if the processor supports 64 linear-address bits.) The guest
> SSP value is not required to be canonical; the value of bit N-1 may differ
> from that of bit N.
> 
> In particular it doesn't mention the "IA-32e mode guest" VM-entry control or
> the CS.L bit anymore, so it should not be necessary anymore to even reset
> SSP to 0, and you can keep GUEST_SSP in the VMCS.
>
The vm-entry failure issue is due to mismatch of MSR_KVM_GUEST_SSP between QEMU and KVM.
The original code is occupied by other usage, so QEMU cannot reset it properly.
Sorry for the noise!

> Paolo
diff mbox series

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 1620a2cca781..b3b5cb44e75b 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -100,7 +100,8 @@ 
 			  | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \
 			  | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \
 			  | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_VMXE \
-			  | X86_CR4_SMAP | X86_CR4_PKE | X86_CR4_UMIP))
+			  | X86_CR4_SMAP | X86_CR4_PKE | X86_CR4_UMIP \
+			  | X86_CR4_CET))
 
 #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
 
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 2c737337f466..6a888ee1c7db 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -405,7 +405,8 @@  void kvm_set_cpu_caps(void)
 		F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ | F(RDPID) |
 		F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
 		F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |
-		F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/
+		F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/ |
+		F(SHSTK)
 	);
 	/* Set LA57 based on hardware capability. */
 	if (cpuid_ecx(7) & F(LA57))
@@ -422,7 +423,7 @@  void kvm_set_cpu_caps(void)
 		F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) |
 		F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) |
 		F(MD_CLEAR) | F(AVX512_VP2INTERSECT) | F(FSRM) |
-		F(SERIALIZE) | F(TSXLDTRK)
+		F(SERIALIZE) | F(TSXLDTRK) | F(IBT)
 	);
 
 	/* TSC_ADJUST and ARCH_CAPABILITIES are emulated in software. */
diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
index 3a1861403d73..58cb57b08697 100644
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@ -103,6 +103,11 @@  static inline bool cpu_has_load_perf_global_ctrl(void)
 	       (vmcs_config.vmexit_ctrl & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL);
 }
 
+static inline bool cpu_has_load_cet_ctrl(void)
+{
+	return (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_CET_STATE) &&
+	       (vmcs_config.vmexit_ctrl & VM_EXIT_LOAD_CET_STATE);
+}
 static inline bool cpu_has_vmx_mpx(void)
 {
 	return (vmcs_config.vmexit_ctrl & VM_EXIT_CLEAR_BNDCFGS) &&
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 28ba8414a7a3..c88a6e1721b1 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -2279,7 +2279,7 @@  static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	case MSR_IA32_U_CET:
 		if (!cet_is_control_msr_accessible(vcpu, msr_info))
 			return 1;
-		if (data & GENMASK(9, 6))
+		if ((data & GENMASK(9, 6)) || is_noncanonical_address(data, vcpu))
 			return 1;
 		if (msr_index == MSR_IA32_S_CET)
 			vmcs_writel(GUEST_S_CET, data);
@@ -2594,7 +2594,8 @@  static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
 	      VM_EXIT_LOAD_IA32_EFER |
 	      VM_EXIT_CLEAR_BNDCFGS |
 	      VM_EXIT_PT_CONCEAL_PIP |
-	      VM_EXIT_CLEAR_IA32_RTIT_CTL;
+	      VM_EXIT_CLEAR_IA32_RTIT_CTL |
+	      VM_EXIT_LOAD_CET_STATE;
 	if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS,
 				&_vmexit_control) < 0)
 		return -EIO;
@@ -2618,7 +2619,8 @@  static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
 	      VM_ENTRY_LOAD_IA32_EFER |
 	      VM_ENTRY_LOAD_BNDCFGS |
 	      VM_ENTRY_PT_CONCEAL_PIP |
-	      VM_ENTRY_LOAD_IA32_RTIT_CTL;
+	      VM_ENTRY_LOAD_IA32_RTIT_CTL |
+	      VM_ENTRY_LOAD_CET_STATE;
 	if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS,
 				&_vmentry_control) < 0)
 		return -EIO;
@@ -2646,6 +2648,15 @@  static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
 		}
 	}
 
+	/*
+	 * The CET entry and exit controls need to be synchronized, e.g. to
+	 * avoid loading guest state but not restoring host state.
+	 */
+	if (!(_vmentry_control & VM_ENTRY_LOAD_CET_STATE) ||
+	    !(_vmexit_control & VM_EXIT_LOAD_CET_STATE)) {
+		_vmentry_control &= ~VM_ENTRY_LOAD_CET_STATE;
+		_vmexit_control &= ~VM_EXIT_LOAD_CET_STATE;
+	}
 
 	rdmsr(MSR_IA32_VMX_BASIC, vmx_msr_low, vmx_msr_high);
 
@@ -3217,7 +3228,9 @@  int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 	 * this bit, even if host CR4.MCE == 0.
 	 */
 	unsigned long hw_cr4;
+	unsigned long old_cr4;
 
+	old_cr4 = vmcs_readl(CR4_READ_SHADOW);
 	hw_cr4 = (cr4_read_shadow() & X86_CR4_MCE) | (cr4 & ~X86_CR4_MCE);
 	if (is_unrestricted_guest(vcpu))
 		hw_cr4 |= KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST;
@@ -3281,6 +3294,13 @@  int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 
 	vmcs_writel(CR4_READ_SHADOW, cr4);
 	vmcs_writel(GUEST_CR4, hw_cr4);
+
+	if (((cr4 ^ old_cr4) & X86_CR4_CET) && kvm_cet_supported()) {
+		vmcs_writel(GUEST_SSP, 0);
+		vmcs_writel(GUEST_S_CET, 0);
+		vmcs_writel(GUEST_INTR_SSP_TABLE, 0);
+	}
+
 	return 0;
 }
 
@@ -5961,6 +5981,12 @@  void dump_vmcs(void)
 		pr_err("InterruptStatus = %04x\n",
 		       vmcs_read16(GUEST_INTR_STATUS));
 
+	if (vmentry_ctl & VM_ENTRY_LOAD_CET_STATE) {
+		pr_err("S_CET = 0x%016lx\n", vmcs_readl(GUEST_S_CET));
+		pr_err("SSP = 0x%016lx\n", vmcs_readl(GUEST_SSP));
+		pr_err("SSP TABLE = 0x%016lx\n",
+		       vmcs_readl(GUEST_INTR_SSP_TABLE));
+	}
 	pr_err("*** Host State ***\n");
 	pr_err("RIP = 0x%016lx  RSP = 0x%016lx\n",
 	       vmcs_readl(HOST_RIP), vmcs_readl(HOST_RSP));
@@ -6035,6 +6061,12 @@  void dump_vmcs(void)
 	if (secondary_exec_control & SECONDARY_EXEC_ENABLE_VPID)
 		pr_err("Virtual processor ID = 0x%04x\n",
 		       vmcs_read16(VIRTUAL_PROCESSOR_ID));
+	if (vmexit_ctl & VM_EXIT_LOAD_CET_STATE) {
+		pr_err("S_CET = 0x%016lx\n", vmcs_readl(HOST_S_CET));
+		pr_err("SSP = 0x%016lx\n", vmcs_readl(HOST_SSP));
+		pr_err("SSP TABLE = 0x%016lx\n",
+		       vmcs_readl(HOST_INTR_SSP_TABLE));
+	}
 }
 
 /*
@@ -7409,6 +7441,15 @@  static __init void vmx_set_cpu_caps(void)
 
 	if (cpu_has_vmx_waitpkg())
 		kvm_cpu_cap_check_and_set(X86_FEATURE_WAITPKG);
+
+	if (!cpu_has_load_cet_ctrl() || !enable_unrestricted_guest) {
+		kvm_cpu_cap_clear(X86_FEATURE_SHSTK);
+		kvm_cpu_cap_clear(X86_FEATURE_IBT);
+	} else if (kvm_cpu_cap_has(X86_FEATURE_SHSTK) ||
+		   kvm_cpu_cap_has(X86_FEATURE_IBT)) {
+		supported_xss |= XFEATURE_MASK_CET_USER |
+				 XFEATURE_MASK_CET_KERNEL;
+	}
 }
 
 static void vmx_request_immediate_exit(struct kvm_vcpu *vcpu)
@@ -7837,6 +7878,8 @@  static __init int hardware_setup(void)
 	unsigned long host_bndcfgs;
 	struct desc_ptr dt;
 	int r, i, ept_lpage_level;
+	u64 cet_msr;
+	bool accessible;
 
 	store_idt(&dt);
 	host_idt_base = dt.address;
@@ -7850,6 +7893,21 @@  static __init int hardware_setup(void)
 	if (boot_cpu_has(X86_FEATURE_NX))
 		kvm_enable_efer_bits(EFER_NX);
 
+	accessible = (supported_xss & XFEATURE_MASK_CET_KERNEL) &&
+		     (boot_cpu_has(X86_FEATURE_IBT) ||
+		      boot_cpu_has(X86_FEATURE_SHSTK));
+	if (accessible) {
+		rdmsrl(MSR_IA32_S_CET, cet_msr);
+		WARN_ONCE(cet_msr, "KVM: CET S_CET in host will be lost!\n");
+	}
+
+	accessible = (supported_xss & XFEATURE_MASK_CET_KERNEL) &&
+		     boot_cpu_has(X86_FEATURE_SHSTK);
+	if (accessible) {
+		rdmsrl(MSR_IA32_PL0_SSP, cet_msr);
+		WARN_ONCE(cet_msr, "KVM: CET PL0_SSP in host will be lost!\n");
+	}
+
 	if (boot_cpu_has(X86_FEATURE_MPX)) {
 		rdmsrl(MSR_IA32_BNDCFGS, host_bndcfgs);
 		WARN_ONCE(host_bndcfgs, "KVM: BNDCFGS in host will be lost");
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index d573cadf5baf..a500c4e260af 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10197,6 +10197,14 @@  int kvm_arch_hardware_setup(void *opaque)
 
 	if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES))
 		supported_xss = 0;
+	else
+		supported_xss &= host_xss;
+
+	/* Update CET features now that supported_xss is finalized. */
+	if (!kvm_cet_supported()) {
+		kvm_cpu_cap_clear(X86_FEATURE_SHSTK);
+		kvm_cpu_cap_clear(X86_FEATURE_IBT);
+	}
 
 #define __kvm_cpu_cap_has(UNUSED_, f) kvm_cpu_cap_has(f)
 	cr4_reserved_bits = __cr4_reserved_bits(__kvm_cpu_cap_has, UNUSED_);