diff mbox series

[v7,5/7] kvm: x86: Add CET CR4 bit and XSS support

Message ID 20190927021927.23057-6-weijiang.yang@intel.com (mailing list archive)
State New, archived
Headers show
Series Introduce support for Guest CET feature | expand

Commit Message

Yang, Weijiang Sept. 27, 2019, 2:19 a.m. UTC
CR4.CET (bit 23) is the master enable bit for the CET feature.
Previously, KVM did not support setting any bits in XSS, so it
was hardcoded to inject a #GP if the guest attempted to write a
non-zero value to XSS. Now it supports setting the CET-related
bits.

Co-developed-by: Zhang Yi Z <yi.z.zhang@linux.intel.com>
Signed-off-by: Zhang Yi Z <yi.z.zhang@linux.intel.com>
Signed-off-by: Yang Weijiang <weijiang.yang@intel.com>
---
 arch/x86/include/asm/kvm_host.h |  4 +++-
 arch/x86/kvm/cpuid.c            | 11 +++++++++--
 arch/x86/kvm/vmx/vmx.c          |  6 +-----
 3 files changed, 13 insertions(+), 8 deletions(-)

Comments

Jim Mattson Oct. 2, 2019, 7:05 p.m. UTC | #1
On Thu, Sep 26, 2019 at 7:17 PM Yang Weijiang <weijiang.yang@intel.com> wrote:
>
> CR4.CET(bit 23) is master enable bit for CET feature.
> Previously, KVM did not support setting any bits in XSS
> so it's hardcoded to check and inject a #GP if Guest
> attempted to write a non-zero value to XSS, now it supports
> CET related bits setting.
>
> Co-developed-by: Zhang Yi Z <yi.z.zhang@linux.intel.com>
> Signed-off-by: Zhang Yi Z <yi.z.zhang@linux.intel.com>
> Signed-off-by: Yang Weijiang <weijiang.yang@intel.com>
> ---
>  arch/x86/include/asm/kvm_host.h |  4 +++-
>  arch/x86/kvm/cpuid.c            | 11 +++++++++--
>  arch/x86/kvm/vmx/vmx.c          |  6 +-----
>  3 files changed, 13 insertions(+), 8 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index d018df8c5f32..8f97269d6d9f 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -90,7 +90,8 @@
>                           | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \
>                           | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \
>                           | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_VMXE \
> -                         | X86_CR4_SMAP | X86_CR4_PKE | X86_CR4_UMIP))
> +                         | X86_CR4_SMAP | X86_CR4_PKE | X86_CR4_UMIP \
> +                         | X86_CR4_CET))
>
>  #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
>
> @@ -623,6 +624,7 @@ struct kvm_vcpu_arch {
>
>         u64 xcr0;
>         u64 guest_supported_xcr0;
> +       u64 guest_supported_xss;
>         u32 guest_xstate_size;
>
>         struct kvm_pio_request pio;
> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> index 0a47b9e565be..dd3ddc6daa58 100644
> --- a/arch/x86/kvm/cpuid.c
> +++ b/arch/x86/kvm/cpuid.c
> @@ -120,8 +120,15 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
>         }
>
>         best = kvm_find_cpuid_entry(vcpu, 0xD, 1);
> -       if (best && (best->eax & (F(XSAVES) | F(XSAVEC))))
> -               best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
> +       if (best && (best->eax & (F(XSAVES) | F(XSAVEC)))) {

Is XSAVEC alone sufficient? Don't we explicitly need XSAVES to
save/restore the extended state components enumerated by IA32_XSS?

> +               u64 kvm_xss = kvm_supported_xss();
> +
> +               best->ebx =
> +                       xstate_required_size(vcpu->arch.xcr0 | kvm_xss, true);

Shouldn't this size be based on the *current* IA32_XSS value, rather
than the supported IA32_XSS bits? (i.e.
s/kvm_xss/vcpu->arch.ia32_xss/)

> +               vcpu->arch.guest_supported_xss = best->ecx & kvm_xss;

Shouldn't unsupported bits in best->ecx be masked off, so that the
guest CPUID doesn't mis-report the capabilities of the vCPU?

> +       } else {
> +               vcpu->arch.guest_supported_xss = 0;
> +       }
>
>         /*
>          * The existing code assumes virtual address is 48-bit or 57-bit in the
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index ba1a83d11e69..44913e4ab558 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -1973,11 +1973,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>                      !(guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
>                        guest_cpuid_has(vcpu, X86_FEATURE_XSAVES))))
>                         return 1;
> -               /*
> -                * The only supported bit as of Skylake is bit 8, but
> -                * it is not supported on KVM.
> -                */
> -               if (data != 0)
> +               if (data & ~vcpu->arch.guest_supported_xss)
>                         return 1;
>                 vcpu->arch.ia32_xss = data;
>                 if (vcpu->arch.ia32_xss != host_xss)
> --
> 2.17.2
>
Sean Christopherson Oct. 17, 2019, 7:56 p.m. UTC | #2
On Wed, Oct 02, 2019 at 12:05:23PM -0700, Jim Mattson wrote:
> On Thu, Sep 26, 2019 at 7:17 PM Yang Weijiang <weijiang.yang@intel.com> wrote:
> >
> > CR4.CET(bit 23) is master enable bit for CET feature.
> > Previously, KVM did not support setting any bits in XSS
> > so it's hardcoded to check and inject a #GP if Guest
> > attempted to write a non-zero value to XSS, now it supports
> > CET related bits setting.
> >
> > Co-developed-by: Zhang Yi Z <yi.z.zhang@linux.intel.com>
> > Signed-off-by: Zhang Yi Z <yi.z.zhang@linux.intel.com>
> > Signed-off-by: Yang Weijiang <weijiang.yang@intel.com>
> > ---
> >  arch/x86/include/asm/kvm_host.h |  4 +++-
> >  arch/x86/kvm/cpuid.c            | 11 +++++++++--
> >  arch/x86/kvm/vmx/vmx.c          |  6 +-----
> >  3 files changed, 13 insertions(+), 8 deletions(-)
> >
> > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > index d018df8c5f32..8f97269d6d9f 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -90,7 +90,8 @@
> >                           | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \
> >                           | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \
> >                           | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_VMXE \
> > -                         | X86_CR4_SMAP | X86_CR4_PKE | X86_CR4_UMIP))
> > +                         | X86_CR4_SMAP | X86_CR4_PKE | X86_CR4_UMIP \
> > +                         | X86_CR4_CET))
> >
> >  #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
> >
> > @@ -623,6 +624,7 @@ struct kvm_vcpu_arch {
> >
> >         u64 xcr0;
> >         u64 guest_supported_xcr0;
> > +       u64 guest_supported_xss;
> >         u32 guest_xstate_size;
> >
> >         struct kvm_pio_request pio;
> > diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> > index 0a47b9e565be..dd3ddc6daa58 100644
> > --- a/arch/x86/kvm/cpuid.c
> > +++ b/arch/x86/kvm/cpuid.c
> > @@ -120,8 +120,15 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
> >         }
> >
> >         best = kvm_find_cpuid_entry(vcpu, 0xD, 1);
> > -       if (best && (best->eax & (F(XSAVES) | F(XSAVEC))))
> > -               best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
> > +       if (best && (best->eax & (F(XSAVES) | F(XSAVEC)))) {
> 
> Is XSAVEC alone sufficient? Don't we explicitly need XSAVES to
> save/restore the extended state components enumerated by IA32_XSS?

Hmm, I think the check would be ok as-is if vcpu->arch.ia32_xss is used
below, as ia32_xss is guaranteed to be zero if XSAVES isn't supported.

> > +               u64 kvm_xss = kvm_supported_xss();
> > +
> > +               best->ebx =
> > +                       xstate_required_size(vcpu->arch.xcr0 | kvm_xss, true);
> 
> Shouldn't this size be based on the *current* IA32_XSS value, rather
> than the supported IA32_XSS bits? (i.e.
> s/kvm_xss/vcpu->arch.ia32_xss/)

Ya.

> > +               vcpu->arch.guest_supported_xss = best->ecx & kvm_xss;
> 
> Shouldn't unsupported bits in best->ecx be masked off, so that the
> guest CPUID doesn't mis-report the capabilities of the vCPU?

I thought KVM liked to let userspace blow off their foot whenever possible?
KVM already enumerated what features are supported, it's a userspace bug
if it ignores the enumeration.

> > +       } else {
> > +               vcpu->arch.guest_supported_xss = 0;
> > +       }
> >
> >         /*
Yang, Weijiang Oct. 18, 2019, 1:58 a.m. UTC | #3
On Thu, Oct 17, 2019 at 12:56:42PM -0700, Sean Christopherson wrote:
> On Wed, Oct 02, 2019 at 12:05:23PM -0700, Jim Mattson wrote:
> > On Thu, Sep 26, 2019 at 7:17 PM Yang Weijiang <weijiang.yang@intel.com> wrote:
> > >
> > > CR4.CET(bit 23) is master enable bit for CET feature.
> > > Previously, KVM did not support setting any bits in XSS
> > > so it's hardcoded to check and inject a #GP if Guest
> > > attempted to write a non-zero value to XSS, now it supports
> > > CET related bits setting.
> > >
> > > Co-developed-by: Zhang Yi Z <yi.z.zhang@linux.intel.com>
> > > Signed-off-by: Zhang Yi Z <yi.z.zhang@linux.intel.com>
> > > Signed-off-by: Yang Weijiang <weijiang.yang@intel.com>
> > > ---
> > >  arch/x86/include/asm/kvm_host.h |  4 +++-
> > >  arch/x86/kvm/cpuid.c            | 11 +++++++++--
> > >  arch/x86/kvm/vmx/vmx.c          |  6 +-----
> > >  3 files changed, 13 insertions(+), 8 deletions(-)
> > >
> > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > > index d018df8c5f32..8f97269d6d9f 100644
> > > --- a/arch/x86/include/asm/kvm_host.h
> > > +++ b/arch/x86/include/asm/kvm_host.h
> > > @@ -90,7 +90,8 @@
> > >                           | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \
> > >                           | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \
> > >                           | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_VMXE \
> > > -                         | X86_CR4_SMAP | X86_CR4_PKE | X86_CR4_UMIP))
> > > +                         | X86_CR4_SMAP | X86_CR4_PKE | X86_CR4_UMIP \
> > > +                         | X86_CR4_CET))
> > >
> > >  #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
> > >
> > > @@ -623,6 +624,7 @@ struct kvm_vcpu_arch {
> > >
> > >         u64 xcr0;
> > >         u64 guest_supported_xcr0;
> > > +       u64 guest_supported_xss;
> > >         u32 guest_xstate_size;
> > >
> > >         struct kvm_pio_request pio;
> > > diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> > > index 0a47b9e565be..dd3ddc6daa58 100644
> > > --- a/arch/x86/kvm/cpuid.c
> > > +++ b/arch/x86/kvm/cpuid.c
> > > @@ -120,8 +120,15 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
> > >         }
> > >
> > >         best = kvm_find_cpuid_entry(vcpu, 0xD, 1);
> > > -       if (best && (best->eax & (F(XSAVES) | F(XSAVEC))))
> > > -               best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
> > > +       if (best && (best->eax & (F(XSAVES) | F(XSAVEC)))) {
> > 
> > Is XSAVEC alone sufficient? Don't we explicitly need XSAVES to
> > save/restore the extended state components enumerated by IA32_XSS?
> 
> Hmm, I think the check would be ok as-is if vcpu->arch.ia32_xss is used
> below, as ia32_xss is guaranteed to be zero if XSAVES isn't supported.
> 
Thanks, Sean, for having me re-capture this reply thread; it was lost in my
folder.
I added a kvm_x86_ops->xsaves_supported() check to kvm_supported_xss(), which
now returns 0 if XSAVES is not supported, as suggested by Jim.

> > > +               u64 kvm_xss = kvm_supported_xss();
> > > +
> > > +               best->ebx =
> > > +                       xstate_required_size(vcpu->arch.xcr0 | kvm_xss, true);
> > 
> > Shouldn't this size be based on the *current* IA32_XSS value, rather
> > than the supported IA32_XSS bits? (i.e.
> > s/kvm_xss/vcpu->arch.ia32_xss/)
> 
> Ya.
>
I'm not sure I understand correctly: kvm_xss is what KVM supports,
but arch.ia32_xss reflects what the guest is currently using. Shouldn't CPUID
report what KVM supports instead of the current status?
Will CPUID match the current IA32_XSS status if the guest changes it at runtime?

> > > +               vcpu->arch.guest_supported_xss = best->ecx & kvm_xss;
> > 
> > Shouldn't unsupported bits in best->ecx be masked off, so that the
> > guest CPUID doesn't mis-report the capabilities of the vCPU?
> 
> I thought KVM liked to let userspace blow off their foot whenever possible?
> KVM already enumerated what features are supported, it's a userspace bug
> if it ignores the enumeration.
> 
> > > +       } else {
> > > +               vcpu->arch.guest_supported_xss = 0;
> > > +       }
> > >
> > >         /*
Sean Christopherson Oct. 22, 2019, 8:13 p.m. UTC | #4
On Fri, Oct 18, 2019 at 09:58:02AM +0800, Yang Weijiang wrote:
> On Thu, Oct 17, 2019 at 12:56:42PM -0700, Sean Christopherson wrote:
> > On Wed, Oct 02, 2019 at 12:05:23PM -0700, Jim Mattson wrote:
> > > > +               u64 kvm_xss = kvm_supported_xss();
> > > > +
> > > > +               best->ebx =
> > > > +                       xstate_required_size(vcpu->arch.xcr0 | kvm_xss, true);
> > > 
> > > Shouldn't this size be based on the *current* IA32_XSS value, rather
> > > than the supported IA32_XSS bits? (i.e.
> > > s/kvm_xss/vcpu->arch.ia32_xss/)
> > 
> > Ya.
> >
> I'm not sure if I understand correctly, kvm_xss is what KVM supports,
> but arch.ia32_xss reflects what guest currently is using, shouldn't CPUID
> report what KVM supports instead of current status?
> Will CPUID match current IA32_XSS status if guest changes it runtime?

Not in this case.  Select CPUID output is dependent on current state as
opposed to being a constant defined by hardware.  Per the SDM, EBX is:

  The size in bytes of the XSAVE area containing all states enabled by
  XCR0 | IA32_XSS

Since KVM is emulating CPUID for the guest, XCR0 and IA32_XSS in this
context refer to the guest's current/actual XCR0/IA32_XSS values.  The
purpose of this behavior is so that software can call CPUID to query the
actual amount of memory that is needed for XSAVE(S), as opposed to the
absolute max size that _might_ be needed.

MONITOR/MWAIT is the other case that comes to mind where CPUID dynamically
reflects configured state, e.g. MWAIT is reported as unsupported if it's
disabled via IA32_MISC_ENABLE MSR.
Yang, Weijiang Oct. 23, 2019, 1:19 a.m. UTC | #5
On Tue, Oct 22, 2019 at 01:13:21PM -0700, Sean Christopherson wrote:
> On Fri, Oct 18, 2019 at 09:58:02AM +0800, Yang Weijiang wrote:
> > On Thu, Oct 17, 2019 at 12:56:42PM -0700, Sean Christopherson wrote:
> > > On Wed, Oct 02, 2019 at 12:05:23PM -0700, Jim Mattson wrote:
> > > > > +               u64 kvm_xss = kvm_supported_xss();
> > > > > +
> > > > > +               best->ebx =
> > > > > +                       xstate_required_size(vcpu->arch.xcr0 | kvm_xss, true);
> > > > 
> > > > Shouldn't this size be based on the *current* IA32_XSS value, rather
> > > > than the supported IA32_XSS bits? (i.e.
> > > > s/kvm_xss/vcpu->arch.ia32_xss/)
> > > 
> > > Ya.
> > >
> > I'm not sure if I understand correctly, kvm_xss is what KVM supports,
> > but arch.ia32_xss reflects what guest currently is using, shouldn't CPUID
> > report what KVM supports instead of current status?
> > Will CPUID match current IA32_XSS status if guest changes it runtime?
> 
> Not in this case.  Select CPUID output is dependent on current state as
> opposed to being a constant defined by hardware.  Per the SDM, EBX is:
> 
>   The size in bytes of the XSAVE area containing all states enabled by
>   XCR0 | IA32_XSS
> 
> Since KVM is emulating CPUID for the guest, XCR0 and IA32_XSS in this
> context refers to the guest's current/actual XCR0/IA32_XSS values.  The
> purpose of this behavior is so that software can call CPUID to query the
> actual amount of memory that is needed for XSAVE(S), as opposed to the
> absolute max size that _might_ be needed.
> 
> MONITOR/MWAIT is the other case that comes to mind where CPUID dynamically
> reflects configured state, e.g. MWAIT is reported as unsupported if it's
> disabled via IA32_MISC_ENABLE MSR.
Yep, makes sense, thank you for the explanation.
diff mbox series

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index d018df8c5f32..8f97269d6d9f 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -90,7 +90,8 @@ 
 			  | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \
 			  | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \
 			  | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_VMXE \
-			  | X86_CR4_SMAP | X86_CR4_PKE | X86_CR4_UMIP))
+			  | X86_CR4_SMAP | X86_CR4_PKE | X86_CR4_UMIP \
+			  | X86_CR4_CET))
 
 #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
 
@@ -623,6 +624,7 @@  struct kvm_vcpu_arch {
 
 	u64 xcr0;
 	u64 guest_supported_xcr0;
+	u64 guest_supported_xss;
 	u32 guest_xstate_size;
 
 	struct kvm_pio_request pio;
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 0a47b9e565be..dd3ddc6daa58 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -120,8 +120,15 @@  int kvm_update_cpuid(struct kvm_vcpu *vcpu)
 	}
 
 	best = kvm_find_cpuid_entry(vcpu, 0xD, 1);
-	if (best && (best->eax & (F(XSAVES) | F(XSAVEC))))
-		best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
+	if (best && (best->eax & (F(XSAVES) | F(XSAVEC)))) {
+		u64 kvm_xss = kvm_supported_xss();
+
+		best->ebx =
+			xstate_required_size(vcpu->arch.xcr0 | kvm_xss, true);
+		vcpu->arch.guest_supported_xss = best->ecx & kvm_xss;
+	} else {
+		vcpu->arch.guest_supported_xss = 0;
+	}
 
 	/*
 	 * The existing code assumes virtual address is 48-bit or 57-bit in the
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index ba1a83d11e69..44913e4ab558 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1973,11 +1973,7 @@  static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		     !(guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
 		       guest_cpuid_has(vcpu, X86_FEATURE_XSAVES))))
 			return 1;
-		/*
-		 * The only supported bit as of Skylake is bit 8, but
-		 * it is not supported on KVM.
-		 */
-		if (data != 0)
+		if (data & ~vcpu->arch.guest_supported_xss)
 			return 1;
 		vcpu->arch.ia32_xss = data;
 		if (vcpu->arch.ia32_xss != host_xss)