diff mbox series

[v19,080/130] KVM: TDX: restore host xsave state when exit from the guest TD

Message ID 2894ed10014279f4b8caab582e3b7e7061b5dad3.1708933498.git.isaku.yamahata@intel.com (mailing list archive)
State New, archived
Headers show
Series [v19,001/130] x86/virt/tdx: Rename _offset to _member for TD_SYSINFO_MAP() macro | expand

Commit Message

Isaku Yamahata Feb. 26, 2024, 8:26 a.m. UTC
From: Isaku Yamahata <isaku.yamahata@intel.com>

On exiting from the guest TD, xsave state is clobbered.  Restore xsave
state on TD exit.

Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
---
v19:
- Add EXPORT_SYMBOL_GPL(host_xcr0)

v15 -> v16:
- Added CET flag mask

Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
---
 arch/x86/kvm/vmx/tdx.c | 19 +++++++++++++++++++
 arch/x86/kvm/x86.c     |  1 +
 2 files changed, 20 insertions(+)

Comments

Chen Yu March 7, 2024, 8:32 a.m. UTC | #1
On 2024-02-26 at 00:26:22 -0800, isaku.yamahata@intel.com wrote:
> From: Isaku Yamahata <isaku.yamahata@intel.com>
> 
> On exiting from the guest TD, xsave state is clobbered.  Restore xsave
> state on TD exit.
> 
> Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> ---
> v19:
> - Add EXPORT_SYMBOL_GPL(host_xcr0)
> 
> v15 -> v16:
> - Added CET flag mask
> 
> Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> ---
>  arch/x86/kvm/vmx/tdx.c | 19 +++++++++++++++++++
>  arch/x86/kvm/x86.c     |  1 +
>  2 files changed, 20 insertions(+)
> 
> diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
> index 9616b1aab6ce..199226c6cf55 100644
> --- a/arch/x86/kvm/vmx/tdx.c
> +++ b/arch/x86/kvm/vmx/tdx.c
> @@ -2,6 +2,7 @@
>  #include <linux/cpu.h>
>  #include <linux/mmu_context.h>
>  
> +#include <asm/fpu/xcr.h>
>  #include <asm/tdx.h>
>  
>  #include "capabilities.h"
> @@ -534,6 +535,23 @@ void tdx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
>  	 */
>  }
>  
> +static void tdx_restore_host_xsave_state(struct kvm_vcpu *vcpu)
> +{
> +	struct kvm_tdx *kvm_tdx = to_kvm_tdx(vcpu->kvm);
> +
> +	if (static_cpu_has(X86_FEATURE_XSAVE) &&
> +	    host_xcr0 != (kvm_tdx->xfam & kvm_caps.supported_xcr0))
> +		xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0);
> +	if (static_cpu_has(X86_FEATURE_XSAVES) &&
> +	    /* PT can be exposed to TD guest regardless of KVM's XSS support */
> +	    host_xss != (kvm_tdx->xfam &
> +			 (kvm_caps.supported_xss | XFEATURE_MASK_PT | TDX_TD_XFAM_CET)))
> +		wrmsrl(MSR_IA32_XSS, host_xss);
> +	if (static_cpu_has(X86_FEATURE_PKU) &&
> +	    (kvm_tdx->xfam & XFEATURE_MASK_PKRU))
> +		write_pkru(vcpu->arch.host_pkru);
> +}

Maybe one minor question regarding the pkru restore. In the non-TDX version
kvm_load_host_xsave_state(), it first reads the current setting
vcpu->arch.pkru = rdpkru(); if this setting is not equal to host_pkru,
it triggers the write_pkru on the host. Does it mean we can also leverage that mechanism
in TDX to avoid 1 pkru write (I guess a pkru write is costlier than a pkru read)?

thanks,
Chenyu
Isaku Yamahata March 8, 2024, 8:58 p.m. UTC | #2
On Thu, Mar 07, 2024 at 04:32:16PM +0800,
Chen Yu <yu.c.chen@intel.com> wrote:

> On 2024-02-26 at 00:26:22 -0800, isaku.yamahata@intel.com wrote:
> > From: Isaku Yamahata <isaku.yamahata@intel.com>
> > 
> > On exiting from the guest TD, xsave state is clobbered.  Restore xsave
> > state on TD exit.
> > 
> > Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> > ---
> > v19:
> > - Add EXPORT_SYMBOL_GPL(host_xcr0)
> > 
> > v15 -> v16:
> > - Added CET flag mask
> > 
> > Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> > ---
> >  arch/x86/kvm/vmx/tdx.c | 19 +++++++++++++++++++
> >  arch/x86/kvm/x86.c     |  1 +
> >  2 files changed, 20 insertions(+)
> > 
> > diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
> > index 9616b1aab6ce..199226c6cf55 100644
> > --- a/arch/x86/kvm/vmx/tdx.c
> > +++ b/arch/x86/kvm/vmx/tdx.c
> > @@ -2,6 +2,7 @@
> >  #include <linux/cpu.h>
> >  #include <linux/mmu_context.h>
> >  
> > +#include <asm/fpu/xcr.h>
> >  #include <asm/tdx.h>
> >  
> >  #include "capabilities.h"
> > @@ -534,6 +535,23 @@ void tdx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
> >  	 */
> >  }
> >  
> > +static void tdx_restore_host_xsave_state(struct kvm_vcpu *vcpu)
> > +{
> > +	struct kvm_tdx *kvm_tdx = to_kvm_tdx(vcpu->kvm);
> > +
> > +	if (static_cpu_has(X86_FEATURE_XSAVE) &&
> > +	    host_xcr0 != (kvm_tdx->xfam & kvm_caps.supported_xcr0))
> > +		xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0);
> > +	if (static_cpu_has(X86_FEATURE_XSAVES) &&
> > +	    /* PT can be exposed to TD guest regardless of KVM's XSS support */
> > +	    host_xss != (kvm_tdx->xfam &
> > +			 (kvm_caps.supported_xss | XFEATURE_MASK_PT | TDX_TD_XFAM_CET)))
> > +		wrmsrl(MSR_IA32_XSS, host_xss);
> > +	if (static_cpu_has(X86_FEATURE_PKU) &&
> > +	    (kvm_tdx->xfam & XFEATURE_MASK_PKRU))
> > +		write_pkru(vcpu->arch.host_pkru);
> > +}
> 
> Maybe one minor question regarding the pkru restore. In the non-TDX version
> kvm_load_host_xsave_state(), it first tries to read the current setting
> vcpu->arch.pkru = rdpkru(); if this setting does not equal to host_pkru,
> it trigger the write_pkru on host. Does it mean we can also leverage that mechanism
> in TDX to avoid 1 pkru write(I guess pkru write is costly than a read pkru)?

Yes, that's the intention.  When we set the PKRU feature for the guest, the TDX
module unconditionally initializes pkru.  Do you have a use case where wrpkru()
(without rdpkru()) is better?
Chen Yu March 9, 2024, 4:28 p.m. UTC | #3
On 2024-03-08 at 12:58:38 -0800, Isaku Yamahata wrote:
> On Thu, Mar 07, 2024 at 04:32:16PM +0800,
> Chen Yu <yu.c.chen@intel.com> wrote:
> 
> > On 2024-02-26 at 00:26:22 -0800, isaku.yamahata@intel.com wrote:
> > > From: Isaku Yamahata <isaku.yamahata@intel.com>
> > > 
> > > On exiting from the guest TD, xsave state is clobbered.  Restore xsave
> > > state on TD exit.
> > > 
> > > Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> > > ---
> > > v19:
> > > - Add EXPORT_SYMBOL_GPL(host_xcr0)
> > > 
> > > v15 -> v16:
> > > - Added CET flag mask
> > > 
> > > Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> > > ---
> > >  arch/x86/kvm/vmx/tdx.c | 19 +++++++++++++++++++
> > >  arch/x86/kvm/x86.c     |  1 +
> > >  2 files changed, 20 insertions(+)
> > > 
> > > diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
> > > index 9616b1aab6ce..199226c6cf55 100644
> > > --- a/arch/x86/kvm/vmx/tdx.c
> > > +++ b/arch/x86/kvm/vmx/tdx.c
> > > @@ -2,6 +2,7 @@
> > >  #include <linux/cpu.h>
> > >  #include <linux/mmu_context.h>
> > >  
> > > +#include <asm/fpu/xcr.h>
> > >  #include <asm/tdx.h>
> > >  
> > >  #include "capabilities.h"
> > > @@ -534,6 +535,23 @@ void tdx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
> > >  	 */
> > >  }
> > >  
> > > +static void tdx_restore_host_xsave_state(struct kvm_vcpu *vcpu)
> > > +{
> > > +	struct kvm_tdx *kvm_tdx = to_kvm_tdx(vcpu->kvm);
> > > +
> > > +	if (static_cpu_has(X86_FEATURE_XSAVE) &&
> > > +	    host_xcr0 != (kvm_tdx->xfam & kvm_caps.supported_xcr0))
> > > +		xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0);
> > > +	if (static_cpu_has(X86_FEATURE_XSAVES) &&
> > > +	    /* PT can be exposed to TD guest regardless of KVM's XSS support */
> > > +	    host_xss != (kvm_tdx->xfam &
> > > +			 (kvm_caps.supported_xss | XFEATURE_MASK_PT | TDX_TD_XFAM_CET)))
> > > +		wrmsrl(MSR_IA32_XSS, host_xss);
> > > +	if (static_cpu_has(X86_FEATURE_PKU) &&
> > > +	    (kvm_tdx->xfam & XFEATURE_MASK_PKRU))
> > > +		write_pkru(vcpu->arch.host_pkru);
> > > +}
> > 
> > Maybe one minor question regarding the pkru restore. In the non-TDX version
> > kvm_load_host_xsave_state(), it first tries to read the current setting
> > vcpu->arch.pkru = rdpkru(); if this setting does not equal to host_pkru,
> > it trigger the write_pkru on host. Does it mean we can also leverage that mechanism
> > in TDX to avoid 1 pkru write(I guess pkru write is costly than a read pkru)?
> 
> Yes, that's the intention.  When we set the PKRU feature for the guest, TDX
> module unconditionally initialize pkru.

I see, thanks for the information. Please correct me if I'm wrong; I'm not sure
whether the wrpkru instruction would trigger a TD exit. The TDX module spec[1] mentions PKS
(protection keys for supervisor pages), but does not mention PKU for user pages. PKS
is controlled by MSR IA32_PKRS. The TDX module will pass through the MSR IA32_PKRS
write in the TD, because the TDX module clears the PKS bitmap in the VMCS:
https://github.com/intel/tdx-module/blob/tdx_1.5/src/common/helpers/helpers.c#L1723
so neither a write to MSR IA32_PKRS nor wrpkru triggers a TD exit.

However, on second thought, I found that after commit 72a6c08c44e4, the current
code should not be a problem, because write_pkru() first reads the current pkru
setting and decides whether to update the pkru register.

> Do you have use case that wrpkru()
> (without rdpkru()) is better?

I don't have a use case yet. But with or without rdpkru() in tdx_restore_host_xsave_state(),
there is not much difference, because write_pkru() already takes care of it, if I understand
correctly.

thanks,
Chenyu
Isaku Yamahata March 12, 2024, 2:03 a.m. UTC | #4
On Sun, Mar 10, 2024 at 12:28:55AM +0800,
Chen Yu <yu.c.chen@intel.com> wrote:

> On 2024-03-08 at 12:58:38 -0800, Isaku Yamahata wrote:
> > On Thu, Mar 07, 2024 at 04:32:16PM +0800,
> > Chen Yu <yu.c.chen@intel.com> wrote:
> > 
> > > On 2024-02-26 at 00:26:22 -0800, isaku.yamahata@intel.com wrote:
> > > > From: Isaku Yamahata <isaku.yamahata@intel.com>
> > > > 
> > > > On exiting from the guest TD, xsave state is clobbered.  Restore xsave
> > > > state on TD exit.
> > > > 
> > > > Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> > > > ---
> > > > v19:
> > > > - Add EXPORT_SYMBOL_GPL(host_xcr0)
> > > > 
> > > > v15 -> v16:
> > > > - Added CET flag mask
> > > > 
> > > > Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> > > > ---
> > > >  arch/x86/kvm/vmx/tdx.c | 19 +++++++++++++++++++
> > > >  arch/x86/kvm/x86.c     |  1 +
> > > >  2 files changed, 20 insertions(+)
> > > > 
> > > > diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
> > > > index 9616b1aab6ce..199226c6cf55 100644
> > > > --- a/arch/x86/kvm/vmx/tdx.c
> > > > +++ b/arch/x86/kvm/vmx/tdx.c
> > > > @@ -2,6 +2,7 @@
> > > >  #include <linux/cpu.h>
> > > >  #include <linux/mmu_context.h>
> > > >  
> > > > +#include <asm/fpu/xcr.h>
> > > >  #include <asm/tdx.h>
> > > >  
> > > >  #include "capabilities.h"
> > > > @@ -534,6 +535,23 @@ void tdx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
> > > >  	 */
> > > >  }
> > > >  
> > > > +static void tdx_restore_host_xsave_state(struct kvm_vcpu *vcpu)
> > > > +{
> > > > +	struct kvm_tdx *kvm_tdx = to_kvm_tdx(vcpu->kvm);
> > > > +
> > > > +	if (static_cpu_has(X86_FEATURE_XSAVE) &&
> > > > +	    host_xcr0 != (kvm_tdx->xfam & kvm_caps.supported_xcr0))
> > > > +		xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0);
> > > > +	if (static_cpu_has(X86_FEATURE_XSAVES) &&
> > > > +	    /* PT can be exposed to TD guest regardless of KVM's XSS support */
> > > > +	    host_xss != (kvm_tdx->xfam &
> > > > +			 (kvm_caps.supported_xss | XFEATURE_MASK_PT | TDX_TD_XFAM_CET)))
> > > > +		wrmsrl(MSR_IA32_XSS, host_xss);
> > > > +	if (static_cpu_has(X86_FEATURE_PKU) &&
> > > > +	    (kvm_tdx->xfam & XFEATURE_MASK_PKRU))
> > > > +		write_pkru(vcpu->arch.host_pkru);
> > > > +}
> > > 
> > > Maybe one minor question regarding the pkru restore. In the non-TDX version
> > > kvm_load_host_xsave_state(), it first tries to read the current setting
> > > vcpu->arch.pkru = rdpkru(); if this setting does not equal to host_pkru,
> > > it trigger the write_pkru on host. Does it mean we can also leverage that mechanism
> > > in TDX to avoid 1 pkru write(I guess pkru write is costly than a read pkru)?
> > 
> > Yes, that's the intention.  When we set the PKRU feature for the guest, TDX
> > module unconditionally initialize pkru.
> 
> I see, thanks for the information. Please correct me if I'm wrong, and I'm not sure
> if wrpkru instruction would trigger the TD exit. The TDX module spec[1] mentioned PKS
> (protected key for supervisor pages), but does not metion PKU for user pages. PKS
> is controlled by MSR IA32_PKRS. The TDX module will passthrough the MSR IA32_PKRS
> write in TD, because TDX module clears the PKS bitmap in VMCS:
> https://github.com/intel/tdx-module/blob/tdx_1.5/src/common/helpers/helpers.c#L1723
> so neither write to MSR IA32_PKRS nor wrpkru triggers TD exit.

The wrpkru instruction in a TDX guest doesn't cause an exit to the TDX module.  The TDX module
runs with CR4.PKE=0, so the value of pkru doesn't matter to the TDX module.
When exiting from the TDX module to the host VMM, PKRU is initialized to zero with
xrstor.  So it doesn't matter.

We need to refer to NP-SEAMLDR for the register value for TDX module on
SEAMCALL. It sets up the register values for TDX module on SEAMCALL.


> However, after a second thought, I found that after commit 72a6c08c44e4, the current
> code should not be a problem, because write_pkru() would first read the current pkru
> settings and decide whether to update to the pkru register.
> 
> > Do you have use case that wrpkru()
> > (without rdpkru()) is better?
> 
> I don't have use case yet. But with/without rdpkru() in tdx_restore_host_xsave_state(),
> there is no much difference because write_pkru() has taken care of it if I understand
> correctly.

The code in this hunk is the TDX version of kvm_load_guest_xsave_state().  We can
follow the VMX case at the moment.
Binbin Wu April 7, 2024, 3:47 a.m. UTC | #5
On 2/26/2024 4:26 PM, isaku.yamahata@intel.com wrote:
> From: Isaku Yamahata <isaku.yamahata@intel.com>
>
> On exiting from the guest TD, xsave state is clobbered.  Restore xsave
> state on TD exit.
>
> Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> ---
> v19:
> - Add EXPORT_SYMBOL_GPL(host_xcr0)
>
> v15 -> v16:
> - Added CET flag mask
>
> Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> ---
>   arch/x86/kvm/vmx/tdx.c | 19 +++++++++++++++++++
>   arch/x86/kvm/x86.c     |  1 +
>   2 files changed, 20 insertions(+)
>
> diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
> index 9616b1aab6ce..199226c6cf55 100644
> --- a/arch/x86/kvm/vmx/tdx.c
> +++ b/arch/x86/kvm/vmx/tdx.c
> @@ -2,6 +2,7 @@
>   #include <linux/cpu.h>
>   #include <linux/mmu_context.h>
>   
> +#include <asm/fpu/xcr.h>
>   #include <asm/tdx.h>
>   
>   #include "capabilities.h"
> @@ -534,6 +535,23 @@ void tdx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
>   	 */
>   }
>   
> +static void tdx_restore_host_xsave_state(struct kvm_vcpu *vcpu)
> +{
> +	struct kvm_tdx *kvm_tdx = to_kvm_tdx(vcpu->kvm);
> +
> +	if (static_cpu_has(X86_FEATURE_XSAVE) &&
> +	    host_xcr0 != (kvm_tdx->xfam & kvm_caps.supported_xcr0))
> +		xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0);
> +	if (static_cpu_has(X86_FEATURE_XSAVES) &&
> +	    /* PT can be exposed to TD guest regardless of KVM's XSS support */
The comment needs to be updated to reflect the case for CET.

> +	    host_xss != (kvm_tdx->xfam &
> +			 (kvm_caps.supported_xss | XFEATURE_MASK_PT | TDX_TD_XFAM_CET)))

For TDX_TD_XFAM_CET, maybe there is no need to make it TDX-specific?

BTW, the definitions for XFEATURE_MASK_CET_USER/XFEATURE_MASK_CET_KERNEL 
have been merged.
https://lore.kernel.org/all/20230613001108.3040476-25-rick.p.edgecombe%40intel.com
You can resolve the TODO in 
https://lore.kernel.org/kvm/5eca97e6a3978cf4dcf1cff21be6ec8b639a66b9.1708933498.git.isaku.yamahata@intel.com/

> +		wrmsrl(MSR_IA32_XSS, host_xss);
> +	if (static_cpu_has(X86_FEATURE_PKU) &&
> +	    (kvm_tdx->xfam & XFEATURE_MASK_PKRU))
> +		write_pkru(vcpu->arch.host_pkru);
> +}
> +
>   static noinstr void tdx_vcpu_enter_exit(struct vcpu_tdx *tdx)
>   {
>   	struct tdx_module_args args;
> @@ -609,6 +627,7 @@ fastpath_t tdx_vcpu_run(struct kvm_vcpu *vcpu)
>   
>   	tdx_vcpu_enter_exit(tdx);
>   
> +	tdx_restore_host_xsave_state(vcpu);
>   	tdx->host_state_need_restore = true;
>   
>   	vcpu->arch.regs_avail &= ~VMX_REGS_LAZY_LOAD_SET;
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 23ece956c816..b361d948140f 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -315,6 +315,7 @@ const struct kvm_stats_header kvm_vcpu_stats_header = {
>   };
>   
>   u64 __read_mostly host_xcr0;
> +EXPORT_SYMBOL_GPL(host_xcr0);
>   
>   static struct kmem_cache *x86_emulator_cache;
>
Isaku Yamahata April 12, 2024, 8:19 p.m. UTC | #6
On Sun, Apr 07, 2024 at 11:47:00AM +0800,
Binbin Wu <binbin.wu@linux.intel.com> wrote:

> 
> 
> On 2/26/2024 4:26 PM, isaku.yamahata@intel.com wrote:
> > From: Isaku Yamahata <isaku.yamahata@intel.com>
> > 
> > On exiting from the guest TD, xsave state is clobbered.  Restore xsave
> > state on TD exit.
> > 
> > Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> > ---
> > v19:
> > - Add EXPORT_SYMBOL_GPL(host_xcr0)
> > 
> > v15 -> v16:
> > - Added CET flag mask
> > 
> > Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> > ---
> >   arch/x86/kvm/vmx/tdx.c | 19 +++++++++++++++++++
> >   arch/x86/kvm/x86.c     |  1 +
> >   2 files changed, 20 insertions(+)
> > 
> > diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
> > index 9616b1aab6ce..199226c6cf55 100644
> > --- a/arch/x86/kvm/vmx/tdx.c
> > +++ b/arch/x86/kvm/vmx/tdx.c
> > @@ -2,6 +2,7 @@
> >   #include <linux/cpu.h>
> >   #include <linux/mmu_context.h>
> > +#include <asm/fpu/xcr.h>
> >   #include <asm/tdx.h>
> >   #include "capabilities.h"
> > @@ -534,6 +535,23 @@ void tdx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
> >   	 */
> >   }
> > +static void tdx_restore_host_xsave_state(struct kvm_vcpu *vcpu)
> > +{
> > +	struct kvm_tdx *kvm_tdx = to_kvm_tdx(vcpu->kvm);
> > +
> > +	if (static_cpu_has(X86_FEATURE_XSAVE) &&
> > +	    host_xcr0 != (kvm_tdx->xfam & kvm_caps.supported_xcr0))
> > +		xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0);
> > +	if (static_cpu_has(X86_FEATURE_XSAVES) &&
> > +	    /* PT can be exposed to TD guest regardless of KVM's XSS support */
> The comment needs to be updated to reflect the case for CET.
> 
> > +	    host_xss != (kvm_tdx->xfam &
> > +			 (kvm_caps.supported_xss | XFEATURE_MASK_PT | TDX_TD_XFAM_CET)))
> 
> For TDX_TD_XFAM_CET, maybe no need to make it TDX specific?
> 
> BTW, the definitions for XFEATURE_MASK_CET_USER/XFEATURE_MASK_CET_KERNEL
> have been merged.
> https://lore.kernel.org/all/20230613001108.3040476-25-rick.p.edgecombe%40intel.com
> You can resolve the TODO in https://lore.kernel.org/kvm/5eca97e6a3978cf4dcf1cff21be6ec8b639a66b9.1708933498.git.isaku.yamahata@intel.com/

Yes, will update those constants to use the ones in arch/x86/include/asm/fpu/types.h
diff mbox series

Patch

diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index 9616b1aab6ce..199226c6cf55 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -2,6 +2,7 @@ 
 #include <linux/cpu.h>
 #include <linux/mmu_context.h>
 
+#include <asm/fpu/xcr.h>
 #include <asm/tdx.h>
 
 #include "capabilities.h"
@@ -534,6 +535,23 @@  void tdx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 	 */
 }
 
+static void tdx_restore_host_xsave_state(struct kvm_vcpu *vcpu)
+{
+	struct kvm_tdx *kvm_tdx = to_kvm_tdx(vcpu->kvm);
+
+	if (static_cpu_has(X86_FEATURE_XSAVE) &&
+	    host_xcr0 != (kvm_tdx->xfam & kvm_caps.supported_xcr0))
+		xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0);
+	if (static_cpu_has(X86_FEATURE_XSAVES) &&
+	    /* PT can be exposed to TD guest regardless of KVM's XSS support */
+	    host_xss != (kvm_tdx->xfam &
+			 (kvm_caps.supported_xss | XFEATURE_MASK_PT | TDX_TD_XFAM_CET)))
+		wrmsrl(MSR_IA32_XSS, host_xss);
+	if (static_cpu_has(X86_FEATURE_PKU) &&
+	    (kvm_tdx->xfam & XFEATURE_MASK_PKRU))
+		write_pkru(vcpu->arch.host_pkru);
+}
+
 static noinstr void tdx_vcpu_enter_exit(struct vcpu_tdx *tdx)
 {
 	struct tdx_module_args args;
@@ -609,6 +627,7 @@  fastpath_t tdx_vcpu_run(struct kvm_vcpu *vcpu)
 
 	tdx_vcpu_enter_exit(tdx);
 
+	tdx_restore_host_xsave_state(vcpu);
 	tdx->host_state_need_restore = true;
 
 	vcpu->arch.regs_avail &= ~VMX_REGS_LAZY_LOAD_SET;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 23ece956c816..b361d948140f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -315,6 +315,7 @@  const struct kvm_stats_header kvm_vcpu_stats_header = {
 };
 
 u64 __read_mostly host_xcr0;
+EXPORT_SYMBOL_GPL(host_xcr0);
 
 static struct kmem_cache *x86_emulator_cache;