[v19,078/130] KVM: TDX: Implement TDX vcpu enter/exit path

Message ID dbaa6b1a6c4ebb1400be5f7099b4b9e3b54431bb.1708933498.git.isaku.yamahata@intel.com
State New, archived
Series [v19,001/130] x86/virt/tdx: Rename _offset to _member for TD_SYSINFO_MAP() macro

Commit Message

Isaku Yamahata Feb. 26, 2024, 8:26 a.m. UTC
From: Isaku Yamahata <isaku.yamahata@intel.com>

Implement running a TDX vcpu.  Once the vcpu runs on a logical
processor (LP), the TDX vcpu is associated with that LP.  When the TDX
vcpu moves to another LP, its state on the previous LP needs to be
flushed.  When destroying the TDX vcpu, the flush must be completed and
the CPU's memory cache flushed as well.  Track which LP the TDX vcpu
last ran on and flush as necessary.
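
As a rough illustration of that tracking (a hypothetical sketch, not
code from this patch; tdx->lp and tdx_flush_vp() are made-up stand-ins
for whatever the series actually uses):

	/* Hypothetical: flush the vcpu's state on the old LP before
	 * associating it with the current one.  TDH.VP.FLUSH must run
	 * on the LP the vcpu is currently associated with.
	 */
	if (tdx->lp != raw_smp_processor_id()) {
		if (tdx->lp >= 0)
			smp_call_function_single(tdx->lp, tdx_flush_vp,
						 vcpu, 1);
		tdx->lp = raw_smp_processor_id();
	}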

Do nothing on the sched_in event, as TDX doesn't support pause-loop
exiting.

TDX vcpu execution requires the PMU debug store to be restored after
returning to KVM, because the TDX module unconditionally resets its
value.  To reuse the existing code, export perf_restore_debug_store.
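
For reference, a minimal sketch of where that restore lands
(hypothetical placement; perf_restore_debug_store() is the existing
perf helper being exported here):

	/* After TDH.VP.ENTER returns, the TDX module has cleared
	 * IA32_DS_AREA; put the host value back before any host
	 * PEBS/BTS user runs again.
	 */
	perf_restore_debug_store();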

Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>

---
v19:
- Moved EXPORT_SYMBOL_GPL(host_xcr0) to the patch that uses it

Changes v15 -> v16:
- use __seamcall_saved_ret()
- As struct tdx_module_args doesn't match with vcpu.arch.regs, copy regs
  before/after calling __seamcall_saved_ret().

Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
---
 arch/x86/kvm/vmx/main.c    | 21 +++++++++-
 arch/x86/kvm/vmx/tdx.c     | 84 ++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/vmx/tdx.h     | 33 +++++++++++++++
 arch/x86/kvm/vmx/x86_ops.h |  2 +
 4 files changed, 138 insertions(+), 2 deletions(-)

Comments

Sean Christopherson March 15, 2024, 5:26 p.m. UTC | #1
On Mon, Feb 26, 2024, isaku.yamahata@intel.com wrote:
> +static noinstr void tdx_vcpu_enter_exit(struct vcpu_tdx *tdx)
> +{
> +	struct tdx_module_args args;
> +
> +	/*
> +	 * Avoid section mismatch with to_tdx() with KVM_VM_BUG().  The caller
> +	 * should call to_tdx().

C'mon.  I don't think it's unreasonable to expect that at least one of the many
people working on TDX would figure out why to_vmx() is __always_inline.

> +	 */
> +	struct kvm_vcpu *vcpu = &tdx->vcpu;
> +
> +	guest_state_enter_irqoff();
> +
> +	/*
> +	 * TODO: optimization:
> +	 * - Eliminate copy between args and vcpu->arch.regs.
> +	 * - copyin/copyout registers only if (tdx->tdvmcall.regs_mask != 0)
> +	 *   which means TDG.VP.VMCALL.
> +	 */
> +	args = (struct tdx_module_args) {
> +		.rcx = tdx->tdvpr_pa,
> +#define REG(reg, REG)	.reg = vcpu->arch.regs[VCPU_REGS_ ## REG]

Organizing tdx_module_args's registers by volatile vs. non-volatile is asinine.
This code should not need to exist.

> +	WARN_ON_ONCE(!kvm_rebooting &&
> +		     (tdx->exit_reason.full & TDX_SW_ERROR) == TDX_SW_ERROR);
> +
> +	guest_state_exit_irqoff();
> +}
> +
> +fastpath_t tdx_vcpu_run(struct kvm_vcpu *vcpu)
> +{
> +	struct vcpu_tdx *tdx = to_tdx(vcpu);
> +
> +	if (unlikely(!tdx->initialized))
> +		return -EINVAL;
> +	if (unlikely(vcpu->kvm->vm_bugged)) {
> +		tdx->exit_reason.full = TDX_NON_RECOVERABLE_VCPU;
> +		return EXIT_FASTPATH_NONE;
> +	}
> +
> +	trace_kvm_entry(vcpu);
> +
> +	tdx_vcpu_enter_exit(tdx);
> +
> +	vcpu->arch.regs_avail &= ~VMX_REGS_LAZY_LOAD_SET;
> +	trace_kvm_exit(vcpu, KVM_ISA_VMX);
> +
> +	return EXIT_FASTPATH_NONE;
> +}
> +
>  void tdx_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa, int pgd_level)
>  {
>  	WARN_ON_ONCE(root_hpa & ~PAGE_MASK);
> diff --git a/arch/x86/kvm/vmx/tdx.h b/arch/x86/kvm/vmx/tdx.h
> index d822e790e3e5..81d301fbe638 100644
> --- a/arch/x86/kvm/vmx/tdx.h
> +++ b/arch/x86/kvm/vmx/tdx.h
> @@ -27,6 +27,37 @@ struct kvm_tdx {
>  	struct page *source_page;
>  };
>  
> +union tdx_exit_reason {
> +	struct {
> +		/* 31:0 mirror the VMX Exit Reason format */

Then use "union vmx_exit_reason"; having to maintain duplicate copies of the same
union is not something I want to do.

I'm honestly not even convinced that "union tdx_exit_reason" needs to exist.  I
added vmx_exit_reason because we kept having bugs where KVM would fail to strip
bits 31:16, and because nested VMX needs to stuff failed_vmentry, but I don't
see a similar need for TDX.

I would even go so far as to say the vcpu_tdx field shouldn't be exit_reason,
and instead should be "return_code" or something.  E.g. if the TDX module refuses
to run the vCPU, there's no VM-Enter and thus no VM-Exit (unless you count the
SEAMCALL itself, har har).  Ditto for #GP or #UD on the SEAMCALL (or any other
reason that generates TDX_SW_ERROR).

Ugh, I'm doubling down on that suggestion.  This:

	WARN_ON_ONCE(!kvm_rebooting &&
		     (tdx->vp_enter_ret & TDX_SW_ERROR) == TDX_SW_ERROR);

	if ((u16)tdx->exit_reason.basic == EXIT_REASON_EXCEPTION_NMI &&
	    is_nmi(tdexit_intr_info(vcpu))) {
		kvm_before_interrupt(vcpu, KVM_HANDLING_NMI);
		vmx_do_nmi_irqoff();
		kvm_after_interrupt(vcpu);
	}

is heinous.  If there's an error that leaves bits 15:0 zero, KVM will synthesize
a spurious NMI.  I don't know whether or not that can happen, but it's not
something that should even be possible in KVM, i.e. the exit reason should be
processed if and only if KVM *knows* there was a sane VM-Exit from non-root mode.

tdx_vcpu_run() has a similar issue, though it's probably benign.  If there's an
error in bits 15:0 that happens to collide with EXIT_REASON_TDCALL, weird things
will happen.

	if (tdx->exit_reason.basic == EXIT_REASON_TDCALL)
		tdx->tdvmcall.rcx = vcpu->arch.regs[VCPU_REGS_RCX];
	else
		tdx->tdvmcall.rcx = 0;

I vote for something like the below, with much more robust checking of vp_enter_ret
before it's converted to a VMX exit reason.

static __always_inline union vmx_exit_reason tdexit_exit_reason(struct kvm_vcpu *vcpu)
{
	return (union vmx_exit_reason)(u32)to_tdx(vcpu)->vp_enter_ret;
}

diff --git a/arch/x86/kvm/vmx/tdx.h b/arch/x86/kvm/vmx/tdx.h
index af3a2b8afee8..b9b40b2eaccb 100644
--- a/arch/x86/kvm/vmx/tdx.h
+++ b/arch/x86/kvm/vmx/tdx.h
@@ -43,37 +43,6 @@ struct kvm_tdx {
        struct page *source_page;
 };
 
-union tdx_exit_reason {
-       struct {
-               /* 31:0 mirror the VMX Exit Reason format */
-               u64 basic               : 16;
-               u64 reserved16          : 1;
-               u64 reserved17          : 1;
-               u64 reserved18          : 1;
-               u64 reserved19          : 1;
-               u64 reserved20          : 1;
-               u64 reserved21          : 1;
-               u64 reserved22          : 1;
-               u64 reserved23          : 1;
-               u64 reserved24          : 1;
-               u64 reserved25          : 1;
-               u64 bus_lock_detected   : 1;
-               u64 enclave_mode        : 1;
-               u64 smi_pending_mtf     : 1;
-               u64 smi_from_vmx_root   : 1;
-               u64 reserved30          : 1;
-               u64 failed_vmentry      : 1;
-
-               /* 63:32 are TDX specific */
-               u64 details_l1          : 8;
-               u64 class               : 8;
-               u64 reserved61_48       : 14;
-               u64 non_recoverable     : 1;
-               u64 error               : 1;
-       };
-       u64 full;
-};
-
 struct vcpu_tdx {
        struct kvm_vcpu vcpu;
 
@@ -103,7 +72,8 @@ struct vcpu_tdx {
                };
                u64 rcx;
        } tdvmcall;
-       union tdx_exit_reason exit_reason;
+
+       u64 vp_enter_ret;
 
        bool initialized;
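
A minimal sketch of the kind of robust check being suggested
(hypothetical helper; TDX_ERROR is the existing bit-63 error flag from
<asm/tdx.h>):

	static __always_inline bool tdx_vp_enter_succeeded(struct vcpu_tdx *tdx)
	{
		/* Interpret vp_enter_ret as a VMX exit reason only when
		 * TDH.VP.ENTER completed without the error bit set.
		 */
		return !(tdx->vp_enter_ret & TDX_ERROR);
	}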
Isaku Yamahata March 15, 2024, 8:42 p.m. UTC | #2
On Fri, Mar 15, 2024 at 10:26:30AM -0700,
Sean Christopherson <seanjc@google.com> wrote:

> > diff --git a/arch/x86/kvm/vmx/tdx.h b/arch/x86/kvm/vmx/tdx.h
> > index d822e790e3e5..81d301fbe638 100644
> > --- a/arch/x86/kvm/vmx/tdx.h
> > +++ b/arch/x86/kvm/vmx/tdx.h
> > @@ -27,6 +27,37 @@ struct kvm_tdx {
> >  	struct page *source_page;
> >  };
> >  
> > +union tdx_exit_reason {
> > +	struct {
> > +		/* 31:0 mirror the VMX Exit Reason format */
> 
> Then use "union vmx_exit_reason"; having to maintain duplicate copies of the same
> union is not something I want to do.
> 
> I'm honestly not even convinced that "union tdx_exit_reason" needs to exist.  I
> added vmx_exit_reason because we kept having bugs where KVM would fail to strip
> bits 31:16, and because nested VMX needs to stuff failed_vmentry, but I don't
> see a similar need for TDX.
> 
> I would even go so far as to say the vcpu_tdx field shouldn't be exit_reason,
> and instead should be "return_code" or something.  E.g. if the TDX module refuses
> to run the vCPU, there's no VM-Enter and thus no VM-Exit (unless you count the
> SEAMCALL itself, har har).  Ditto for #GP or #UD on the SEAMCALL (or any other
> reason that generates TDX_SW_ERROR).
> 
> Ugh, I'm doubling down on that suggestion.  This:
> 
> 	WARN_ON_ONCE(!kvm_rebooting &&
> 		     (tdx->vp_enter_ret & TDX_SW_ERROR) == TDX_SW_ERROR);
> 
> 	if ((u16)tdx->exit_reason.basic == EXIT_REASON_EXCEPTION_NMI &&
> 	    is_nmi(tdexit_intr_info(vcpu))) {
> 		kvm_before_interrupt(vcpu, KVM_HANDLING_NMI);
> 		vmx_do_nmi_irqoff();
> 		kvm_after_interrupt(vcpu);
> 	}
> 
> is heinous.  If there's an error that leaves bits 15:0 zero, KVM will synthesize
> a spurious NMI.  I don't know whether or not that can happen, but it's not
> something that should even be possible in KVM, i.e. the exit reason should be
> processed if and only if KVM *knows* there was a sane VM-Exit from non-root mode.
> 
> tdx_vcpu_run() has a similar issue, though it's probably benign.  If there's an
> error in bits 15:0 that happens to collide with EXIT_REASON_TDCALL, weird things
> will happen.
> 
> 	if (tdx->exit_reason.basic == EXIT_REASON_TDCALL)
> 		tdx->tdvmcall.rcx = vcpu->arch.regs[VCPU_REGS_RCX];
> 	else
> 		tdx->tdvmcall.rcx = 0;
> 
> I vote for something like the below, with much more robust checking of vp_enter_ret
> before it's converted to a VMX exit reason.
> 
> static __always_inline union vmx_exit_reason tdexit_exit_reason(struct kvm_vcpu *vcpu)
> {
> 	return (union vmx_exit_reason)(u32)to_tdx(vcpu)->vp_enter_ret;
> }

Thank you for the concrete suggestion.  Let me explore what safeguard
checks can be done to make the exit path robust.
Edgecombe, Rick P March 18, 2024, 9:01 p.m. UTC | #3
On Mon, 2024-02-26 at 00:26 -0800, isaku.yamahata@intel.com wrote:
> +fastpath_t tdx_vcpu_run(struct kvm_vcpu *vcpu)
> +{
> +       struct vcpu_tdx *tdx = to_tdx(vcpu);
> +
> +       if (unlikely(!tdx->initialized))
> +               return -EINVAL;
> +       if (unlikely(vcpu->kvm->vm_bugged)) {
> +               tdx->exit_reason.full = TDX_NON_RECOVERABLE_VCPU;
> +               return EXIT_FASTPATH_NONE;
> +       }
> +

Isaku, can you elaborate on why this needs special handling? There is a
check in vcpu_enter_guest() like:
	if (kvm_check_request(KVM_REQ_VM_DEAD, vcpu)) {
		r = -EIO;
		goto out;
	}

Instead it returns a SEAM error code for something actuated by KVM. But
can it even be reached, given the other check? Not sure if there is
a problem, it just sticks out to me and I'm wondering what's going on.
Isaku Yamahata March 18, 2024, 11:40 p.m. UTC | #4
On Mon, Mar 18, 2024 at 09:01:05PM +0000,
"Edgecombe, Rick P" <rick.p.edgecombe@intel.com> wrote:

> On Mon, 2024-02-26 at 00:26 -0800, isaku.yamahata@intel.com wrote:
> > +fastpath_t tdx_vcpu_run(struct kvm_vcpu *vcpu)
> > +{
> > +       struct vcpu_tdx *tdx = to_tdx(vcpu);
> > +
> > +       if (unlikely(!tdx->initialized))
> > +               return -EINVAL;
> > +       if (unlikely(vcpu->kvm->vm_bugged)) {
> > +               tdx->exit_reason.full = TDX_NON_RECOVERABLE_VCPU;
> > +               return EXIT_FASTPATH_NONE;
> > +       }
> > +
> 
> Isaku, can you elaborate on why this needs special handling? There is a
> check in vcpu_enter_guest() like:
> 	if (kvm_check_request(KVM_REQ_VM_DEAD, vcpu)) {
> 		r = -EIO;
> 		goto out;
> 	}
> 
> Instead it returns a SEAM error code for something actuated by KVM. But
> can it even be reached because of the other check? Not sure if there is
> a problem, just sticks out to me and wondering whats going on.

The original intention is to get out of the inner loop.  As Sean pointed
out, the current code does a poor job of checking the error from
__seamcall_saved_ret(TDH_VP_ENTER), so it fails to call KVM_BUG_ON()
when an unexpected error is returned.

The right fix is to properly check the error from TDH_VP_ENTER and call
KVM_BUG_ON().  Then the check you pointed out should go away.

  for (;;) {                                    /* outer loop */
          if (kvm_check_request(KVM_REQ_VM_DEAD, vcpu))
                  break;
          for (;;) {                            /* inner loop */
                  vcpu_run();
                  kvm_vcpu_exit_request(vcpu);
          }
  }
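
A hypothetical shape of that fix (sketch only; KVM_BUG_ON() marks the
VM dead and evaluates to the condition, so the vm_bugged special case
before entry can go away):

	if (KVM_BUG_ON(tdx->vp_enter_ret & TDX_ERROR, vcpu->kvm))
		return EXIT_FASTPATH_NONE;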
Kirill A. Shutemov April 4, 2024, 1:22 p.m. UTC | #5
On Mon, Feb 26, 2024 at 12:26:20AM -0800, isaku.yamahata@intel.com wrote:
> @@ -491,6 +494,87 @@ void tdx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
>  	 */
>  }
>  
> +static noinstr void tdx_vcpu_enter_exit(struct vcpu_tdx *tdx)
> +{

...

> +	tdx->exit_reason.full = __seamcall_saved_ret(TDH_VP_ENTER, &args);

Call to __seamcall_saved_ret() leaves noinstr section.

__seamcall_saved_ret() has to be moved:

diff --git a/arch/x86/virt/vmx/tdx/seamcall.S b/arch/x86/virt/vmx/tdx/seamcall.S
index e32cf82ed47e..6b434ab12db6 100644
--- a/arch/x86/virt/vmx/tdx/seamcall.S
+++ b/arch/x86/virt/vmx/tdx/seamcall.S
@@ -44,6 +44,8 @@ SYM_FUNC_START(__seamcall_ret)
 SYM_FUNC_END(__seamcall_ret)
 EXPORT_SYMBOL_GPL(__seamcall_ret);
 
+.section .noinstr.text, "ax"
+
 /*
  * __seamcall_saved_ret() - Host-side interface functions to SEAM software
  * (the P-SEAMLDR or the TDX module), with saving output registers to the
Huang, Kai April 4, 2024, 9:51 p.m. UTC | #6
On Thu, 2024-04-04 at 16:22 +0300, Kirill A. Shutemov wrote:
> On Mon, Feb 26, 2024 at 12:26:20AM -0800, isaku.yamahata@intel.com wrote:
> > @@ -491,6 +494,87 @@ void tdx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
> >  	 */
> >  }
> >  
> > +static noinstr void tdx_vcpu_enter_exit(struct vcpu_tdx *tdx)
> > +{
> 
> ...
> 
> > +	tdx->exit_reason.full = __seamcall_saved_ret(TDH_VP_ENTER, &args);
> 
> Call to __seamcall_saved_ret() leaves noinstr section.
> 
> __seamcall_saved_ret() has to be moved:
> 
> diff --git a/arch/x86/virt/vmx/tdx/seamcall.S b/arch/x86/virt/vmx/tdx/seamcall.S
> index e32cf82ed47e..6b434ab12db6 100644
> --- a/arch/x86/virt/vmx/tdx/seamcall.S
> +++ b/arch/x86/virt/vmx/tdx/seamcall.S
> @@ -44,6 +44,8 @@ SYM_FUNC_START(__seamcall_ret)
>  SYM_FUNC_END(__seamcall_ret)
>  EXPORT_SYMBOL_GPL(__seamcall_ret);
>  
> +.section .noinstr.text, "ax"
> +
>  /*
>   * __seamcall_saved_ret() - Host-side interface functions to SEAM software
>   * (the P-SEAMLDR or the TDX module), with saving output registers to the

Alternatively, I think we can explicitly use instrumentation_begin()/end()
around __seamcall_saved_ret() here.

__seamcall_saved_ret() could be used in the future for new SEAMCALLs (e.g.,
TDH.MEM.IMPORT) for TDX guest live migration, and I don't think those
callers will be tagged noinstr.
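
A sketch of that alternative (hypothetical placement inside the noinstr
tdx_vcpu_enter_exit()):

	/* __seamcall_saved_ret() is instrumentable, so explicitly mark
	 * the call site as an instrumentation-safe region.
	 */
	instrumentation_begin();
	tdx->exit_reason.full = __seamcall_saved_ret(TDH_VP_ENTER, &args);
	instrumentation_end();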
Sean Christopherson April 4, 2024, 10:45 p.m. UTC | #7
On Thu, Apr 04, 2024, Kai Huang wrote:
> On Thu, 2024-04-04 at 16:22 +0300, Kirill A. Shutemov wrote:
> > On Mon, Feb 26, 2024 at 12:26:20AM -0800, isaku.yamahata@intel.com wrote:
> > > @@ -491,6 +494,87 @@ void tdx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
> > >  	 */
> > >  }
> > >  
> > > +static noinstr void tdx_vcpu_enter_exit(struct vcpu_tdx *tdx)
> > > +{
> > 
> > ...
> > 
> > > +	tdx->exit_reason.full = __seamcall_saved_ret(TDH_VP_ENTER, &args);
> > 
> > Call to __seamcall_saved_ret() leaves noinstr section.
> > 
> > __seamcall_saved_ret() has to be moved:
> > 
> > diff --git a/arch/x86/virt/vmx/tdx/seamcall.S b/arch/x86/virt/vmx/tdx/seamcall.S
> > index e32cf82ed47e..6b434ab12db6 100644
> > --- a/arch/x86/virt/vmx/tdx/seamcall.S
> > +++ b/arch/x86/virt/vmx/tdx/seamcall.S
> > @@ -44,6 +44,8 @@ SYM_FUNC_START(__seamcall_ret)
> >  SYM_FUNC_END(__seamcall_ret)
> >  EXPORT_SYMBOL_GPL(__seamcall_ret);
> >  
> > +.section .noinstr.text, "ax"
> > +
> >  /*
> >   * __seamcall_saved_ret() - Host-side interface functions to SEAM software
> >   * (the P-SEAMLDR or the TDX module), with saving output registers to the
> 
> Alternatively, I think we can explicitly use instrumentation_begin()/end()
> around __seamcall_saved_ret() here.

No, that will just paper over the complaint.  Dang it, I was going to say that
I called out earlier that tdx_vcpu_enter_exit() doesn't need to be noinstr, but
it looks like my brain and fingers didn't connect.

So I'll say it now :-)

I don't think tdx_vcpu_enter_exit() needs to be noinstr, because the SEAMCALL is
functionally a VM-Exit, and so all host state is saved/restored "atomically"
across the SEAMCALL (some by hardware, some by software (TDX-module)).

The reason the VM-Enter flows for VMX and SVM need to be noinstr is they do things
like load the guest's CR2, and handle NMI VM-Exits with NMIs blocked.  None of
that applies to TDX.  Either that, or there are some massive bugs lurking due to
missing code.
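
For comparison, the sort of work that forces VMX's entry path to be
noinstr (paraphrased from vmx_vcpu_enter_exit(); shown for illustration
only):

	/* Guest CR2 must be loaded with instrumentation disabled: any
	 * instrumented code that faults in between would clobber CR2.
	 */
	if (vcpu->arch.cr2 != native_read_cr2())
		native_write_cr2(vcpu->arch.cr2);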
Huang, Kai April 4, 2024, 11:28 p.m. UTC | #8
On Thu, 2024-04-04 at 15:45 -0700, Sean Christopherson wrote:
> On Thu, Apr 04, 2024, Kai Huang wrote:
> > On Thu, 2024-04-04 at 16:22 +0300, Kirill A. Shutemov wrote:
> > > On Mon, Feb 26, 2024 at 12:26:20AM -0800, isaku.yamahata@intel.com wrote:
> > > > @@ -491,6 +494,87 @@ void tdx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
> > > >  	 */
> > > >  }
> > > >  
> > > > +static noinstr void tdx_vcpu_enter_exit(struct vcpu_tdx *tdx)
> > > > +{
> > > 
> > > ...
> > > 
> > > > +	tdx->exit_reason.full = __seamcall_saved_ret(TDH_VP_ENTER, &args);
> > > 
> > > Call to __seamcall_saved_ret() leaves noinstr section.
> > > 
> > > __seamcall_saved_ret() has to be moved:
> > > 
> > > diff --git a/arch/x86/virt/vmx/tdx/seamcall.S b/arch/x86/virt/vmx/tdx/seamcall.S
> > > index e32cf82ed47e..6b434ab12db6 100644
> > > --- a/arch/x86/virt/vmx/tdx/seamcall.S
> > > +++ b/arch/x86/virt/vmx/tdx/seamcall.S
> > > @@ -44,6 +44,8 @@ SYM_FUNC_START(__seamcall_ret)
> > >  SYM_FUNC_END(__seamcall_ret)
> > >  EXPORT_SYMBOL_GPL(__seamcall_ret);
> > >  
> > > +.section .noinstr.text, "ax"
> > > +
> > >  /*
> > >   * __seamcall_saved_ret() - Host-side interface functions to SEAM software
> > >   * (the P-SEAMLDR or the TDX module), with saving output registers to the
> > 
> > Alternatively, I think we can explicitly use instrumentation_begin()/end()
> > around __seamcall_saved_ret() here.
> 
> No, that will just paper over the complaint.  Dang it, I was going to say that
> I called out earlier that tdx_vcpu_enter_exit() doesn't need to be noinstr, but
> it looks like my brain and fingers didn't connect.
> 
> So I'll say it now :-)
> 
> I don't think tdx_vcpu_enter_exit() needs to be noinstr, because the SEAMCALL is
> functionally a VM-Exit, and so all host state is saved/restored "atomically"
> across the SEAMCALL (some by hardware, some by software (TDX-module)).
> 
> The reason the VM-Enter flows for VMX and SVM need to be noinstr is they do things
> like load the guest's CR2, and handle NMI VM-Exits with NMIs blocked.  None of
> that applies to TDX.  Either that, or there are some massive bugs lurking due to
> missing code.

Ah right.  That's even better :-)

Thanks for jumping in and pointing out!
Binbin Wu April 7, 2024, 1:42 a.m. UTC | #9
On 3/16/2024 1:26 AM, Sean Christopherson wrote:
> On Mon, Feb 26, 2024, isaku.yamahata@intel.com wrote:
>> +	 */
>> +	struct kvm_vcpu *vcpu = &tdx->vcpu;
>> +
>> +	guest_state_enter_irqoff();
>> +
>> +	/*
>> +	 * TODO: optimization:
>> +	 * - Eliminate copy between args and vcpu->arch.regs.
>> +	 * - copyin/copyout registers only if (tdx->tdvmcall.regs_mask != 0)
>> +	 *   which means TDG.VP.VMCALL.
>> +	 */
>> +	args = (struct tdx_module_args) {
>> +		.rcx = tdx->tdvpr_pa,
>> +#define REG(reg, REG)	.reg = vcpu->arch.regs[VCPU_REGS_ ## REG]
> Organizing tdx_module_args's registers by volatile vs. non-volatile is asinine.
> This code should not need to exist.

Did you suggest aligning tdx_module_args with enum kvm_reg for the GP
registers, so the copy can be done with a simple memcpy?

>
>> +	WARN_ON_ONCE(!kvm_rebooting &&
>> +		     (tdx->exit_reason.full & TDX_SW_ERROR) == TDX_SW_ERROR);
>> +
>> +	guest_state_exit_irqoff();
>> +}
>> +
>>
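
Presumably the idea is something like the following (hypothetical
sketch; args.gprs is a made-up field that assumes tdx_module_args were
reordered to match enum kvm_reg, which is not the case today):

	/* Hypothetical: with a gprs[] array laid out in enum kvm_reg
	 * order, the per-register REG() expansion collapses to one copy.
	 */
	memcpy(args.gprs, vcpu->arch.regs, sizeof(args.gprs));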

Patch

diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
index 7258a6304b4b..d72651ce99ac 100644
--- a/arch/x86/kvm/vmx/main.c
+++ b/arch/x86/kvm/vmx/main.c
@@ -158,6 +158,23 @@  static void vt_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 	vmx_vcpu_reset(vcpu, init_event);
 }
 
+static int vt_vcpu_pre_run(struct kvm_vcpu *vcpu)
+{
+	if (is_td_vcpu(vcpu))
+		/* Unconditionally continue to vcpu_run(). */
+		return 1;
+
+	return vmx_vcpu_pre_run(vcpu);
+}
+
+static fastpath_t vt_vcpu_run(struct kvm_vcpu *vcpu)
+{
+	if (is_td_vcpu(vcpu))
+		return tdx_vcpu_run(vcpu);
+
+	return vmx_vcpu_run(vcpu);
+}
+
 static void vt_flush_tlb_all(struct kvm_vcpu *vcpu)
 {
 	if (is_td_vcpu(vcpu)) {
@@ -343,8 +360,8 @@  struct kvm_x86_ops vt_x86_ops __initdata = {
 	.flush_tlb_gva = vt_flush_tlb_gva,
 	.flush_tlb_guest = vt_flush_tlb_guest,
 
-	.vcpu_pre_run = vmx_vcpu_pre_run,
-	.vcpu_run = vmx_vcpu_run,
+	.vcpu_pre_run = vt_vcpu_pre_run,
+	.vcpu_run = vt_vcpu_run,
 	.handle_exit = vmx_handle_exit,
 	.skip_emulated_instruction = vmx_skip_emulated_instruction,
 	.update_emulated_instruction = vmx_update_emulated_instruction,
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index 6aff3f7e2488..fdf9196cb592 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -11,6 +11,9 @@ 
 #include "vmx.h"
 #include "x86.h"
 
+#include <trace/events/kvm.h>
+#include "trace.h"
+
 #undef pr_fmt
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
@@ -491,6 +494,87 @@  void tdx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 	 */
 }
 
+static noinstr void tdx_vcpu_enter_exit(struct vcpu_tdx *tdx)
+{
+	struct tdx_module_args args;
+
+	/*
+	 * Avoid section mismatch with to_tdx() with KVM_VM_BUG().  The caller
+	 * should call to_tdx().
+	 */
+	struct kvm_vcpu *vcpu = &tdx->vcpu;
+
+	guest_state_enter_irqoff();
+
+	/*
+	 * TODO: optimization:
+	 * - Eliminate copy between args and vcpu->arch.regs.
+	 * - copyin/copyout registers only if (tdx->tdvmcall.regs_mask != 0)
+	 *   which means TDG.VP.VMCALL.
+	 */
+	args = (struct tdx_module_args) {
+		.rcx = tdx->tdvpr_pa,
+#define REG(reg, REG)	.reg = vcpu->arch.regs[VCPU_REGS_ ## REG]
+		REG(rdx, RDX),
+		REG(r8,  R8),
+		REG(r9,  R9),
+		REG(r10, R10),
+		REG(r11, R11),
+		REG(r12, R12),
+		REG(r13, R13),
+		REG(r14, R14),
+		REG(r15, R15),
+		REG(rbx, RBX),
+		REG(rdi, RDI),
+		REG(rsi, RSI),
+#undef REG
+	};
+
+	tdx->exit_reason.full = __seamcall_saved_ret(TDH_VP_ENTER, &args);
+
+#define REG(reg, REG)	vcpu->arch.regs[VCPU_REGS_ ## REG] = args.reg
+		REG(rcx, RCX);
+		REG(rdx, RDX);
+		REG(r8,  R8);
+		REG(r9,  R9);
+		REG(r10, R10);
+		REG(r11, R11);
+		REG(r12, R12);
+		REG(r13, R13);
+		REG(r14, R14);
+		REG(r15, R15);
+		REG(rbx, RBX);
+		REG(rdi, RDI);
+		REG(rsi, RSI);
+#undef REG
+
+	WARN_ON_ONCE(!kvm_rebooting &&
+		     (tdx->exit_reason.full & TDX_SW_ERROR) == TDX_SW_ERROR);
+
+	guest_state_exit_irqoff();
+}
+
+fastpath_t tdx_vcpu_run(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_tdx *tdx = to_tdx(vcpu);
+
+	if (unlikely(!tdx->initialized))
+		return -EINVAL;
+	if (unlikely(vcpu->kvm->vm_bugged)) {
+		tdx->exit_reason.full = TDX_NON_RECOVERABLE_VCPU;
+		return EXIT_FASTPATH_NONE;
+	}
+
+	trace_kvm_entry(vcpu);
+
+	tdx_vcpu_enter_exit(tdx);
+
+	vcpu->arch.regs_avail &= ~VMX_REGS_LAZY_LOAD_SET;
+	trace_kvm_exit(vcpu, KVM_ISA_VMX);
+
+	return EXIT_FASTPATH_NONE;
+}
+
 void tdx_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa, int pgd_level)
 {
 	WARN_ON_ONCE(root_hpa & ~PAGE_MASK);
diff --git a/arch/x86/kvm/vmx/tdx.h b/arch/x86/kvm/vmx/tdx.h
index d822e790e3e5..81d301fbe638 100644
--- a/arch/x86/kvm/vmx/tdx.h
+++ b/arch/x86/kvm/vmx/tdx.h
@@ -27,6 +27,37 @@  struct kvm_tdx {
 	struct page *source_page;
 };
 
+union tdx_exit_reason {
+	struct {
+		/* 31:0 mirror the VMX Exit Reason format */
+		u64 basic		: 16;
+		u64 reserved16		: 1;
+		u64 reserved17		: 1;
+		u64 reserved18		: 1;
+		u64 reserved19		: 1;
+		u64 reserved20		: 1;
+		u64 reserved21		: 1;
+		u64 reserved22		: 1;
+		u64 reserved23		: 1;
+		u64 reserved24		: 1;
+		u64 reserved25		: 1;
+		u64 bus_lock_detected	: 1;
+		u64 enclave_mode	: 1;
+		u64 smi_pending_mtf	: 1;
+		u64 smi_from_vmx_root	: 1;
+		u64 reserved30		: 1;
+		u64 failed_vmentry	: 1;
+
+		/* 63:32 are TDX specific */
+		u64 details_l1		: 8;
+		u64 class		: 8;
+		u64 reserved61_48	: 14;
+		u64 non_recoverable	: 1;
+		u64 error		: 1;
+	};
+	u64 full;
+};
+
 struct vcpu_tdx {
 	struct kvm_vcpu	vcpu;
 
@@ -34,6 +65,8 @@  struct vcpu_tdx {
 	unsigned long *tdvpx_pa;
 	bool td_vcpu_created;
 
+	union tdx_exit_reason exit_reason;
+
 	bool initialized;
 
 	/*
diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h
index 191f2964ec8e..3e29a6fe28ef 100644
--- a/arch/x86/kvm/vmx/x86_ops.h
+++ b/arch/x86/kvm/vmx/x86_ops.h
@@ -150,6 +150,7 @@  int tdx_vm_ioctl(struct kvm *kvm, void __user *argp);
 int tdx_vcpu_create(struct kvm_vcpu *vcpu);
 void tdx_vcpu_free(struct kvm_vcpu *vcpu);
 void tdx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event);
+fastpath_t tdx_vcpu_run(struct kvm_vcpu *vcpu);
 u8 tdx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
 
 int tdx_vcpu_ioctl(struct kvm_vcpu *vcpu, void __user *argp);
@@ -184,6 +185,7 @@  static inline int tdx_vm_ioctl(struct kvm *kvm, void __user *argp) { return -EOP
 static inline int tdx_vcpu_create(struct kvm_vcpu *vcpu) { return -EOPNOTSUPP; }
 static inline void tdx_vcpu_free(struct kvm_vcpu *vcpu) {}
 static inline void tdx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) {}
+static inline fastpath_t tdx_vcpu_run(struct kvm_vcpu *vcpu) { return EXIT_FASTPATH_NONE; }
 static inline u8 tdx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) { return 0; }
 
 static inline int tdx_vcpu_ioctl(struct kvm_vcpu *vcpu, void __user *argp) { return -EOPNOTSUPP; }