diff mbox series

[14/21] KVM: TDX: Implement hooks to propagate changes of TDP MMU mirror page table

Message ID 20240904030751.117579-15-rick.p.edgecombe@intel.com (mailing list archive)
State New, archived
Headers show
Series TDX MMU Part 2 | expand

Commit Message

Edgecombe, Rick P Sept. 4, 2024, 3:07 a.m. UTC
From: Isaku Yamahata <isaku.yamahata@intel.com>

Implement hooks in TDX to propagate changes of mirror page table to private
EPT, including changes for page table page adding/removing, guest page
adding/removing.

TDX invokes corresponding SEAMCALLs in the hooks.

- Hook link_external_spt
  propagates adding page table page into private EPT.

- Hook set_external_spte
  tdx_sept_set_private_spte() in this patch only handles adding of guest
  private page when TD is finalized.
  Later patches will handle the case of adding guest private pages before
  TD finalization.

- Hook free_external_spt
  It is invoked when page table page is removed in mirror page table, which
  currently must occur at TD tear down phase, after hkid is freed.

- Hook remove_external_spte
  It is invoked when guest private page is removed in mirror page table,
  which can occur when TD is active, e.g. during shared <-> private
  conversion and slot move/deletion.
  This hook is ensured to be triggered before hkid is freed, because
  gmem fd is released along with all private leaf mappings zapped before
  freeing hkid at VM destroy.

  TDX invokes below SEAMCALLs sequentially:
  1) TDH.MEM.RANGE.BLOCK (remove RWX bits from a private EPT entry),
  2) TDH.MEM.TRACK (increases TD epoch)
  3) TDH.MEM.PAGE.REMOVE (remove the private EPT entry and untrack the
     guest page).

  TDH.MEM.PAGE.REMOVE can't succeed without TDH.MEM.RANGE.BLOCK and
  TDH.MEM.TRACK being called successfully.
  SEAMCALL TDH.MEM.TRACK is called in function tdx_track() to enforce that
  TLB tracking will be performed by TDX module for private EPT.

Co-developed-by: Yan Zhao <yan.y.zhao@intel.com>
Signed-off-by: Yan Zhao <yan.y.zhao@intel.com>
Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
---
TDX MMU part 2 v1:
 - Split from the big patch "KVM: TDX: TDP MMU TDX support".
 - Move setting up the 4 callbacks (kvm_x86_ops::link_external_spt etc)
   from tdx_hardware_setup() (which doesn't exist anymore) to
   vt_hardware_setup() directly.  Make tdx_sept_link_external_spt() those
   4 callbacks global and add declarations to x86_ops.h so they can be
   setup in vt_hardware_setup().
 - Updated the KVM_BUG_ON() in tdx_sept_free_private_spt(). (Isaku, Binbin)
 - Removed the unused tdx_post_mmu_map_page().
 - Removed WARN_ON_ONCE) in tdh_mem_page_aug() according to Isaku's
   feedback:
   "This WARN_ON_ONCE() is a guard for buggy TDX module. It shouldn't return
   (TDX_EPT_ENTRY_STATE_INCORRECT | TDX_OPERAND_ID_RCX)) when
   SEPT_VE_DISABLED cleared.  Maybe we should remove this WARN_ON_ONCE()
   because the TDX module is mature."
 - Update for the wrapper functions for SEAMCALLs. (Sean)
 - Add preparation for KVM_TDX_INIT_MEM_REGION to make
   tdx_sept_set_private_spte() callback nop when the guest isn't finalized.
 - use unlikely(err) in  tdx_reclaim_td_page().
 - Updates from seamcall overhaul (Kai)
 - Move header definitions from "KVM: TDX: Define TDX architectural
   definitions" (Sean)
 - Drop ugly unions (Sean)
 - Remove tdx_mng_key_config_lock cleanup after dropped in "KVM: TDX:
   create/destroy VM structure" (Chao)
 - Since HKID is freed on vm_destroy() zapping only happens when HKID is
   allocated. Remove relevant code in zapping handlers that assume the
   opposite, and add some KVM_BUG_ON() to assert this where it was
   missing. (Isaku)
---
 arch/x86/kvm/vmx/main.c     |  14 ++-
 arch/x86/kvm/vmx/tdx.c      | 222 +++++++++++++++++++++++++++++++++++-
 arch/x86/kvm/vmx/tdx_arch.h |  23 ++++
 arch/x86/kvm/vmx/tdx_ops.h  |   6 +
 arch/x86/kvm/vmx/x86_ops.h  |  37 ++++++
 5 files changed, 300 insertions(+), 2 deletions(-)

Comments

Huang, Kai Sept. 6, 2024, 2:10 a.m. UTC | #1
> --- a/arch/x86/kvm/vmx/main.c
> +++ b/arch/x86/kvm/vmx/main.c
> @@ -36,9 +36,21 @@ static __init int vt_hardware_setup(void)
>   	 * is KVM may allocate couple of more bytes than needed for
>   	 * each VM.
>   	 */
> -	if (enable_tdx)
> +	if (enable_tdx) {
>   		vt_x86_ops.vm_size = max_t(unsigned int, vt_x86_ops.vm_size,
>   				sizeof(struct kvm_tdx));
> +		/*
> +		 * Note, TDX may fail to initialize in a later time in
> +		 * vt_init(), in which case it is not necessary to setup
> +		 * those callbacks.  But making them valid here even
> +		 * when TDX fails to init later is fine because those
> +		 * callbacks won't be called if the VM isn't TDX guest.
> +		 */
> +		vt_x86_ops.link_external_spt = tdx_sept_link_private_spt;
> +		vt_x86_ops.set_external_spte = tdx_sept_set_private_spte;
> +		vt_x86_ops.free_external_spt = tdx_sept_free_private_spt;
> +		vt_x86_ops.remove_external_spte = tdx_sept_remove_private_spte;

Nit:  The callbacks in 'struct kvm_x86_ops' have name "external", but 
TDX callbacks have name "private".  Should we rename TDX callbacks to 
make them aligned?

> +	}
>   
>   	return 0;
>   }
> diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
> index 6feb3ab96926..b8cd5a629a80 100644
> --- a/arch/x86/kvm/vmx/tdx.c
> +++ b/arch/x86/kvm/vmx/tdx.c
> @@ -447,6 +447,177 @@ void tdx_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa, int pgd_level)
>   	td_vmcs_write64(to_tdx(vcpu), SHARED_EPT_POINTER, root_hpa);
>   }
>   
> +static void tdx_unpin(struct kvm *kvm, kvm_pfn_t pfn)
> +{
> +	struct page *page = pfn_to_page(pfn);
> +
> +	put_page(page);
> +}
> +
> +static int tdx_mem_page_aug(struct kvm *kvm, gfn_t gfn,
> +			    enum pg_level level, kvm_pfn_t pfn)
> +{
> +	int tdx_level = pg_level_to_tdx_sept_level(level);
> +	struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
> +	hpa_t hpa = pfn_to_hpa(pfn);
> +	gpa_t gpa = gfn_to_gpa(gfn);
> +	u64 entry, level_state;
> +	u64 err;
> +
> +	err = tdh_mem_page_aug(kvm_tdx, gpa, hpa, &entry, &level_state);
> +	if (unlikely(err == TDX_ERROR_SEPT_BUSY)) {
> +		tdx_unpin(kvm, pfn);
> +		return -EAGAIN;
> +	}

Nit: Here (and other non-fatal error cases) I think we should return 
-EBUSY to make it consistent with non-TDX case?  E.g., the non-TDX case has:

                 if (!try_cmpxchg64(sptep, &iter->old_spte, new_spte))
                         return -EBUSY;

And the comment of tdp_mmu_set_spte_atomic() currently says it can only 
return 0 or -EBUSY.  It needs to be patched to reflect it can also 
return other non-0 errors like -EIO but those are fatal.  In terms of 
non-fatal error I don't think we need another -EAGAIN.

/*
  * tdp_mmu_set_spte_atomic - Set a TDP MMU SPTE atomically

[...]

  * Return:
  * * 0      - If the SPTE was set.
  * * -EBUSY - If the SPTE cannot be set. In this case this function will
  *	      have no side-effects other than setting iter->old_spte to
  *            the last known value of the spte.
  */

[...]

> +
> +static int tdx_sept_drop_private_spte(struct kvm *kvm, gfn_t gfn,
> +				      enum pg_level level, kvm_pfn_t pfn)
> +{
>
[...]

> +
> +	hpa_with_hkid = set_hkid_to_hpa(hpa, (u16)kvm_tdx->hkid);
> +	do {
> +		/*
> +		 * TDX_OPERAND_BUSY can happen on locking PAMT entry.  Because
> +		 * this page was removed above, other thread shouldn't be
> +		 * repeatedly operating on this page.  Just retry loop.
> +		 */
> +		err = tdh_phymem_page_wbinvd(hpa_with_hkid);
> +	} while (unlikely(err == (TDX_OPERAND_BUSY | TDX_OPERAND_ID_RCX)));

In what case(s) other threads can concurrently lock the PAMT entry, 
leading to the above BUSY?

[...]

> +
> +int tdx_sept_remove_private_spte(struct kvm *kvm, gfn_t gfn,
> +				 enum pg_level level, kvm_pfn_t pfn)
> +{
> +	int ret;
> +
> +	/*
> +	 * HKID is released when vm_free() which is after closing gmem_fd

 From latest dev branch HKID is freed from vt_vm_destroy(), but not 
vm_free() (which should be tdx_vm_free() btw).

static void vt_vm_destroy(struct kvm *kvm)
{
         if (is_td(kvm))
                 return tdx_mmu_release_hkid(kvm);

         vmx_vm_destroy(kvm);
}

Btw, why not have a tdx_vm_destroy() wrapper?  Seems all other vt_xx()s 
have a tdx_xx() but only this one calls tdx_mmu_release_hkid() directly.

> +	 * which causes gmem invalidation to zap all spte.
> +	 * Population is only allowed after KVM_TDX_INIT_VM.
> +	 */

What does the second sentence ("Population ...")  meaning?  Why is it 
relevant here?
Edgecombe, Rick P Sept. 9, 2024, 9:03 p.m. UTC | #2
On Fri, 2024-09-06 at 14:10 +1200, Huang, Kai wrote:
> 
> > --- a/arch/x86/kvm/vmx/main.c
> > +++ b/arch/x86/kvm/vmx/main.c
> > @@ -36,9 +36,21 @@ static __init int vt_hardware_setup(void)
> >          * is KVM may allocate couple of more bytes than needed for
> >          * each VM.
> >          */
> > -       if (enable_tdx)
> > +       if (enable_tdx) {
> >                 vt_x86_ops.vm_size = max_t(unsigned int, vt_x86_ops.vm_size,
> >                                 sizeof(struct kvm_tdx));
> > +               /*
> > +                * Note, TDX may fail to initialize in a later time in
> > +                * vt_init(), in which case it is not necessary to setup
> > +                * those callbacks.  But making them valid here even
> > +                * when TDX fails to init later is fine because those
> > +                * callbacks won't be called if the VM isn't TDX guest.
> > +                */
> > +               vt_x86_ops.link_external_spt = tdx_sept_link_private_spt;
> > +               vt_x86_ops.set_external_spte = tdx_sept_set_private_spte;
> > +               vt_x86_ops.free_external_spt = tdx_sept_free_private_spt;
> > +               vt_x86_ops.remove_external_spte =
> > tdx_sept_remove_private_spte;
> 
> Nit:  The callbacks in 'struct kvm_x86_ops' have name "external", but 
> TDX callbacks have name "private".  Should we rename TDX callbacks to 
> make them aligned?

"external" is the core MMU naming abstraction. I think you were part of the
discussion to purge special TDX private naming from the core MMU to avoid
confusion with AMD private memory in the last MMU series.

So external page tables ended up being a general concept, and private mem is the
TDX use. In practice of course it will likely only be used for TDX. So I thought
the external<->private connection here was nice to have.

> 
> > +       }
> >   
> >         return 0;
> >   }
> > diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
> > index 6feb3ab96926..b8cd5a629a80 100644
> > --- a/arch/x86/kvm/vmx/tdx.c
> > +++ b/arch/x86/kvm/vmx/tdx.c
> > @@ -447,6 +447,177 @@ void tdx_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t
> > root_hpa, int pgd_level)
> >         td_vmcs_write64(to_tdx(vcpu), SHARED_EPT_POINTER, root_hpa);
> >   }
> >   
> > +static void tdx_unpin(struct kvm *kvm, kvm_pfn_t pfn)
> > +{
> > +       struct page *page = pfn_to_page(pfn);
> > +
> > +       put_page(page);
> > +}
> > +
> > +static int tdx_mem_page_aug(struct kvm *kvm, gfn_t gfn,
> > +                           enum pg_level level, kvm_pfn_t pfn)
> > +{
> > +       int tdx_level = pg_level_to_tdx_sept_level(level);
> > +       struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
> > +       hpa_t hpa = pfn_to_hpa(pfn);
> > +       gpa_t gpa = gfn_to_gpa(gfn);
> > +       u64 entry, level_state;
> > +       u64 err;
> > +
> > +       err = tdh_mem_page_aug(kvm_tdx, gpa, hpa, &entry, &level_state);
> > +       if (unlikely(err == TDX_ERROR_SEPT_BUSY)) {
> > +               tdx_unpin(kvm, pfn);
> > +               return -EAGAIN;
> > +       }
> 
> Nit: Here (and other non-fatal error cases) I think we should return 
> -EBUSY to make it consistent with non-TDX case?  E.g., the non-TDX case has:
> 
>                  if (!try_cmpxchg64(sptep, &iter->old_spte, new_spte))
>                          return -EBUSY;
> 
> And the comment of tdp_mmu_set_spte_atomic() currently says it can only 
> return 0 or -EBUSY.  It needs to be patched to reflect it can also 
> return other non-0 errors like -EIO but those are fatal.  In terms of 
> non-fatal error I don't think we need another -EAGAIN.

Yes, good point.

> 
> /*
>   * tdp_mmu_set_spte_atomic - Set a TDP MMU SPTE atomically
> 
> [...]
> 
>   * Return:
>   * * 0      - If the SPTE was set.
>   * * -EBUSY - If the SPTE cannot be set. In this case this function will
>   *           have no side-effects other than setting iter->old_spte to
>   *            the last known value of the spte.
>   */
> 
> [...]
> 
> > +
> > +static int tdx_sept_drop_private_spte(struct kvm *kvm, gfn_t gfn,
> > +                                     enum pg_level level, kvm_pfn_t pfn)
> > +{
> > 
> [...]
> 
> > +
> > +       hpa_with_hkid = set_hkid_to_hpa(hpa, (u16)kvm_tdx->hkid);
> > +       do {
> > +               /*
> > +                * TDX_OPERAND_BUSY can happen on locking PAMT entry. 
> > Because
> > +                * this page was removed above, other thread shouldn't be
> > +                * repeatedly operating on this page.  Just retry loop.
> > +                */
> > +               err = tdh_phymem_page_wbinvd(hpa_with_hkid);
> > +       } while (unlikely(err == (TDX_OPERAND_BUSY | TDX_OPERAND_ID_RCX)));
> 
> In what case(s) other threads can concurrently lock the PAMT entry, 
> leading to the above BUSY?

Good question, lets add this to the seamcall retry research.

> 
> [...]
> 
> > +
> > +int tdx_sept_remove_private_spte(struct kvm *kvm, gfn_t gfn,
> > +                                enum pg_level level, kvm_pfn_t pfn)
> > +{
> > +       int ret;
> > +
> > +       /*
> > +        * HKID is released when vm_free() which is after closing gmem_fd
> 
>  From latest dev branch HKID is freed from vt_vm_destroy(), but not 
> vm_free() (which should be tdx_vm_free() btw).

Oh, yes, we should update the comment.

> 
> static void vt_vm_destroy(struct kvm *kvm)
> {
>          if (is_td(kvm))
>                  return tdx_mmu_release_hkid(kvm);
> 
>          vmx_vm_destroy(kvm);
> }
> 
> Btw, why not have a tdx_vm_destroy() wrapper?  Seems all other vt_xx()s 
> have a tdx_xx() but only this one calls tdx_mmu_release_hkid() directly.

No strong reason. It's asymmetric to the other tdx callbacks, but KVM code tends
to be less wrapped and a tdx_vm_destory would be a oneline function. So I think
it fits in other ways.

> 
> > +        * which causes gmem invalidation to zap all spte.
> > +        * Population is only allowed after KVM_TDX_INIT_VM.
> > +        */
> 
> What does the second sentence ("Population ...")  meaning?  Why is it 
> relevant here?
> 
How about:
/*
 * HKID is released after all private pages have been removed,
 * and set before any might be populated. Warn if zapping is
 * attempted when there can't be anything populated in the private
 * EPT.
 */

But actually, I wonder if we need to remove the KVM_BUG_ON(). I think if you did
a KVM_PRE_FAULT_MEMORY and then deleted the memslot you could hit it?
Yan Zhao Sept. 10, 2024, 1:52 a.m. UTC | #3
On Tue, Sep 10, 2024 at 05:03:57AM +0800, Edgecombe, Rick P wrote:
> On Fri, 2024-09-06 at 14:10 +1200, Huang, Kai wrote:
...
> > > +        * which causes gmem invalidation to zap all spte.
> > > +        * Population is only allowed after KVM_TDX_INIT_VM.
> > > +        */
> > 
> > What does the second sentence ("Population ...")  meaning?  Why is it 
> > relevant here?
> > 
> How about:
> /*
>  * HKID is released after all private pages have been removed,
>  * and set before any might be populated. Warn if zapping is
>  * attempted when there can't be anything populated in the private
>  * EPT.
>  */
> 
> But actually, I wonder if we need to remove the KVM_BUG_ON(). I think if you did
> a KVM_PRE_FAULT_MEMORY and then deleted the memslot you could hit it?
If we disallow vCPU creation before TD initialization, as discussed in [1],
the BUG_ON should not be hit.

[1] https://lore.kernel.org/all/ZtAU7FIV2Xkw+L3O@yzhao56-desk.sh.intel.com/
Paolo Bonzini Sept. 10, 2024, 9:33 a.m. UTC | #4
On 9/9/24 23:03, Edgecombe, Rick P wrote:
> KVM code tends to be less wrapped and a tdx_vm_destory would be a
> oneline function. So I think it fits in other ways.

Yes, no problem there.  Sometimes one-line functions are ok (see 
ept_sync_global() case elsewhere in the series), sometimes they're 
overkill, especially if they wrap a function defined in the same file as 
the wrapper.

>>> +        * which causes gmem invalidation to zap all spte.
>>> +        * Population is only allowed after KVM_TDX_INIT_VM.
>>> +        */
>> What does the second sentence ("Population ...")  meaning?  Why is it
>> relevant here?
>>
> How about:
> /*
>   * HKID is released after all private pages have been removed,
>   * and set before any might be populated. Warn if zapping is
>   * attempted when there can't be anything populated in the private
>   * EPT.
>   */
> 
> But actually, I wonder if we need to remove the KVM_BUG_ON(). I think if you did
> a KVM_PRE_FAULT_MEMORY and then deleted the memslot you could hit it?

I think all paths to handle_removed_pt() are safe:

__tdp_mmu_zap_root
         tdp_mmu_zap_root
                 kvm_tdp_mmu_zap_all
                         kvm_arch_flush_shadow_all
                                 kvm_flush_shadow_all
                                         kvm_destroy_vm (*)
                                         kvm_mmu_notifier_release (*)
                 kvm_tdp_mmu_zap_invalidated_roots
                         kvm_mmu_zap_all_fast (**)
kvm_tdp_mmu_zap_sp
         kvm_recover_nx_huge_pages (***)


(*) only called at destroy time
(**) only invalidates direct roots
(***) shouldn't apply to TDX I hope?

Paolo
Edgecombe, Rick P Sept. 10, 2024, 11:58 p.m. UTC | #5
On Tue, 2024-09-10 at 11:33 +0200, Paolo Bonzini wrote:
> > But actually, I wonder if we need to remove the KVM_BUG_ON(). I think if you
> > did
> > a KVM_PRE_FAULT_MEMORY and then deleted the memslot you could hit it?
> 
> I think all paths to handle_removed_pt() are safe:
> 
> __tdp_mmu_zap_root
>          tdp_mmu_zap_root
>                  kvm_tdp_mmu_zap_all
>                          kvm_arch_flush_shadow_all
>                                  kvm_flush_shadow_all
>                                          kvm_destroy_vm (*)
>                                          kvm_mmu_notifier_release (*)
>                  kvm_tdp_mmu_zap_invalidated_roots
>                          kvm_mmu_zap_all_fast (**)
> kvm_tdp_mmu_zap_sp
>          kvm_recover_nx_huge_pages (***)

But not all paths to remove_external_spte():
kvm_arch_flush_shadow_memslot()
  kvm_mmu_zap_memslot_leafs()
    kvm_tdp_mmu_unmap_gfn_range()
      tdp_mmu_zap_leafs()
        tdp_mmu_iter_set_spte()
          tdp_mmu_set_spte()
            remove_external_spte()
              tdx_sept_remove_private_spte()

But we can probably keep the warning if we prevent KVM_PRE_FAULT_MEMORY as you
pointed earlier. I didn't see that that kvm->arch.pre_fault_allowed  got added.
Yan Zhao Sept. 11, 2024, 1:05 a.m. UTC | #6
On Wed, Sep 11, 2024 at 07:58:01AM +0800, Edgecombe, Rick P wrote:
> On Tue, 2024-09-10 at 11:33 +0200, Paolo Bonzini wrote:
> > > But actually, I wonder if we need to remove the KVM_BUG_ON(). I think if you
> > > did
> > > a KVM_PRE_FAULT_MEMORY and then deleted the memslot you could hit it?
> > 
> > I think all paths to handle_removed_pt() are safe:
> > 
> > __tdp_mmu_zap_root
> >          tdp_mmu_zap_root
> >                  kvm_tdp_mmu_zap_all
> >                          kvm_arch_flush_shadow_all
> >                                  kvm_flush_shadow_all
> >                                          kvm_destroy_vm (*)
> >                                          kvm_mmu_notifier_release (*)
> >                  kvm_tdp_mmu_zap_invalidated_roots
> >                          kvm_mmu_zap_all_fast (**)
> > kvm_tdp_mmu_zap_sp
> >          kvm_recover_nx_huge_pages (***)
> 
> But not all paths to remove_external_spte():
> kvm_arch_flush_shadow_memslot()
>   kvm_mmu_zap_memslot_leafs()
>     kvm_tdp_mmu_unmap_gfn_range()
>       tdp_mmu_zap_leafs()
>         tdp_mmu_iter_set_spte()
>           tdp_mmu_set_spte()
>             remove_external_spte()
>               tdx_sept_remove_private_spte()
> 
> But we can probably keep the warning if we prevent KVM_PRE_FAULT_MEMORY as you
> pointed earlier. I didn't see that that kvm->arch.pre_fault_allowed  got added.
Note:
If we diallow vCPU to be created before vm ioctl KVM_TDX_INIT_VM is done,
the vCPU ioctl KVM_PRE_FAULT_MEMORY can't be executed.
Then we can't hit the 
"if (KVM_BUG_ON(!is_hkid_assigned(to_kvm_tdx(kvm)), kvm))"
in tdx_sept_remove_private_spte().
Binbin Wu Oct. 30, 2024, 3:03 a.m. UTC | #7
On 9/4/2024 11:07 AM, Rick Edgecombe wrote:
> From: Isaku Yamahata <isaku.yamahata@intel.com>
>
[...]
> diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
> index 6feb3ab96926..b8cd5a629a80 100644
> --- a/arch/x86/kvm/vmx/tdx.c
> +++ b/arch/x86/kvm/vmx/tdx.c
> @@ -447,6 +447,177 @@ void tdx_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa, int pgd_level)
>   	td_vmcs_write64(to_tdx(vcpu), SHARED_EPT_POINTER, root_hpa);
>   }
>   
> +static void tdx_unpin(struct kvm *kvm, kvm_pfn_t pfn)
> +{
> +	struct page *page = pfn_to_page(pfn);
> +
> +	put_page(page);
Nit: It can be
put_page(pfn_to_page(pfn));


> +}
> +
> +static int tdx_mem_page_aug(struct kvm *kvm, gfn_t gfn,
> +			    enum pg_level level, kvm_pfn_t pfn)
> +{
> +	int tdx_level = pg_level_to_tdx_sept_level(level);
> +	struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
> +	hpa_t hpa = pfn_to_hpa(pfn);
> +	gpa_t gpa = gfn_to_gpa(gfn);
> +	u64 entry, level_state;
> +	u64 err;
> +
> +	err = tdh_mem_page_aug(kvm_tdx, gpa, hpa, &entry, &level_state);
Nit:
Usually, kernel prefers to handle and return for error conditions first.

But for this case, for all error conditions, it needs to unpin the page.
Is it better to return the successful case first, so that it only needs
to call tdx_unpin() once?

> +	if (unlikely(err == TDX_ERROR_SEPT_BUSY)) {
> +		tdx_unpin(kvm, pfn);
> +		return -EAGAIN;
> +	}
> +	if (unlikely(err == (TDX_EPT_ENTRY_STATE_INCORRECT | TDX_OPERAND_ID_RCX))) {
> +		if (tdx_get_sept_level(level_state) == tdx_level &&
> +		    tdx_get_sept_state(level_state) == TDX_SEPT_PENDING &&
> +		    is_last_spte(entry, level) &&
> +		    spte_to_pfn(entry) == pfn &&
> +		    entry & VMX_EPT_SUPPRESS_VE_BIT) {
Can this condition be triggered?
For contention from multiple vCPUs, the winner has frozen the SPTE,
it shouldn't trigger this.
Could KVM  do page aug for a same page multiple times somehow?


> +			tdx_unpin(kvm, pfn);
> +			return -EAGAIN;
> +		}
> +	}
> +	if (KVM_BUG_ON(err, kvm)) {
> +		pr_tdx_error_2(TDH_MEM_PAGE_AUG, err, entry, level_state);
> +		tdx_unpin(kvm, pfn);
> +		return -EIO;
> +	}
> +
> +	return 0;
> +}
> +
> +int tdx_sept_set_private_spte(struct kvm *kvm, gfn_t gfn,
> +			      enum pg_level level, kvm_pfn_t pfn)
> +{
> +	struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
> +
> +	/* TODO: handle large pages. */
> +	if (KVM_BUG_ON(level != PG_LEVEL_4K, kvm))
> +		return -EINVAL;
> +
> +	/*
> +	 * Because guest_memfd doesn't support page migration with
> +	 * a_ops->migrate_folio (yet), no callback is triggered for KVM on page
> +	 * migration.  Until guest_memfd supports page migration, prevent page
> +	 * migration.
> +	 * TODO: Once guest_memfd introduces callback on page migration,
> +	 * implement it and remove get_page/put_page().
> +	 */
> +	get_page(pfn_to_page(pfn));
> +
> +	if (likely(is_td_finalized(kvm_tdx)))
> +		return tdx_mem_page_aug(kvm, gfn, level, pfn);
> +
> +	/*
> +	 * TODO: KVM_MAP_MEMORY support to populate before finalize comes
> +	 * here for the initial memory.
> +	 */
> +	return 0;
Is it better to return error before adding the support?

> +}
> +
[...]
Yan Zhao Nov. 4, 2024, 9:09 a.m. UTC | #8
On Wed, Oct 30, 2024 at 11:03:39AM +0800, Binbin Wu wrote:
> On 9/4/2024 11:07 AM, Rick Edgecombe wrote:
> > From: Isaku Yamahata <isaku.yamahata@intel.com>
> > 
> [...]
> > diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
> > index 6feb3ab96926..b8cd5a629a80 100644
> > --- a/arch/x86/kvm/vmx/tdx.c
> > +++ b/arch/x86/kvm/vmx/tdx.c
> > @@ -447,6 +447,177 @@ void tdx_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa, int pgd_level)
> >   	td_vmcs_write64(to_tdx(vcpu), SHARED_EPT_POINTER, root_hpa);
> >   }
> > +static void tdx_unpin(struct kvm *kvm, kvm_pfn_t pfn)
> > +{
> > +	struct page *page = pfn_to_page(pfn);
> > +
> > +	put_page(page);
> Nit: It can be
> put_page(pfn_to_page(pfn));
> 
Yes, thanks.

> 
> > +}
> > +
> > +static int tdx_mem_page_aug(struct kvm *kvm, gfn_t gfn,
> > +			    enum pg_level level, kvm_pfn_t pfn)
> > +{
> > +	int tdx_level = pg_level_to_tdx_sept_level(level);
> > +	struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
> > +	hpa_t hpa = pfn_to_hpa(pfn);
> > +	gpa_t gpa = gfn_to_gpa(gfn);
> > +	u64 entry, level_state;
> > +	u64 err;
> > +
> > +	err = tdh_mem_page_aug(kvm_tdx, gpa, hpa, &entry, &level_state);
> Nit:
> Usually, kernel prefers to handle and return for error conditions first.
> 
> But for this case, for all error conditions, it needs to unpin the page.
> Is it better to return the successful case first, so that it only needs
> to call tdx_unpin() once?
> 
> > +	if (unlikely(err == TDX_ERROR_SEPT_BUSY)) {
> > +		tdx_unpin(kvm, pfn);
> > +		return -EAGAIN;
> > +	}
As how to handle the busy is not decided up to now, I prefer to leave this hunk
as is.

> > +	if (unlikely(err == (TDX_EPT_ENTRY_STATE_INCORRECT | TDX_OPERAND_ID_RCX))) {
> > +		if (tdx_get_sept_level(level_state) == tdx_level &&
> > +		    tdx_get_sept_state(level_state) == TDX_SEPT_PENDING &&
> > +		    is_last_spte(entry, level) &&
> > +		    spte_to_pfn(entry) == pfn &&
> > +		    entry & VMX_EPT_SUPPRESS_VE_BIT) {
> Can this condition be triggered?
> For contention from multiple vCPUs, the winner has frozen the SPTE,
> it shouldn't trigger this.
> Could KVM  do page aug for a same page multiple times somehow?
This condition should not be triggered due to the BUG_ON in
set_external_spte_present().

With Isaku's series [1], this condition will not happen either.

Will remove it. Thanks!


[1] https://lore.kernel.org/all/cover.1728718232.git.isaku.yamahata@intel.com/

> 
> > +			tdx_unpin(kvm, pfn);
> > +			return -EAGAIN;
> > +		}
> > +	}
> > +	if (KVM_BUG_ON(err, kvm)) {
> > +		pr_tdx_error_2(TDH_MEM_PAGE_AUG, err, entry, level_state);
> > +		tdx_unpin(kvm, pfn);
> > +		return -EIO;
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +int tdx_sept_set_private_spte(struct kvm *kvm, gfn_t gfn,
> > +			      enum pg_level level, kvm_pfn_t pfn)
> > +{
> > +	struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
> > +
> > +	/* TODO: handle large pages. */
> > +	if (KVM_BUG_ON(level != PG_LEVEL_4K, kvm))
> > +		return -EINVAL;
> > +
> > +	/*
> > +	 * Because guest_memfd doesn't support page migration with
> > +	 * a_ops->migrate_folio (yet), no callback is triggered for KVM on page
> > +	 * migration.  Until guest_memfd supports page migration, prevent page
> > +	 * migration.
> > +	 * TODO: Once guest_memfd introduces callback on page migration,
> > +	 * implement it and remove get_page/put_page().
> > +	 */
> > +	get_page(pfn_to_page(pfn));
> > +
> > +	if (likely(is_td_finalized(kvm_tdx)))
> > +		return tdx_mem_page_aug(kvm, gfn, level, pfn);
> > +
> > +	/*
> > +	 * TODO: KVM_MAP_MEMORY support to populate before finalize comes
> > +	 * here for the initial memory.
> > +	 */
> > +	return 0;
> Is it better to return error before adding the support?
Hmm, returning error is better, though returning 0 is not wrong.
The future tdx_mem_page_record_premap_cnt() just increases
kvm_tdx->nr_premapped, while tdx_vcpu_init_mem_region() can be implemented
without checking kvm_tdx->nr_premapped.


> > +}
> > +
> [...]
> 
>
diff mbox series

Patch

diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
index 1c86849680a3..bf6fd5cca1d6 100644
--- a/arch/x86/kvm/vmx/main.c
+++ b/arch/x86/kvm/vmx/main.c
@@ -36,9 +36,21 @@  static __init int vt_hardware_setup(void)
 	 * is KVM may allocate couple of more bytes than needed for
 	 * each VM.
 	 */
-	if (enable_tdx)
+	if (enable_tdx) {
 		vt_x86_ops.vm_size = max_t(unsigned int, vt_x86_ops.vm_size,
 				sizeof(struct kvm_tdx));
+		/*
+		 * Note, TDX may fail to initialize in a later time in
+		 * vt_init(), in which case it is not necessary to setup
+		 * those callbacks.  But making them valid here even
+		 * when TDX fails to init later is fine because those
+		 * callbacks won't be called if the VM isn't TDX guest.
+		 */
+		vt_x86_ops.link_external_spt = tdx_sept_link_private_spt;
+		vt_x86_ops.set_external_spte = tdx_sept_set_private_spte;
+		vt_x86_ops.free_external_spt = tdx_sept_free_private_spt;
+		vt_x86_ops.remove_external_spte = tdx_sept_remove_private_spte;
+	}
 
 	return 0;
 }
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index 6feb3ab96926..b8cd5a629a80 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -447,6 +447,177 @@  void tdx_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa, int pgd_level)
 	td_vmcs_write64(to_tdx(vcpu), SHARED_EPT_POINTER, root_hpa);
 }
 
+static void tdx_unpin(struct kvm *kvm, kvm_pfn_t pfn)
+{
+	struct page *page = pfn_to_page(pfn);
+
+	put_page(page);
+}
+
+static int tdx_mem_page_aug(struct kvm *kvm, gfn_t gfn,
+			    enum pg_level level, kvm_pfn_t pfn)
+{
+	int tdx_level = pg_level_to_tdx_sept_level(level);
+	struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
+	hpa_t hpa = pfn_to_hpa(pfn);
+	gpa_t gpa = gfn_to_gpa(gfn);
+	u64 entry, level_state;
+	u64 err;
+
+	err = tdh_mem_page_aug(kvm_tdx, gpa, hpa, &entry, &level_state);
+	if (unlikely(err == TDX_ERROR_SEPT_BUSY)) {
+		tdx_unpin(kvm, pfn);
+		return -EAGAIN;
+	}
+	if (unlikely(err == (TDX_EPT_ENTRY_STATE_INCORRECT | TDX_OPERAND_ID_RCX))) {
+		if (tdx_get_sept_level(level_state) == tdx_level &&
+		    tdx_get_sept_state(level_state) == TDX_SEPT_PENDING &&
+		    is_last_spte(entry, level) &&
+		    spte_to_pfn(entry) == pfn &&
+		    entry & VMX_EPT_SUPPRESS_VE_BIT) {
+			tdx_unpin(kvm, pfn);
+			return -EAGAIN;
+		}
+	}
+	if (KVM_BUG_ON(err, kvm)) {
+		pr_tdx_error_2(TDH_MEM_PAGE_AUG, err, entry, level_state);
+		tdx_unpin(kvm, pfn);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+int tdx_sept_set_private_spte(struct kvm *kvm, gfn_t gfn,
+			      enum pg_level level, kvm_pfn_t pfn)
+{
+	struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
+
+	/* TODO: handle large pages. */
+	if (KVM_BUG_ON(level != PG_LEVEL_4K, kvm))
+		return -EINVAL;
+
+	/*
+	 * Because guest_memfd doesn't support page migration with
+	 * a_ops->migrate_folio (yet), no callback is triggered for KVM on page
+	 * migration.  Until guest_memfd supports page migration, prevent page
+	 * migration.
+	 * TODO: Once guest_memfd introduces callback on page migration,
+	 * implement it and remove get_page/put_page().
+	 */
+	get_page(pfn_to_page(pfn));
+
+	if (likely(is_td_finalized(kvm_tdx)))
+		return tdx_mem_page_aug(kvm, gfn, level, pfn);
+
+	/*
+	 * TODO: KVM_MAP_MEMORY support to populate before finalize comes
+	 * here for the initial memory.
+	 */
+	return 0;
+}
+
+static int tdx_sept_drop_private_spte(struct kvm *kvm, gfn_t gfn,
+				      enum pg_level level, kvm_pfn_t pfn)
+{
+	int tdx_level = pg_level_to_tdx_sept_level(level);
+	struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
+	gpa_t gpa = gfn_to_gpa(gfn);
+	hpa_t hpa = pfn_to_hpa(pfn);
+	hpa_t hpa_with_hkid;
+	u64 err, entry, level_state;
+
+	/* TODO: handle large pages. */
+	if (KVM_BUG_ON(level != PG_LEVEL_4K, kvm))
+		return -EINVAL;
+
+	if (KVM_BUG_ON(!is_hkid_assigned(kvm_tdx), kvm))
+		return -EINVAL;
+
+	do {
+		/*
+		 * When zapping private page, write lock is held. So no race
+		 * condition with other vcpu sept operation.  Race only with
+		 * TDH.VP.ENTER.
+		 */
+		err = tdh_mem_page_remove(kvm_tdx, gpa, tdx_level, &entry,
+					  &level_state);
+	} while (unlikely(err == TDX_ERROR_SEPT_BUSY));
+	if (unlikely(!is_td_finalized(kvm_tdx) &&
+		     err == (TDX_EPT_WALK_FAILED | TDX_OPERAND_ID_RCX))) {
+		/*
+		 * This page was mapped with KVM_MAP_MEMORY, but
+		 * KVM_TDX_INIT_MEM_REGION is not issued yet.
+		 */
+		if (!is_last_spte(entry, level) || !(entry & VMX_EPT_RWX_MASK)) {
+			tdx_unpin(kvm, pfn);
+			return 0;
+		}
+	}
+	if (KVM_BUG_ON(err, kvm)) {
+		pr_tdx_error_2(TDH_MEM_PAGE_REMOVE, err, entry, level_state);
+		return -EIO;
+	}
+
+	hpa_with_hkid = set_hkid_to_hpa(hpa, (u16)kvm_tdx->hkid);
+	do {
+		/*
+		 * TDX_OPERAND_BUSY can happen on locking PAMT entry.  Because
+		 * this page was removed above, other thread shouldn't be
+		 * repeatedly operating on this page.  Just retry loop.
+		 */
+		err = tdh_phymem_page_wbinvd(hpa_with_hkid);
+	} while (unlikely(err == (TDX_OPERAND_BUSY | TDX_OPERAND_ID_RCX)));
+	if (KVM_BUG_ON(err, kvm)) {
+		pr_tdx_error(TDH_PHYMEM_PAGE_WBINVD, err);
+		return -EIO;
+	}
+	tdx_clear_page(hpa);
+	tdx_unpin(kvm, pfn);
+	return 0;
+}
+
+int tdx_sept_link_private_spt(struct kvm *kvm, gfn_t gfn,
+			      enum pg_level level, void *private_spt)
+{
+	int tdx_level = pg_level_to_tdx_sept_level(level);
+	gpa_t gpa = gfn_to_gpa(gfn);
+	hpa_t hpa = __pa(private_spt);
+	u64 err, entry, level_state;
+
+	err = tdh_mem_sept_add(to_kvm_tdx(kvm), gpa, tdx_level, hpa, &entry,
+			       &level_state);
+	if (unlikely(err == TDX_ERROR_SEPT_BUSY))
+		return -EAGAIN;
+	if (KVM_BUG_ON(err, kvm)) {
+		pr_tdx_error_2(TDH_MEM_SEPT_ADD, err, entry, level_state);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static int tdx_sept_zap_private_spte(struct kvm *kvm, gfn_t gfn,
+				     enum pg_level level)
+{
+	int tdx_level = pg_level_to_tdx_sept_level(level);
+	struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
+	gpa_t gpa = gfn_to_gpa(gfn) & KVM_HPAGE_MASK(level);
+	u64 err, entry, level_state;
+
+	/* For now large page isn't supported yet. */
+	WARN_ON_ONCE(level != PG_LEVEL_4K);
+
+	err = tdh_mem_range_block(kvm_tdx, gpa, tdx_level, &entry, &level_state);
+	if (unlikely(err == TDX_ERROR_SEPT_BUSY))
+		return -EAGAIN;
+	if (KVM_BUG_ON(err, kvm)) {
+		pr_tdx_error_2(TDH_MEM_RANGE_BLOCK, err, entry, level_state);
+		return -EIO;
+	}
+	return 0;
+}
+
 /*
  * Ensure shared and private EPTs to be flushed on all vCPUs.
  * tdh_mem_track() is the only caller that increases TD epoch. An increase in
@@ -471,7 +642,7 @@  void tdx_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa, int pgd_level)
  * occurs certainly after TD epoch increment and before the next
  * tdh_mem_track().
  */
-static void __always_unused tdx_track(struct kvm *kvm)
+static void tdx_track(struct kvm *kvm)
 {
 	struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
 	u64 err;
@@ -492,6 +663,55 @@  static void __always_unused tdx_track(struct kvm *kvm)
 	kvm_make_all_cpus_request(kvm, KVM_REQ_OUTSIDE_GUEST_MODE);
 }
 
+int tdx_sept_free_private_spt(struct kvm *kvm, gfn_t gfn,
+			      enum pg_level level, void *private_spt)
+{
+	struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
+
+	/*
+	 * free_external_spt() is only called after hkid is freed when TD is
+	 * tearing down.
+	 * KVM doesn't (yet) zap page table pages in mirror page table while
+	 * TD is active, though guest pages mapped in mirror page table could be
+	 * zapped during TD is active, e.g. for shared <-> private conversion
+	 * and slot move/deletion.
+	 */
+	if (KVM_BUG_ON(is_hkid_assigned(kvm_tdx), kvm))
+		return -EINVAL;
+
+	/*
+	 * The HKID assigned to this TD was already freed and cache was
+	 * already flushed. We don't have to flush again.
+	 */
+	return tdx_reclaim_page(__pa(private_spt));
+}
+
+int tdx_sept_remove_private_spte(struct kvm *kvm, gfn_t gfn,
+				 enum pg_level level, kvm_pfn_t pfn)
+{
+	int ret;
+
+	/*
+	 * HKID is released when vm_free() which is after closing gmem_fd
+	 * which causes gmem invalidation to zap all spte.
+	 * Population is only allowed after KVM_TDX_INIT_VM.
+	 */
+	if (KVM_BUG_ON(!is_hkid_assigned(to_kvm_tdx(kvm)), kvm))
+		return -EINVAL;
+
+	ret = tdx_sept_zap_private_spte(kvm, gfn, level);
+	if (ret)
+		return ret;
+
+	/*
+	 * TDX requires TLB tracking before dropping private page.  Do
+	 * it here, although it is also done later.
+	 */
+	tdx_track(kvm);
+
+	return tdx_sept_drop_private_spte(kvm, gfn, level, pfn);
+}
+
 static int tdx_get_capabilities(struct kvm_tdx_cmd *cmd)
 {
 	const struct tdx_sys_info_td_conf *td_conf = &tdx_sysinfo->td_conf;
diff --git a/arch/x86/kvm/vmx/tdx_arch.h b/arch/x86/kvm/vmx/tdx_arch.h
index 815e74408a34..634ed76db26a 100644
--- a/arch/x86/kvm/vmx/tdx_arch.h
+++ b/arch/x86/kvm/vmx/tdx_arch.h
@@ -155,6 +155,29 @@  struct td_params {
 #define TDX_MIN_TSC_FREQUENCY_KHZ		(100 * 1000)
 #define TDX_MAX_TSC_FREQUENCY_KHZ		(10 * 1000 * 1000)
 
+/* Additional Secure EPT entry information */
+#define TDX_SEPT_LEVEL_MASK		GENMASK_ULL(2, 0)
+#define TDX_SEPT_STATE_MASK		GENMASK_ULL(15, 8)
+#define TDX_SEPT_STATE_SHIFT		8
+
+enum tdx_sept_entry_state {
+	TDX_SEPT_FREE = 0,
+	TDX_SEPT_BLOCKED = 1,
+	TDX_SEPT_PENDING = 2,
+	TDX_SEPT_PENDING_BLOCKED = 3,
+	TDX_SEPT_PRESENT = 4,
+};
+
+static inline u8 tdx_get_sept_level(u64 sept_entry_info)
+{
+	return sept_entry_info & TDX_SEPT_LEVEL_MASK;
+}
+
+static inline u8 tdx_get_sept_state(u64 sept_entry_info)
+{
+	return (sept_entry_info & TDX_SEPT_STATE_MASK) >> TDX_SEPT_STATE_SHIFT;
+}
+
 #define MD_FIELD_ID_FEATURES0_TOPOLOGY_ENUM	BIT_ULL(20)
 
 /*
diff --git a/arch/x86/kvm/vmx/tdx_ops.h b/arch/x86/kvm/vmx/tdx_ops.h
index 8ca3e252a6ed..73ffd80223b0 100644
--- a/arch/x86/kvm/vmx/tdx_ops.h
+++ b/arch/x86/kvm/vmx/tdx_ops.h
@@ -31,6 +31,12 @@ 
 #define pr_tdx_error_3(__fn, __err, __rcx, __rdx, __r8)	\
 	pr_tdx_error_N(__fn, __err, "rcx 0x%llx, rdx 0x%llx, r8 0x%llx\n", __rcx, __rdx, __r8)
 
+static inline int pg_level_to_tdx_sept_level(enum pg_level level)
+{
+	WARN_ON_ONCE(level == PG_LEVEL_NONE);
+	return level - 1;
+}
+
 /*
  * TDX module acquires its internal lock for resources.  It doesn't spin to get
  * locks because of its restrictions of allowed execution time.  Instead, it
diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h
index 28fda93f0b27..d1db807b793a 100644
--- a/arch/x86/kvm/vmx/x86_ops.h
+++ b/arch/x86/kvm/vmx/x86_ops.h
@@ -131,6 +131,15 @@  void tdx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event);
 
 int tdx_vcpu_ioctl(struct kvm_vcpu *vcpu, void __user *argp);
 
+int tdx_sept_link_private_spt(struct kvm *kvm, gfn_t gfn,
+			      enum pg_level level, void *private_spt);
+int tdx_sept_free_private_spt(struct kvm *kvm, gfn_t gfn,
+			      enum pg_level level, void *private_spt);
+int tdx_sept_set_private_spte(struct kvm *kvm, gfn_t gfn,
+			      enum pg_level level, kvm_pfn_t pfn);
+int tdx_sept_remove_private_spte(struct kvm *kvm, gfn_t gfn,
+				 enum pg_level level, kvm_pfn_t pfn);
+
 void tdx_flush_tlb_current(struct kvm_vcpu *vcpu);
 void tdx_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa, int root_level);
 #else
@@ -146,6 +155,34 @@  static inline void tdx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) {}
 
 static inline int tdx_vcpu_ioctl(struct kvm_vcpu *vcpu, void __user *argp) { return -EOPNOTSUPP; }
 
+static inline int tdx_sept_link_private_spt(struct kvm *kvm, gfn_t gfn,
+					    enum pg_level level,
+					    void *private_spt)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int tdx_sept_free_private_spt(struct kvm *kvm, gfn_t gfn,
+					    enum pg_level level,
+					    void *private_spt)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int tdx_sept_set_private_spte(struct kvm *kvm, gfn_t gfn,
+					    enum pg_level level,
+					    kvm_pfn_t pfn)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int tdx_sept_remove_private_spte(struct kvm *kvm, gfn_t gfn,
+					       enum pg_level level,
+					       kvm_pfn_t pfn)
+{
+	return -EOPNOTSUPP;
+}
+
 static inline void tdx_flush_tlb_current(struct kvm_vcpu *vcpu) {}
 static inline void tdx_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa, int root_level) {}
 #endif