Message ID | 20250101074959.412696-11-pbonzini@redhat.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | x86/virt/tdx: Add SEAMCALL wrappers for KVM | expand |
On Wed, 2025-01-01 at 02:49 -0500, Paolo Bonzini wrote:
> From: Isaku Yamahata <isaku.yamahata@intel.com>
>
> TDX architecture introduces the concept of private GPA vs shared GPA,
> depending on the GPA.SHARED bit. The TDX module maintains a single Secure
> EPT (S-EPT or SEPT) tree per TD to translate TD's private memory accessed
> using a private GPA. Wrap the SEAMCALL TDH.MEM.PAGE.REMOVE with
> tdh_mem_page_remove() and TDH_PHYMEM_PAGE_WBINVD with
> tdh_phymem_page_wbinvd_hkid() to unmap a TD private page from the SEPT,
> remove the TD private page from the TDX module and flush cache lines to
> memory after removal of the private page.
>
> Callers should specify "GPA" and "level" when calling tdh_mem_page_remove()
> to indicate to the TDX module which TD private page to unmap and remove.
>
> TDH.MEM.PAGE.REMOVE may fail, and the caller of tdh_mem_page_remove() can
> check the function return value and retrieve extended error information
> from the function output parameters. Follow the TLB tracking protocol
> before calling tdh_mem_page_remove() to remove a TD private page to avoid
> SEAMCALL failure.
>
> After removing a TD's private page, the TDX module does not write back and
> invalidate cache lines associated with the page and the page's keyID (i.e.,
> the TD's guest keyID). Therefore, provide tdh_phymem_page_wbinvd_hkid() to
> allow the caller to pass in the TD's guest keyID and invoke
> TDH_PHYMEM_PAGE_WBINVD to perform this action.
>
> Before reusing the page, the host kernel needs to map the page with keyID 0
> and invoke movdir64b() to convert the TD private page to a normal shared
> page.
>
> TDH.MEM.PAGE.REMOVE and TDH_PHYMEM_PAGE_WBINVD may meet contentions inside
> the TDX module for TDX's internal resources. To avoid staying in SEAM mode
> for too long, TDX module will return a BUSY error code to the kernel
> instead of spinning on the locks. The caller may need to handle this error
> in specific ways (e.g., retry). The wrappers return the SEAMCALL error code
> directly to the caller. Don't attempt to handle it in the core kernel.
>
> [Kai: Switched from generic seamcall export]
> [Yan: Re-wrote the changelog]
> Co-developed-by: Sean Christopherson <sean.j.christopherson@intel.com>
> Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
> Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> Signed-off-by: Kai Huang <kai.huang@intel.com>
> Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
> Signed-off-by: Yan Zhao <yan.y.zhao@intel.com>
> Message-ID: <20241112073658.22157-1-yan.y.zhao@intel.com>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  arch/x86/include/asm/tdx.h | 2 ++
>  arch/x86/virt/vmx/tdx/tdx.c | 27 +++++++++++++++++++++++++++
>  arch/x86/virt/vmx/tdx/tdx.h | 1 +
>  3 files changed, 30 insertions(+)
>
> diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h
> index f0b7b7b7d506..74938f725481 100644
> --- a/arch/x86/include/asm/tdx.h
> +++ b/arch/x86/include/asm/tdx.h
> @@ -157,8 +157,10 @@ u64 tdh_vp_wr(struct tdx_vp *vp, u64 field, u64 data, u64 mask);
>  u64 tdh_vp_init_apicid(struct tdx_vp *vp, u64 initial_rcx, u32 x2apicid);
>  u64 tdh_phymem_page_reclaim(struct page *page, u64 *tdx_pt, u64 *tdx_owner, u64 *tdx_size);
>  u64 tdh_mem_track(struct tdx_td *tdr);
> +u64 tdh_mem_page_remove(struct tdx_td *td, u64 gpa, u64 level, u64 *rcx, u64 *rdx);
>  u64 tdh_phymem_cache_wb(bool resume);
>  u64 tdh_phymem_page_wbinvd_tdr(struct tdx_td *td);
> +u64 tdh_phymem_page_wbinvd_hkid(u64 hpa, u64 hkid);
>  #else
>  static inline void tdx_init(void) { }
>  static inline int tdx_cpu_enable(void) { return -ENODEV; }
> diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c
> index c7e6f30d0a14..cde55e9b3280 100644
> --- a/arch/x86/virt/vmx/tdx/tdx.c
> +++ b/arch/x86/virt/vmx/tdx/tdx.c
> @@ -1761,6 +1761,23 @@ u64 tdh_mem_track(struct tdx_td *td)
>  }
>  EXPORT_SYMBOL_GPL(tdh_mem_track);
>  
> +u64 tdh_mem_page_remove(struct tdx_td *td, u64 gpa, u64 level, u64 *rcx, u64 *rdx)

level could be an int instead of a u64. An enum was also discussed, but
considered to be not completely necessary. Probably we could even lose the
level arg, depending on what we want to do about the one for page.aug.

> +{
> +	struct tdx_module_args args = {
> +		.rcx = gpa | level,

Yan had "= gpa | (level & 0x7)" here, to make sure to only apply bits 0-2.

> +		.rdx = tdx_tdr_pa(td),
> +	};
> +	u64 ret;
> +
> +	ret = seamcall_ret(TDH_MEM_PAGE_REMOVE, &args);
> +
> +	*rcx = args.rcx;
> +	*rdx = args.rdx;

Switch to extended_err1/2 if the others get changed.

> +
> +	return ret;
> +}
> +EXPORT_SYMBOL_GPL(tdh_mem_page_remove);
> +
>  u64 tdh_phymem_cache_wb(bool resume)
>  {
>  	struct tdx_module_args args = {
> @@ -1780,3 +1797,13 @@ u64 tdh_phymem_page_wbinvd_tdr(struct tdx_td *td)
>  	return seamcall(TDH_PHYMEM_PAGE_WBINVD, &args);
>  }
>  EXPORT_SYMBOL_GPL(tdh_phymem_page_wbinvd_tdr);
> +
> +u64 tdh_phymem_page_wbinvd_hkid(u64 hpa, u64 hkid)
> +{
> +	struct tdx_module_args args = {};
> +
> +	args.rcx = hpa | (hkid << boot_cpu_data.x86_phys_bits);
> +
> +	return seamcall(TDH_PHYMEM_PAGE_WBINVD, &args);
> +}
> +EXPORT_SYMBOL_GPL(tdh_phymem_page_wbinvd_hkid);
> diff --git a/arch/x86/virt/vmx/tdx/tdx.h b/arch/x86/virt/vmx/tdx/tdx.h
> index 4b0ad536afd9..d49cdd9b0577 100644
> --- a/arch/x86/virt/vmx/tdx/tdx.h
> +++ b/arch/x86/virt/vmx/tdx/tdx.h
> @@ -33,6 +33,7 @@
>  #define TDH_PHYMEM_PAGE_RDMD 24
>  #define TDH_VP_RD 26
>  #define TDH_PHYMEM_PAGE_RECLAIM 28
> +#define TDH_MEM_PAGE_REMOVE 29
>  #define TDH_SYS_KEY_CONFIG 31
>  #define TDH_SYS_INIT 33
>  #define TDH_SYS_RD 34
diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h
index f0b7b7b7d506..74938f725481 100644
--- a/arch/x86/include/asm/tdx.h
+++ b/arch/x86/include/asm/tdx.h
@@ -157,8 +157,10 @@ u64 tdh_vp_wr(struct tdx_vp *vp, u64 field, u64 data, u64 mask);
 u64 tdh_vp_init_apicid(struct tdx_vp *vp, u64 initial_rcx, u32 x2apicid);
 u64 tdh_phymem_page_reclaim(struct page *page, u64 *tdx_pt, u64 *tdx_owner, u64 *tdx_size);
 u64 tdh_mem_track(struct tdx_td *tdr);
+u64 tdh_mem_page_remove(struct tdx_td *td, u64 gpa, u64 level, u64 *rcx, u64 *rdx);
 u64 tdh_phymem_cache_wb(bool resume);
 u64 tdh_phymem_page_wbinvd_tdr(struct tdx_td *td);
+u64 tdh_phymem_page_wbinvd_hkid(u64 hpa, u64 hkid);
 #else
 static inline void tdx_init(void) { }
 static inline int tdx_cpu_enable(void) { return -ENODEV; }
diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c
index c7e6f30d0a14..cde55e9b3280 100644
--- a/arch/x86/virt/vmx/tdx/tdx.c
+++ b/arch/x86/virt/vmx/tdx/tdx.c
@@ -1761,6 +1761,23 @@ u64 tdh_mem_track(struct tdx_td *td)
 }
 EXPORT_SYMBOL_GPL(tdh_mem_track);
 
+u64 tdh_mem_page_remove(struct tdx_td *td, u64 gpa, u64 level, u64 *rcx, u64 *rdx)
+{
+	struct tdx_module_args args = {
+		.rcx = gpa | level,
+		.rdx = tdx_tdr_pa(td),
+	};
+	u64 ret;
+
+	ret = seamcall_ret(TDH_MEM_PAGE_REMOVE, &args);
+
+	*rcx = args.rcx;
+	*rdx = args.rdx;
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(tdh_mem_page_remove);
+
 u64 tdh_phymem_cache_wb(bool resume)
 {
 	struct tdx_module_args args = {
@@ -1780,3 +1797,13 @@ u64 tdh_phymem_page_wbinvd_tdr(struct tdx_td *td)
 	return seamcall(TDH_PHYMEM_PAGE_WBINVD, &args);
 }
 EXPORT_SYMBOL_GPL(tdh_phymem_page_wbinvd_tdr);
+
+u64 tdh_phymem_page_wbinvd_hkid(u64 hpa, u64 hkid)
+{
+	struct tdx_module_args args = {};
+
+	args.rcx = hpa | (hkid << boot_cpu_data.x86_phys_bits);
+
+	return seamcall(TDH_PHYMEM_PAGE_WBINVD, &args);
+}
+EXPORT_SYMBOL_GPL(tdh_phymem_page_wbinvd_hkid);
diff --git a/arch/x86/virt/vmx/tdx/tdx.h b/arch/x86/virt/vmx/tdx/tdx.h
index 4b0ad536afd9..d49cdd9b0577 100644
--- a/arch/x86/virt/vmx/tdx/tdx.h
+++ b/arch/x86/virt/vmx/tdx/tdx.h
@@ -33,6 +33,7 @@
 #define TDH_PHYMEM_PAGE_RDMD 24
 #define TDH_VP_RD 26
 #define TDH_PHYMEM_PAGE_RECLAIM 28
+#define TDH_MEM_PAGE_REMOVE 29
 #define TDH_SYS_KEY_CONFIG 31
 #define TDH_SYS_INIT 33
 #define TDH_SYS_RD 34