Message ID | 20250101074959.412696-11-pbonzini@redhat.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | x86/virt/tdx: Add SEAMCALL wrappers for KVM | expand |
On Wed, 2025-01-01 at 02:49 -0500, Paolo Bonzini wrote: > From: Isaku Yamahata <isaku.yamahata@intel.com> > > TDX architecture introduces the concept of private GPA vs shared GPA, > depending on the GPA.SHARED bit. The TDX module maintains a single Secure > EPT (S-EPT or SEPT) tree per TD to translate TD's private memory accessed > using a private GPA. Wrap the SEAMCALL TDH.MEM.PAGE.REMOVE with > tdh_mem_page_remove() and TDH_PHYMEM_PAGE_WBINVD with > tdh_phymem_page_wbinvd_hkid() to unmap a TD private page from the SEPT, > remove the TD private page from the TDX module and flush cache lines to > memory after removal of the private page. > > Callers should specify "GPA" and "level" when calling tdh_mem_page_remove() > to indicate to the TDX module which TD private page to unmap and remove. > > TDH.MEM.PAGE.REMOVE may fail, and the caller of tdh_mem_page_remove() can > check the function return value and retrieve extended error information > from the function output parameters. Follow the TLB tracking protocol > before calling tdh_mem_page_remove() to remove a TD private page to avoid > SEAMCALL failure. > > After removing a TD's private page, the TDX module does not write back and > invalidate cache lines associated with the page and the page's keyID (i.e., > the TD's guest keyID). Therefore, provide tdh_phymem_page_wbinvd_hkid() to > allow the caller to pass in the TD's guest keyID and invoke > TDH_PHYMEM_PAGE_WBINVD to perform this action. > > Before reusing the page, the host kernel needs to map the page with keyID 0 > and invoke movdir64b() to convert the TD private page to a normal shared > page. > > TDH.MEM.PAGE.REMOVE and TDH_PHYMEM_PAGE_WBINVD may meet contentions inside > the TDX module for TDX's internal resources. To avoid staying in SEAM mode > for too long, TDX module will return a BUSY error code to the kernel > instead of spinning on the locks. The caller may need to handle this error > in specific ways (e.g., retry). 
The wrappers return the SEAMCALL error code > directly to the caller. Don't attempt to handle it in the core kernel. > > [Kai: Switched from generic seamcall export] > [Yan: Re-wrote the changelog] > Co-developed-by: Sean Christopherson <sean.j.christopherson@intel.com> > Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com> > Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com> > Signed-off-by: Kai Huang <kai.huang@intel.com> > Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com> > Signed-off-by: Yan Zhao <yan.y.zhao@intel.com> > Message-ID: <20241112073658.22157-1-yan.y.zhao@intel.com> > Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> > --- > arch/x86/include/asm/tdx.h | 2 ++ > arch/x86/virt/vmx/tdx/tdx.c | 27 +++++++++++++++++++++++++++ > arch/x86/virt/vmx/tdx/tdx.h | 1 + > 3 files changed, 30 insertions(+) > > diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h > index f0b7b7b7d506..74938f725481 100644 > --- a/arch/x86/include/asm/tdx.h > +++ b/arch/x86/include/asm/tdx.h > @@ -157,8 +157,10 @@ u64 tdh_vp_wr(struct tdx_vp *vp, u64 field, u64 data, u64 mask); > u64 tdh_vp_init_apicid(struct tdx_vp *vp, u64 initial_rcx, u32 x2apicid); > u64 tdh_phymem_page_reclaim(struct page *page, u64 *tdx_pt, u64 *tdx_owner, u64 *tdx_size); > u64 tdh_mem_track(struct tdx_td *tdr); > +u64 tdh_mem_page_remove(struct tdx_td *td, u64 gpa, u64 level, u64 *rcx, u64 *rdx); > u64 tdh_phymem_cache_wb(bool resume); > u64 tdh_phymem_page_wbinvd_tdr(struct tdx_td *td); > +u64 tdh_phymem_page_wbinvd_hkid(u64 hpa, u64 hkid); > #else > static inline void tdx_init(void) { } > static inline int tdx_cpu_enable(void) { return -ENODEV; } > diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c > index c7e6f30d0a14..cde55e9b3280 100644 > --- a/arch/x86/virt/vmx/tdx/tdx.c > +++ b/arch/x86/virt/vmx/tdx/tdx.c > @@ -1761,6 +1761,23 @@ u64 tdh_mem_track(struct tdx_td *td) > } > EXPORT_SYMBOL_GPL(tdh_mem_track); > > +u64 
tdh_mem_page_remove(struct tdx_td *td, u64 gpa, u64 level, u64 *rcx, u64 *rdx) level could be an int instead of a u64. An enum was also discussed, but considered to be not completely necessary. Probably we could even lose the level arg, depending on what we want to do about the one for page.aug. > +{ > + struct tdx_module_args args = { > + .rcx = gpa | level, Yan had "= gpa | (level & 0x7)" here, to make sure to only apply bits 0-2. > + .rdx = tdx_tdr_pa(td), > + }; > + u64 ret; > + > + ret = seamcall_ret(TDH_MEM_PAGE_REMOVE, &args); > + > + *rcx = args.rcx; > + *rdx = args.rdx; Switch to extended_err1/2 if the others get changed. > + > + return ret; > +} > +EXPORT_SYMBOL_GPL(tdh_mem_page_remove); > + > u64 tdh_phymem_cache_wb(bool resume) > { > struct tdx_module_args args = { > @@ -1780,3 +1797,13 @@ u64 tdh_phymem_page_wbinvd_tdr(struct tdx_td *td) > return seamcall(TDH_PHYMEM_PAGE_WBINVD, &args); > } > EXPORT_SYMBOL_GPL(tdh_phymem_page_wbinvd_tdr); > + > +u64 tdh_phymem_page_wbinvd_hkid(u64 hpa, u64 hkid) > +{ > + struct tdx_module_args args = {}; > + > + args.rcx = hpa | (hkid << boot_cpu_data.x86_phys_bits); > + > + return seamcall(TDH_PHYMEM_PAGE_WBINVD, &args); > +} > +EXPORT_SYMBOL_GPL(tdh_phymem_page_wbinvd_hkid); > diff --git a/arch/x86/virt/vmx/tdx/tdx.h b/arch/x86/virt/vmx/tdx/tdx.h > index 4b0ad536afd9..d49cdd9b0577 100644 > --- a/arch/x86/virt/vmx/tdx/tdx.h > +++ b/arch/x86/virt/vmx/tdx/tdx.h > @@ -33,6 +33,7 @@ > #define TDH_PHYMEM_PAGE_RDMD 24 > #define TDH_VP_RD 26 > #define TDH_PHYMEM_PAGE_RECLAIM 28 > +#define TDH_MEM_PAGE_REMOVE 29 > #define TDH_SYS_KEY_CONFIG 31 > #define TDH_SYS_INIT 33 > #define TDH_SYS_RD 34
New diffs and changelog: SEAMCALL RFC: - For tdh_mem_page_remove() a) Use struct tdx_td instead of raw TDR u64 b) Change "u64 level" to "int tdx_level". c) Change "u64 gpa" to "gfn_t gfn". (Reinette) d) Use union tdx_sept_gpa_mapping_info to initialize args.rcx. (Reinette) e) Use extended_err1/2 instead of rcx/rdx for output. - For tdh_phymem_page_wbinvd_hkid() a) Use "struct page *" instead of raw hpa. b) Change "u64 hkid" to "u16 hkid" (Reinette) diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h index 980daa142e92..be0fc55186a8 100644 --- a/arch/x86/include/asm/tdx.h +++ b/arch/x86/include/asm/tdx.h @@ -168,8 +168,11 @@ u64 tdh_vp_wr(struct tdx_vp *vp, u64 field, u64 data, u64 mask); u64 tdh_vp_init_apicid(struct tdx_vp *vp, u64 initial_rcx, u32 x2apicid); u64 tdh_phymem_page_reclaim(struct page *page, u64 *tdx_pt, u64 *tdx_owner, u64 *tdx_size); u64 tdh_mem_track(struct tdx_td *td); +u64 tdh_mem_page_remove(struct tdx_td *td, gfn_t gfn, int tdx_level, + u64 *extended_err1, u64 *extended_err2); u64 tdh_phymem_cache_wb(bool resume); u64 tdh_phymem_page_wbinvd_tdr(struct tdx_td *td); +u64 tdh_phymem_page_wbinvd_hkid(struct page *page, u16 hkid); #else static inline void tdx_init(void) { } static inline int tdx_cpu_enable(void) { return -ENODEV; } diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c index 120a415c1d7a..b4e4cfce3475 100644 --- a/arch/x86/virt/vmx/tdx/tdx.c +++ b/arch/x86/virt/vmx/tdx/tdx.c @@ -1862,6 +1862,25 @@ u64 tdh_mem_track(struct tdx_td *td) } EXPORT_SYMBOL_GPL(tdh_mem_track); +u64 tdh_mem_page_remove(struct tdx_td *td, gfn_t gfn, int tdx_level, + u64 *extended_err1, u64 *extended_err2) +{ + union tdx_sept_gpa_mapping_info gpa_info = { .level = tdx_level, .gfn = gfn, }; + struct tdx_module_args args = { + .rcx = gpa_info.full, + .rdx = tdx_tdr_pa(td), + }; + u64 ret; + + ret = seamcall_ret(TDH_MEM_PAGE_REMOVE, &args); + + *extended_err1 = args.rcx; + *extended_err2 = args.rdx; + + return ret; +} 
+EXPORT_SYMBOL_GPL(tdh_mem_page_remove); + u64 tdh_phymem_cache_wb(bool resume) { struct tdx_module_args args = { @@ -1882,3 +1901,12 @@ u64 tdh_phymem_page_wbinvd_tdr(struct tdx_td *td) } EXPORT_SYMBOL_GPL(tdh_phymem_page_wbinvd_tdr); +u64 tdh_phymem_page_wbinvd_hkid(struct page *page, u16 hkid) +{ + struct tdx_module_args args = {}; + + args.rcx = page_to_phys(page) | ((hpa_t)hkid << boot_cpu_data.x86_phys_bits); + + return seamcall(TDH_PHYMEM_PAGE_WBINVD, &args); +} +EXPORT_SYMBOL_GPL(tdh_phymem_page_wbinvd_hkid); diff --git a/arch/x86/virt/vmx/tdx/tdx.h b/arch/x86/virt/vmx/tdx/tdx.h index 24e32c838926..4de17d9c2e8c 100644 --- a/arch/x86/virt/vmx/tdx/tdx.h +++ b/arch/x86/virt/vmx/tdx/tdx.h @@ -35,6 +35,7 @@ #define TDH_PHYMEM_PAGE_RDMD 24 #define TDH_VP_RD 26 #define TDH_PHYMEM_PAGE_RECLAIM 28 +#define TDH_MEM_PAGE_REMOVE 29 #define TDH_SYS_KEY_CONFIG 31 #define TDH_SYS_INIT 33 #define TDH_SYS_RD 34
On Tue, Jan 07, 2025 at 02:43:11PM +0800, Yan Zhao wrote: ... > +u64 tdh_phymem_page_wbinvd_hkid(struct page *page, u16 hkid) > +{ > + struct tdx_module_args args = {}; > + > + args.rcx = page_to_phys(page) | ((hpa_t)hkid << boot_cpu_data.x86_phys_bits); > + > + return seamcall(TDH_PHYMEM_PAGE_WBINVD, &args); > +} Since the type of hkid is changed from u64 to u16, here's a fixup patch to further have tdh_phymem_page_wbinvd_tdr() in [1] and the tdh_phymem_page_wbinvd_hkid() in this patch use the common helper set_hkid_to_hpa(). [1] https://lore.kernel.org/kvm/20250101074959.412696-11-pbonzini@redhat.com/ commit 41f66e12a400516c6a851f0755f8abbe4dacb39b Author: Yan Zhao <yan.y.zhao@intel.com> Date: Wed Dec 11 18:11:24 2024 +0800 x86/virt/tdx: Move set_hkid_to_hpa() to x86 common header and use it Move set_hkid_to_hpa() from KVM TDX to x86 common header and have tdh_phymem_page_wbinvd_tdr() and tdh_phymem_page_wbinvd_hkid() to use it. Signed-off-by: Yan Zhao <yan.y.zhao@intel.com> diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h index 5420d07ee81c..5f3931e62c06 100644 --- a/arch/x86/include/asm/tdx.h +++ b/arch/x86/include/asm/tdx.h @@ -144,7 +144,13 @@ struct tdx_vp { struct page **tdcx_pages; }; +static __always_inline hpa_t set_hkid_to_hpa(hpa_t pa, u16 hkid) +{ + return pa | ((hpa_t)hkid << boot_cpu_data.x86_phys_bits); +} + /* SEAMCALL wrappers for creating/destroying/running TDX guests */ +u64 tdh_mng_addcx(struct tdx_td *td, struct page *tdcs_page); u64 tdh_vp_enter(u64 tdvpr, struct tdx_module_args *args); u64 tdh_mng_addcx(struct tdx_td *td, struct page *tdcs_page); u64 tdh_mem_page_add(struct tdx_td *td, gfn_t gfn, struct page *private_page, diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c index c3a84eb4694a..d86bfcbd6873 100644 --- a/arch/x86/kvm/vmx/tdx.c +++ b/arch/x86/kvm/vmx/tdx.c @@ -222,11 +222,6 @@ static inline int pg_level_to_tdx_sept_level(enum pg_level level) */ static DEFINE_PER_CPU(struct list_head, 
associated_tdvcpus); -static __always_inline hpa_t set_hkid_to_hpa(hpa_t pa, u16 hkid) -{ - return pa | ((hpa_t)hkid << boot_cpu_data.x86_phys_bits); -} - static __always_inline union vmx_exit_reason tdexit_exit_reason(struct kvm_vcpu *vcpu) { return (union vmx_exit_reason)(u32)(to_tdx(vcpu)->vp_enter_ret); diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c index 6f002e36e421..0d7a0a27bd3e 100644 --- a/arch/x86/virt/vmx/tdx/tdx.c +++ b/arch/x86/virt/vmx/tdx/tdx.c @@ -1930,7 +1930,7 @@ u64 tdh_phymem_page_wbinvd_tdr(struct tdx_td *td) { struct tdx_module_args args = {}; - args.rcx = tdx_tdr_pa(td) | ((u64)tdx_global_keyid << boot_cpu_data.x86_phys_bits); + args.rcx = set_hkid_to_hpa(tdx_tdr_pa(td), tdx_global_keyid); return seamcall(TDH_PHYMEM_PAGE_WBINVD, &args); } @@ -1940,7 +1940,7 @@ u64 tdh_phymem_page_wbinvd_hkid(struct page *page, u16 hkid) { struct tdx_module_args args = {}; - args.rcx = page_to_phys(page) | ((hpa_t)hkid << boot_cpu_data.x86_phys_bits); + args.rcx = set_hkid_to_hpa(page_to_phys(page), hkid); return seamcall(TDH_PHYMEM_PAGE_WBINVD, &args); }
diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h index f0b7b7b7d506..74938f725481 100644 --- a/arch/x86/include/asm/tdx.h +++ b/arch/x86/include/asm/tdx.h @@ -157,8 +157,10 @@ u64 tdh_vp_wr(struct tdx_vp *vp, u64 field, u64 data, u64 mask); u64 tdh_vp_init_apicid(struct tdx_vp *vp, u64 initial_rcx, u32 x2apicid); u64 tdh_phymem_page_reclaim(struct page *page, u64 *tdx_pt, u64 *tdx_owner, u64 *tdx_size); u64 tdh_mem_track(struct tdx_td *tdr); +u64 tdh_mem_page_remove(struct tdx_td *td, u64 gpa, u64 level, u64 *rcx, u64 *rdx); u64 tdh_phymem_cache_wb(bool resume); u64 tdh_phymem_page_wbinvd_tdr(struct tdx_td *td); +u64 tdh_phymem_page_wbinvd_hkid(u64 hpa, u64 hkid); #else static inline void tdx_init(void) { } static inline int tdx_cpu_enable(void) { return -ENODEV; } diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c index c7e6f30d0a14..cde55e9b3280 100644 --- a/arch/x86/virt/vmx/tdx/tdx.c +++ b/arch/x86/virt/vmx/tdx/tdx.c @@ -1761,6 +1761,23 @@ u64 tdh_mem_track(struct tdx_td *td) } EXPORT_SYMBOL_GPL(tdh_mem_track); +u64 tdh_mem_page_remove(struct tdx_td *td, u64 gpa, u64 level, u64 *rcx, u64 *rdx) +{ + struct tdx_module_args args = { + .rcx = gpa | level, + .rdx = tdx_tdr_pa(td), + }; + u64 ret; + + ret = seamcall_ret(TDH_MEM_PAGE_REMOVE, &args); + + *rcx = args.rcx; + *rdx = args.rdx; + + return ret; +} +EXPORT_SYMBOL_GPL(tdh_mem_page_remove); + u64 tdh_phymem_cache_wb(bool resume) { struct tdx_module_args args = { @@ -1780,3 +1797,13 @@ u64 tdh_phymem_page_wbinvd_tdr(struct tdx_td *td) return seamcall(TDH_PHYMEM_PAGE_WBINVD, &args); } EXPORT_SYMBOL_GPL(tdh_phymem_page_wbinvd_tdr); + +u64 tdh_phymem_page_wbinvd_hkid(u64 hpa, u64 hkid) +{ + struct tdx_module_args args = {}; + + args.rcx = hpa | (hkid << boot_cpu_data.x86_phys_bits); + + return seamcall(TDH_PHYMEM_PAGE_WBINVD, &args); +} +EXPORT_SYMBOL_GPL(tdh_phymem_page_wbinvd_hkid); diff --git a/arch/x86/virt/vmx/tdx/tdx.h b/arch/x86/virt/vmx/tdx/tdx.h index 
4b0ad536afd9..d49cdd9b0577 100644 --- a/arch/x86/virt/vmx/tdx/tdx.h +++ b/arch/x86/virt/vmx/tdx/tdx.h @@ -33,6 +33,7 @@ #define TDH_PHYMEM_PAGE_RDMD 24 #define TDH_VP_RD 26 #define TDH_PHYMEM_PAGE_RECLAIM 28 +#define TDH_MEM_PAGE_REMOVE 29 #define TDH_SYS_KEY_CONFIG 31 #define TDH_SYS_INIT 33 #define TDH_SYS_RD 34