Message ID | 20250101074959.412696-8-pbonzini@redhat.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | x86/virt/tdx: Add SEAMCALL wrappers for KVM | expand |
On Wed, 2025-01-01 at 02:49 -0500, Paolo Bonzini wrote: > > +u64 tdh_mem_sept_add(struct tdx_td *td, u64 gpa, u64 level, u64 hpa, u64 *rcx, u64 *rdx) > +{ > + struct tdx_module_args args = { > + .rcx = gpa | level, The bit packing here is a bit magic. Yan had been looking at adding a union like below to use internally in tdh_mem_sept_add() to pack the rcx register. union tdx_sept_gpa_mapping_info { struct { u64 level : 3; u64 reserved1 : 9; u64 gfn : 40; u64 reserved2 : 12; }; u64 full; }; It might be a bit verbose, but I agree something should be done. > + .rdx = tdx_tdr_pa(td), > + .r8 = hpa, > + }; > + u64 ret; > + > + clflush_cache_range(__va(hpa), PAGE_SIZE); > + ret = seamcall_ret(TDH_MEM_SEPT_ADD, &args); > + > + *rcx = args.rcx; > + *rdx = args.rdx; This was changed to this: *extended_err1 = args.rcx; *extended_err2 = args.rdx; The reasoning is rcx and rdx are not very clear names. The caller only uses them for printing raw error codes, so don't bother creating a union or anything fancy. The print statements should print the raw error code to help in debugging situations where new bits are defined and returned by a future buggy TDX module. > + > + return ret; > +} > +EXPORT_SYMBOL_GPL(tdh_mem_sept_add); > + > u64 tdh_vp_addcx(struct tdx_vp *vp, struct page *tdcx_page) > { > struct tdx_module_args args = { > diff --git a/arch/x86/virt/vmx/tdx/tdx.h b/arch/x86/virt/vmx/tdx/tdx.h > index 62cb7832c42d..308d3aa565d7 100644 > --- a/arch/x86/virt/vmx/tdx/tdx.h > +++ b/arch/x86/virt/vmx/tdx/tdx.h > @@ -16,6 +16,7 @@ > * TDX module SEAMCALL leaf functions > */ > #define TDH_MNG_ADDCX 1 > +#define TDH_MEM_SEPT_ADD 3 > #define TDH_VP_ADDCX 4 > #define TDH_MNG_KEY_CONFIG 8 > #define TDH_MNG_CREATE 9
Here's the proposed new version with changelog: SEAMCALL RFC: - Use struct tdx_td instead of raw TDR u64. - Use "struct page *" for sept_page instead of raw hpa. - Change "u64 level" to "int level". - Rename "level" to "tdx_level" as it's tdx specfic. (Rick) - Change "u64 gpa" to "gfn_t gfn". (Reinette) - Introduce a union tdx_sept_gpa_mapping_info to initialize args.rcx. (Reinette) - Call tdx_clflush_page() instead of clflush_cache_range(). - Use extended_err1/2 instead of rcx/rdx for output. diff --git a/arch/x86/include/asm/shared/tdx.h b/arch/x86/include/asm/shared/tdx.h index c6d0668de167..ea5a98fd1dd5 100644 --- a/arch/x86/include/asm/shared/tdx.h +++ b/arch/x86/include/asm/shared/tdx.h @@ -105,6 +105,20 @@ struct tdx_module_args { u64 rsi; }; +/* + * Used to specify SEPT GPA mapping information for SEPT related SEAMCALLs and + * TDCALLs. + */ +union tdx_sept_gpa_mapping_info { + struct { + u64 level : 3; + u64 reserved1 : 9; + u64 gfn : 40; + u64 reserved2 : 12; + }; + u64 full; +}; + /* Used to communicate with the TDX module */ u64 __tdcall(u64 fn, struct tdx_module_args *args); u64 __tdcall_ret(u64 fn, struct tdx_module_args *args); diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h index bbb8f0bae9ba..787c359a5fc9 100644 --- a/arch/x86/include/asm/tdx.h +++ b/arch/x86/include/asm/tdx.h @@ -33,6 +33,7 @@ #ifndef __ASSEMBLY__ #include <uapi/asm/mce.h> +#include <linux/kvm_types.h> #include "tdx_global_metadata.h" /* @@ -143,6 +144,8 @@ struct tdx_vp { }; u64 tdh_mng_addcx(struct tdx_td *td, struct page *tdcs_page); +u64 tdh_mem_sept_add(struct tdx_td *td, gfn_t gfn, int tdx_level, struct page *sept_page, + u64 *extended_err1, u64 *extended_err2); u64 tdh_vp_addcx(struct tdx_vp *vp, struct page *tdcx_page); u64 tdh_mng_key_config(struct tdx_td *td); u64 tdh_mng_create(struct tdx_td *td, u64 hkid); diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c index a16c507296df..adb2059b6b5f 100644 --- a/arch/x86/virt/vmx/tdx/tdx.c +++ b/arch/x86/virt/vmx/tdx/tdx.c @@ -1583,6 +1583,27 @@ u64 tdh_mng_addcx(struct tdx_td *td, struct page *tdcs_page) } EXPORT_SYMBOL_GPL(tdh_mng_addcx); +u64 tdh_mem_sept_add(struct tdx_td *td, gfn_t gfn, int tdx_level, struct page *sept_page, + u64 *extended_err1, u64 *extended_err2) +{ + union tdx_sept_gpa_mapping_info gpa_info = { .level = tdx_level, .gfn = gfn, }; + struct tdx_module_args args = { + .rcx = gpa_info.full, + .rdx = tdx_tdr_pa(td), + .r8 = page_to_phys(sept_page), + }; + u64 ret; + + tdx_clflush_page(sept_page); + ret = seamcall_ret(TDH_MEM_SEPT_ADD, &args); + + *extended_err1 = args.rcx; + *extended_err2 = args.rdx; + + return ret; +} +EXPORT_SYMBOL_GPL(tdh_mem_sept_add); + u64 tdh_vp_addcx(struct tdx_vp *vp, struct page *tdcx_page) { struct tdx_module_args args = { diff --git a/arch/x86/virt/vmx/tdx/tdx.h b/arch/x86/virt/vmx/tdx/tdx.h index 08b01b7fe7c2..0d1ba0d0ac82 100644 --- a/arch/x86/virt/vmx/tdx/tdx.h +++ b/arch/x86/virt/vmx/tdx/tdx.h @@ -18,6 +18,7 @@ * TDX module SEAMCALL leaf functions */ #define TDH_MNG_ADDCX 1 +#define TDH_MEM_SEPT_ADD 3 #define TDH_VP_ADDCX 4 #define TDH_MNG_KEY_CONFIG 8 #define TDH_MNG_CREATE 9
On 1/2/25 13:59, Edgecombe, Rick P wrote: > union tdx_sept_gpa_mapping_info { > struct { > u64 level : 3; > u64 reserved1 : 9; > u64 gfn : 40; > u64 reserved2 : 12; > }; > u64 full; > }; This is functionally OK, but seeing bitfields on a value that's probably going to get shifted around makes me nervous because of: > https://lore.kernel.org/lkml/20231111020019.553664-1-michael.roth@amd.com/ I wouldn't NAK it just for this, but it's also not how I would code it up.
On Tue, Jan 07, 2025 at 11:48:12AM -0800, Dave Hansen wrote: > On 1/2/25 13:59, Edgecombe, Rick P wrote: > > union tdx_sept_gpa_mapping_info { > > struct { > > u64 level : 3; > > u64 reserved1 : 9; > > u64 gfn : 40; > > u64 reserved2 : 12; > > }; > > u64 full; > > }; > > This is functionally OK, but seeing bitfields on a value that's probably > going to get shifted around makes me nervous because of: This is defined according to the TDX spec. e.g. in TDH.MEM.SEPT.ADD: RCX | EPT mapping information: ----|--------------------------------------------------------------------------- | Bits | Name | Description |------|----------|--------------------------------------------------------- | 2:0 | Level | Level of the non-leaf Secure EPT entry that will map the | | | new Secure EPT page - see 3.6.1 | | | Level must between 1 and 3 for a 4-level EPT or between | | | 1 and 4 for a 5-level EPT. |------|----------|--------------------------------------------------------- | 11:3 | Reserved | Reserved: must be 0 |------|----------|--------------------------------------------------------- | 51:12| GPA | Bits 51:12 of the guest physical address of to be mapped | | | for the new Secure EPT page Depending on the level, the | | | following least significant bits must be 0: | | | Level 1 (EPT): Bits 20:12 | | | Level 2 (EPD): Bits 29:12 | | | Level 3 (EPDPT): Bits 38:12 | | | Level 4 (EPML4): Bits 47:12 |------|----------|--------------------------------------------------------- | 63:52| Reserved | Reserved: must be 0 So, why does this bitfields definition make things worse? > > https://lore.kernel.org/lkml/20231111020019.553664-1-michael.roth@amd.com/ > I wouldn't NAK it just for this, but it's also not how I would code it up.
On 1/7/25 17:12, Yan Zhao wrote:
> So, why does this bitfields definition make things worse?
Look at the kernel page table management. Why don't we use bitfields for
_that_? Look at the link I sent. Bitfields can cause some really goofy
unexpected behavior if you pass them around like they were a full type.
On Tue, 2025-01-07 at 17:20 -0800, Dave Hansen wrote: > On 1/7/25 17:12, Yan Zhao wrote: > > So, why does this bitfields definition make things worse? > > Look at the kernel page table management. Why don't we use bitfields for > _that_? Look at the link I sent. Bitfields can cause some really goofy > unexpected behavior if you pass them around like they were a full type. Huh, so this enum is unsafe for reading out the individual fields because if shifting them, it will perform the shift with the size of the source bit field size. It is safe in the way it is being used in these patches, which is to encode a u64. But if we ever started to use tdx_sept_gpa_mapping_info to process output from a SEAMCALL, or something, we could set ourselves up for the same problem as the SEV bug. Let's not open code the encoding in each SEAMCALL though. What about replacing it with just a helper that encodes the u64 gpa from two args: gfn and tdx_level. We could add some specific over-size behavior for the fields, but I'd think it would be ok to keep it simple. Maybe something like this: static u64 encode_gpa_mapping_info(gfn_t gfn, unsigned int tdx_level) { u64 val = 0; val |= level; val |= gfn << TDX_MAPPING_INFO_GFN_SHIFT; return val; }
On Wed, Jan 08, 2025 at 09:43:37AM +0800, Edgecombe, Rick P wrote: > On Tue, 2025-01-07 at 17:20 -0800, Dave Hansen wrote: > > On 1/7/25 17:12, Yan Zhao wrote: > > > So, why does this bitfields definition make things worse? > > > > Look at the kernel page table management. Why don't we use bitfields for > > _that_? Look at the link I sent. Bitfields can cause some really goofy > > unexpected behavior if you pass them around like they were a full type. > > Huh, so this enum is unsafe for reading out the individual fields because if > shifting them, it will perform the shift with the size of the source bit field > size. It is safe in the way it is being used in these patches, which is to > encode a u64. But if we ever started to use tdx_sept_gpa_mapping_info to process > output from a SEAMCALL, or something, we could set ourselves up for the same > problem as the SEV bug. Thanks for the explanation! Sorry that I didn't clearly explain the usage to Dave. > Let's not open code the encoding in each SEAMCALL though. What about replacing > it with just a helper that encodes the u64 gpa from two args: gfn and tdx_level. > We could add some specific over-size behavior for the fields, but I'd think it > would be ok to keep it simple. Maybe something like this: > > static u64 encode_gpa_mapping_info(gfn_t gfn, unsigned int tdx_level) > { > u64 val = 0; > > val |= level; > val |= gfn << TDX_MAPPING_INFO_GFN_SHIFT; > > return val; > } That's a clever alternative :)
diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h index 8fc1836c5d09..9e0c60a41d5d 100644 --- a/arch/x86/include/asm/tdx.h +++ b/arch/x86/include/asm/tdx.h @@ -138,6 +138,7 @@ struct tdx_vp { }; u64 tdh_mng_addcx(struct tdx_td *td, struct page *tdcs_page); +u64 tdh_mem_sept_add(struct tdx_td *td, u64 gpa, u64 level, u64 hpa, u64 *rcx, u64 *rdx); u64 tdh_vp_addcx(struct tdx_vp *vp, struct page *tdcx_page); u64 tdh_mng_key_config(struct tdx_td *td); u64 tdh_mng_create(struct tdx_td *td, u64 hkid); diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c index 986b58b63121..a97a470dda23 100644 --- a/arch/x86/virt/vmx/tdx/tdx.c +++ b/arch/x86/virt/vmx/tdx/tdx.c @@ -1491,6 +1491,25 @@ u64 tdh_mng_addcx(struct tdx_td *td, struct page *tdcs_page) } EXPORT_SYMBOL_GPL(tdh_mng_addcx); +u64 tdh_mem_sept_add(struct tdx_td *td, u64 gpa, u64 level, u64 hpa, u64 *rcx, u64 *rdx) +{ + struct tdx_module_args args = { + .rcx = gpa | level, + .rdx = tdx_tdr_pa(td), + .r8 = hpa, + }; + u64 ret; + + clflush_cache_range(__va(hpa), PAGE_SIZE); + ret = seamcall_ret(TDH_MEM_SEPT_ADD, &args); + + *rcx = args.rcx; + *rdx = args.rdx; + + return ret; +} +EXPORT_SYMBOL_GPL(tdh_mem_sept_add); + u64 tdh_vp_addcx(struct tdx_vp *vp, struct page *tdcx_page) { struct tdx_module_args args = { diff --git a/arch/x86/virt/vmx/tdx/tdx.h b/arch/x86/virt/vmx/tdx/tdx.h index 62cb7832c42d..308d3aa565d7 100644 --- a/arch/x86/virt/vmx/tdx/tdx.h +++ b/arch/x86/virt/vmx/tdx/tdx.h @@ -16,6 +16,7 @@ * TDX module SEAMCALL leaf functions */ #define TDH_MNG_ADDCX 1 +#define TDH_MEM_SEPT_ADD 3 #define TDH_VP_ADDCX 4 #define TDH_MNG_KEY_CONFIG 8 #define TDH_MNG_CREATE 9