diff mbox series

[v19,027/130] KVM: TDX: Define TDX architectural definitions

Message ID 522cbfe6e5a351f88480790fe3c3be36c82ca4b1.1708933498.git.isaku.yamahata@intel.com (mailing list archive)
State New, archived
Headers show
Series [v19,001/130] x86/virt/tdx: Rename _offset to _member for TD_SYSINFO_MAP() macro | expand

Commit Message

Isaku Yamahata Feb. 26, 2024, 8:25 a.m. UTC
From: Isaku Yamahata <isaku.yamahata@intel.com>

Define architectural definitions for KVM to issue the TDX SEAMCALLs.

Structures and values that are architecturally defined in the TDX module
specifications the chapter of ABI Reference.

Co-developed-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Xiaoyao Li <xiaoyao.li@intel.com>
---
v19:
- drop tdvmcall constants by Xiaoyao

v18:
- Add metadata field id

Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
---
 arch/x86/kvm/vmx/tdx_arch.h | 265 ++++++++++++++++++++++++++++++++++++
 1 file changed, 265 insertions(+)
 create mode 100644 arch/x86/kvm/vmx/tdx_arch.h

Comments

Yan Zhao March 1, 2024, 7:25 a.m. UTC | #1
> + * TD_PARAMS is provided as an input to TDH_MNG_INIT, the size of which is 1024B.
> + */
> +#define TDX_MAX_VCPUS	(~(u16)0)
This value will be treated as -1 in tdx_vm_init(),
	"kvm->max_vcpus = min(kvm->max_vcpus, TDX_MAX_VCPUS);"

This will lead to kvm->max_vcpus being -1 by default.
Is this by design or just an error?
If it's by design, why not set kvm->max_vcpus = -1 in tdx_vm_init() directly.
If an unexpected error, may below is better?

#define TDX_MAX_VCPUS   (int)((u16)(~0UL))
or
#define TDX_MAX_VCPUS 65536
Isaku Yamahata March 5, 2024, 8:21 a.m. UTC | #2
On Fri, Mar 01, 2024 at 03:25:31PM +0800,
Yan Zhao <yan.y.zhao@intel.com> wrote:

> > + * TD_PARAMS is provided as an input to TDH_MNG_INIT, the size of which is 1024B.
> > + */
> > +#define TDX_MAX_VCPUS	(~(u16)0)
> This value will be treated as -1 in tdx_vm_init(),
> 	"kvm->max_vcpus = min(kvm->max_vcpus, TDX_MAX_VCPUS);"
> 
> This will lead to kvm->max_vcpus being -1 by default.
> Is this by design or just an error?
> If it's by design, why not set kvm->max_vcpus = -1 in tdx_vm_init() directly.
> If an unexpected error, may below is better?
> 
> #define TDX_MAX_VCPUS   (int)((u16)(~0UL))
> or
> #define TDX_MAX_VCPUS 65536

You're right. I'll use ((int)U16_MAX).
As TDX 1.5 introduced metadata MAX_VCPUS_PER_TD, I'll update to get the value
and trim it further. Something following.

diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index f964d99f8701..31205f84d594 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -23,7 +23,6 @@ KVM_X86_OP(has_emulated_msr)
 /* TODO: Once all backend implemented this op, remove _OPTIONAL_RET0. */
 KVM_X86_OP_OPTIONAL_RET0(vcpu_check_cpuid)
 KVM_X86_OP(vcpu_after_set_cpuid)
-KVM_X86_OP_OPTIONAL(max_vcpus);
 KVM_X86_OP_OPTIONAL(vm_enable_cap)
 KVM_X86_OP(vm_init)
 KVM_X86_OP_OPTIONAL(flush_shadow_all_private)
diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
index 6dd78230c9d4..deb59e94990f 100644
--- a/arch/x86/kvm/vmx/main.c
+++ b/arch/x86/kvm/vmx/main.c
@@ -13,17 +13,6 @@
 static bool enable_tdx __ro_after_init;
 module_param_named(tdx, enable_tdx, bool, 0444);
 
-static int vt_max_vcpus(struct kvm *kvm)
-{
-	if (!kvm)
-		return KVM_MAX_VCPUS;
-
-	if (is_td(kvm))
-		return min(kvm->max_vcpus, TDX_MAX_VCPUS);
-
-	return kvm->max_vcpus;
-}
-
 #if IS_ENABLED(CONFIG_HYPERV) || IS_ENABLED(CONFIG_INTEL_TDX_HOST)
 static int vt_flush_remote_tlbs(struct kvm *kvm);
 static int vt_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, gfn_t nr_pages);
@@ -1130,7 +1119,6 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
 	.hardware_disable = vt_hardware_disable,
 	.has_emulated_msr = vt_has_emulated_msr,
 
-	.max_vcpus = vt_max_vcpus,
 	.vm_size = sizeof(struct kvm_vmx),
 	.vm_enable_cap = vt_vm_enable_cap,
 	.vm_init = vt_vm_init,
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index 7cca6a33ad97..a8cfb4f214a6 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -63,6 +63,8 @@ struct tdx_info {
 	u8 nr_tdcs_pages;
 	u8 nr_tdvpx_pages;
 
+	u16 max_vcpus_per_td;
+
 	/*
 	 * The number of WBINVD domains. 0 means that wbinvd domain is cpu
 	 * package.
@@ -100,7 +102,8 @@ int tdx_vm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 		if (cap->flags || cap->args[0] == 0)
 			return -EINVAL;
 		if (cap->args[0] > KVM_MAX_VCPUS ||
-		    cap->args[0] > TDX_MAX_VCPUS)
+		    cap->args[0] > TDX_MAX_VCPUS ||
+		    cap->args[0] > tdx_info->max_vcpus_per_td)
 			return -E2BIG;
 
 		mutex_lock(&kvm->lock);
@@ -729,7 +732,8 @@ int tdx_vm_init(struct kvm *kvm)
 	 * TDX has its own limit of the number of vcpus in addition to
 	 * KVM_MAX_VCPUS.
 	 */
-	kvm->max_vcpus = min(kvm->max_vcpus, TDX_MAX_VCPUS);
+	kvm->max_vcpus = min3(kvm->max_vcpus, tdx_info->max_vcpus_per_td,
+			     TDX_MAX_VCPUS);
 
 	mutex_init(&to_kvm_tdx(kvm)->source_lock);
 	return 0;
@@ -4667,6 +4671,7 @@ static int __init tdx_module_setup(void)
 		TDX_INFO_MAP(ATTRS_FIXED1, attributes_fixed1),
 		TDX_INFO_MAP(XFAM_FIXED0, xfam_fixed0),
 		TDX_INFO_MAP(XFAM_FIXED1, xfam_fixed1),
+		TDX_INFO_MAP(MAX_VCPUS_PER_TD, max_vcpus_per_td),
 	};
 
 	ret = tdx_enable();
diff --git a/arch/x86/kvm/vmx/tdx_arch.h b/arch/x86/kvm/vmx/tdx_arch.h
index b10dad8f46bb..711855be6c83 100644
--- a/arch/x86/kvm/vmx/tdx_arch.h
+++ b/arch/x86/kvm/vmx/tdx_arch.h
@@ -144,7 +144,7 @@ struct tdx_cpuid_value {
 /*
  * TD_PARAMS is provided as an input to TDH_MNG_INIT, the size of which is 1024B.
  */
-#define TDX_MAX_VCPUS	(~(u16)0)
+#define TDX_MAX_VCPUS	((int)U16_MAX)
 
 struct td_params {
 	u64 attributes;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index d097024fd974..6822a50e1d5d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4733,8 +4733,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 		break;
 	case KVM_CAP_MAX_VCPUS:
 		r = KVM_MAX_VCPUS;
-		if (kvm_x86_ops.max_vcpus)
-			r = static_call(kvm_x86_max_vcpus)(kvm);
+		if (kvm)
+			r = kvm->max_vcpus;
 		break;
 	case KVM_CAP_MAX_VCPU_ID:
 		r = KVM_MAX_VCPU_IDS;
Binbin Wu March 14, 2024, 7:30 a.m. UTC | #3
On 2/26/2024 4:25 PM, isaku.yamahata@intel.com wrote:
> From: Isaku Yamahata <isaku.yamahata@intel.com>
>
> Define architectural definitions for KVM to issue the TDX SEAMCALLs.
>
> Structures and values that are architecturally defined in the TDX module
> specifications the chapter of ABI Reference.
>
> Co-developed-by: Sean Christopherson <sean.j.christopherson@intel.com>
> Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
> Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
> Reviewed-by: Xiaoyao Li <xiaoyao.li@intel.com>
> ---
> v19:
> - drop tdvmcall constants by Xiaoyao
>
> v18:
> - Add metadata field id
>
> Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> ---
>   arch/x86/kvm/vmx/tdx_arch.h | 265 ++++++++++++++++++++++++++++++++++++
>   1 file changed, 265 insertions(+)
>   create mode 100644 arch/x86/kvm/vmx/tdx_arch.h
>
> diff --git a/arch/x86/kvm/vmx/tdx_arch.h b/arch/x86/kvm/vmx/tdx_arch.h
> new file mode 100644
> index 000000000000..e2c1a6f429d7
> --- /dev/null
> +++ b/arch/x86/kvm/vmx/tdx_arch.h
> @@ -0,0 +1,265 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/* architectural constants/data definitions for TDX SEAMCALLs */
> +
> +#ifndef __KVM_X86_TDX_ARCH_H
> +#define __KVM_X86_TDX_ARCH_H
> +
> +#include <linux/types.h>
> +
> +/*
> + * TDX SEAMCALL API function leaves
> + */
> +#define TDH_VP_ENTER			0
> +#define TDH_MNG_ADDCX			1
> +#define TDH_MEM_PAGE_ADD		2
> +#define TDH_MEM_SEPT_ADD		3
> +#define TDH_VP_ADDCX			4
> +#define TDH_MEM_PAGE_RELOCATE		5
> +#define TDH_MEM_PAGE_AUG		6
> +#define TDH_MEM_RANGE_BLOCK		7
> +#define TDH_MNG_KEY_CONFIG		8
> +#define TDH_MNG_CREATE			9
> +#define TDH_VP_CREATE			10
> +#define TDH_MNG_RD			11
> +#define TDH_MR_EXTEND			16
> +#define TDH_MR_FINALIZE			17
> +#define TDH_VP_FLUSH			18
> +#define TDH_MNG_VPFLUSHDONE		19
> +#define TDH_MNG_KEY_FREEID		20
> +#define TDH_MNG_INIT			21
> +#define TDH_VP_INIT			22
> +#define TDH_MEM_SEPT_RD			25
> +#define TDH_VP_RD			26
> +#define TDH_MNG_KEY_RECLAIMID		27
> +#define TDH_PHYMEM_PAGE_RECLAIM		28
> +#define TDH_MEM_PAGE_REMOVE		29
> +#define TDH_MEM_SEPT_REMOVE		30
> +#define TDH_SYS_RD			34
> +#define TDH_MEM_TRACK			38
> +#define TDH_MEM_RANGE_UNBLOCK		39
> +#define TDH_PHYMEM_CACHE_WB		40
> +#define TDH_PHYMEM_PAGE_WBINVD		41
> +#define TDH_VP_WR			43
> +#define TDH_SYS_LP_SHUTDOWN		44
> +
> +/* TDX control structure (TDR/TDCS/TDVPS) field access codes */
> +#define TDX_NON_ARCH			BIT_ULL(63)
> +#define TDX_CLASS_SHIFT			56
> +#define TDX_FIELD_MASK			GENMASK_ULL(31, 0)
> +
> +#define __BUILD_TDX_FIELD(non_arch, class, field)	\
> +	(((non_arch) ? TDX_NON_ARCH : 0) |		\
> +	 ((u64)(class) << TDX_CLASS_SHIFT) |		\
> +	 ((u64)(field) & TDX_FIELD_MASK))
> +
> +#define BUILD_TDX_FIELD(class, field)			\
> +	__BUILD_TDX_FIELD(false, (class), (field))
> +
> +#define BUILD_TDX_FIELD_NON_ARCH(class, field)		\
> +	__BUILD_TDX_FIELD(true, (class), (field))
> +
> +
> +/* Class code for TD */
> +#define TD_CLASS_EXECUTION_CONTROLS	17ULL
> +
> +/* Class code for TDVPS */
> +#define TDVPS_CLASS_VMCS		0ULL
> +#define TDVPS_CLASS_GUEST_GPR		16ULL
> +#define TDVPS_CLASS_OTHER_GUEST		17ULL
> +#define TDVPS_CLASS_MANAGEMENT		32ULL
> +
> +enum tdx_tdcs_execution_control {
> +	TD_TDCS_EXEC_TSC_OFFSET = 10,
> +};
> +
> +/* @field is any of enum tdx_tdcs_execution_control */
> +#define TDCS_EXEC(field)		BUILD_TDX_FIELD(TD_CLASS_EXECUTION_CONTROLS, (field))
> +
> +/* @field is the VMCS field encoding */
> +#define TDVPS_VMCS(field)		BUILD_TDX_FIELD(TDVPS_CLASS_VMCS, (field))
> +
> +enum tdx_vcpu_guest_other_state {
> +	TD_VCPU_STATE_DETAILS_NON_ARCH = 0x100,
> +};
> +
> +union tdx_vcpu_state_details {
> +	struct {
> +		u64 vmxip	: 1;
> +		u64 reserved	: 63;
> +	};
> +	u64 full;
> +};
> +
> +/* @field is any of enum tdx_guest_other_state */
> +#define TDVPS_STATE(field)		BUILD_TDX_FIELD(TDVPS_CLASS_OTHER_GUEST, (field))
> +#define TDVPS_STATE_NON_ARCH(field)	BUILD_TDX_FIELD_NON_ARCH(TDVPS_CLASS_OTHER_GUEST, (field))
> +
> +/* Management class fields */
> +enum tdx_vcpu_guest_management {
> +	TD_VCPU_PEND_NMI = 11,
> +};
> +
> +/* @field is any of enum tdx_vcpu_guest_management */
> +#define TDVPS_MANAGEMENT(field)		BUILD_TDX_FIELD(TDVPS_CLASS_MANAGEMENT, (field))
> +
> +#define TDX_EXTENDMR_CHUNKSIZE		256
> +
> +struct tdx_cpuid_value {
> +	u32 eax;
> +	u32 ebx;
> +	u32 ecx;
> +	u32 edx;
> +} __packed;
> +
> +#define TDX_TD_ATTRIBUTE_DEBUG		BIT_ULL(0)
> +#define TDX_TD_ATTR_SEPT_VE_DISABLE	BIT_ULL(28)
It's better to align the style of the naming.

Either use TDX_TD_ATTR_* or TDX_TD_ATTRIBUTE_*?

> +#define TDX_TD_ATTRIBUTE_PKS		BIT_ULL(30)
> +#define TDX_TD_ATTRIBUTE_KL		BIT_ULL(31)
> +#define TDX_TD_ATTRIBUTE_PERFMON	BIT_ULL(63)
> +
> +/*
> + * TD_PARAMS is provided as an input to TDH_MNG_INIT, the size of which is 1024B.
> + */
> +#define TDX_MAX_VCPUS	(~(u16)0)
> +
> +struct td_params {
> +	u64 attributes;
> +	u64 xfam;
> +	u16 max_vcpus;
> +	u8 reserved0[6];
> +
> +	u64 eptp_controls;
> +	u64 exec_controls;
> +	u16 tsc_frequency;
> +	u8  reserved1[38];
> +
> +	u64 mrconfigid[6];
> +	u64 mrowner[6];
> +	u64 mrownerconfig[6];
> +	u64 reserved2[4];
> +
> +	union {
> +		DECLARE_FLEX_ARRAY(struct tdx_cpuid_value, cpuid_values);
> +		u8 reserved3[768];
> +	};
> +} __packed __aligned(1024);
> +
> +/*
> + * Guest uses MAX_PA for GPAW when set.
> + * 0: GPA.SHARED bit is GPA[47]
> + * 1: GPA.SHARED bit is GPA[51]
> + */
> +#define TDX_EXEC_CONTROL_MAX_GPAW      BIT_ULL(0)
> +
> +/*
> + * TDH.VP.ENTER, TDG.VP.VMCALL preserves RBP
> + * 0: RBP can be used for TDG.VP.VMCALL input. RBP is clobbered.
> + * 1: RBP can't be used for TDG.VP.VMCALL input. RBP is preserved.
> + */
> +#define TDX_CONTROL_FLAG_NO_RBP_MOD	BIT_ULL(2)
> +
> +
> +/*
> + * TDX requires the frequency to be defined in units of 25MHz, which is the
> + * frequency of the core crystal clock on TDX-capable platforms, i.e. the TDX
> + * module can only program frequencies that are multiples of 25MHz.  The
> + * frequency must be between 100mhz and 10ghz (inclusive).
> + */
> +#define TDX_TSC_KHZ_TO_25MHZ(tsc_in_khz)	((tsc_in_khz) / (25 * 1000))
> +#define TDX_TSC_25MHZ_TO_KHZ(tsc_in_25mhz)	((tsc_in_25mhz) * (25 * 1000))
> +#define TDX_MIN_TSC_FREQUENCY_KHZ		(100 * 1000)
> +#define TDX_MAX_TSC_FREQUENCY_KHZ		(10 * 1000 * 1000)
> +
> +union tdx_sept_entry {
> +	struct {
> +		u64 r		:  1;
> +		u64 w		:  1;
> +		u64 x		:  1;
> +		u64 mt		:  3;
> +		u64 ipat	:  1;
> +		u64 leaf	:  1;
> +		u64 a		:  1;
> +		u64 d		:  1;
> +		u64 xu		:  1;
> +		u64 ignored0	:  1;
> +		u64 pfn		: 40;
> +		u64 reserved	:  5;
> +		u64 vgp		:  1;
> +		u64 pwa		:  1;
> +		u64 ignored1	:  1;
> +		u64 sss		:  1;
> +		u64 spp		:  1;
> +		u64 ignored2	:  1;
> +		u64 sve		:  1;
> +	};
> +	u64 raw;
> +};
> +
> +enum tdx_sept_entry_state {
> +	TDX_SEPT_FREE = 0,
> +	TDX_SEPT_BLOCKED = 1,
> +	TDX_SEPT_PENDING = 2,
> +	TDX_SEPT_PENDING_BLOCKED = 3,
> +	TDX_SEPT_PRESENT = 4,
> +};
> +
> +union tdx_sept_level_state {
> +	struct {
> +		u64 level	:  3;
> +		u64 reserved0	:  5;
> +		u64 state	:  8;
> +		u64 reserved1	: 48;
> +	};
> +	u64 raw;
> +};
> +
> +/*
> + * Global scope metadata field ID.
> + * See Table "Global Scope Metadata", TDX module 1.5 ABI spec.
> + */
> +#define MD_FIELD_ID_SYS_ATTRIBUTES		0x0A00000200000000ULL
> +#define MD_FIELD_ID_FEATURES0			0x0A00000300000008ULL
> +#define MD_FIELD_ID_ATTRS_FIXED0		0x1900000300000000ULL
> +#define MD_FIELD_ID_ATTRS_FIXED1		0x1900000300000001ULL
> +#define MD_FIELD_ID_XFAM_FIXED0			0x1900000300000002ULL
> +#define MD_FIELD_ID_XFAM_FIXED1			0x1900000300000003ULL
> +
> +#define MD_FIELD_ID_TDCS_BASE_SIZE		0x9800000100000100ULL
> +#define MD_FIELD_ID_TDVPS_BASE_SIZE		0x9800000100000200ULL
> +
> +#define MD_FIELD_ID_NUM_CPUID_CONFIG		0x9900000100000004ULL
> +#define MD_FIELD_ID_CPUID_CONFIG_LEAVES		0x9900000300000400ULL
> +#define MD_FIELD_ID_CPUID_CONFIG_VALUES		0x9900000300000500ULL
> +
> +#define MD_FIELD_ID_FEATURES0_NO_RBP_MOD	BIT_ULL(18)
> +
> +#define TDX_MAX_NR_CPUID_CONFIGS       37
> +
> +#define TDX_MD_ELEMENT_SIZE_8BITS      0
> +#define TDX_MD_ELEMENT_SIZE_16BITS     1
> +#define TDX_MD_ELEMENT_SIZE_32BITS     2
> +#define TDX_MD_ELEMENT_SIZE_64BITS     3
> +
> +union tdx_md_field_id {
> +	struct {
> +		u64 field                       : 24;
> +		u64 reserved0                   : 8;
> +		u64 element_size_code           : 2;
> +		u64 last_element_in_field       : 4;
> +		u64 reserved1                   : 3;
> +		u64 inc_size                    : 1;
> +		u64 write_mask_valid            : 1;
> +		u64 context                     : 3;
> +		u64 reserved2                   : 1;
> +		u64 class                       : 6;
> +		u64 reserved3                   : 1;
> +		u64 non_arch                    : 1;
> +	};
> +	u64 raw;
> +};
> +
> +#define TDX_MD_ELEMENT_SIZE_CODE(_field_id)			\
> +	({ union tdx_md_field_id _fid = { .raw = (_field_id)};  \
> +		_fid.element_size_code; })
> +
> +#endif /* __KVM_X86_TDX_ARCH_H */
Isaku Yamahata March 14, 2024, 4:48 p.m. UTC | #4
On Thu, Mar 14, 2024 at 03:30:43PM +0800,
Binbin Wu <binbin.wu@linux.intel.com> wrote:

> > +#define TDX_TD_ATTRIBUTE_DEBUG		BIT_ULL(0)
> > +#define TDX_TD_ATTR_SEPT_VE_DISABLE	BIT_ULL(28)
> It's better to align the style of the naming.
> 
> Either use TDX_TD_ATTR_* or TDX_TD_ATTRIBUTE_*?

Good point. I'll adopt TDX_TD_ATTR_* because TDX_TD_ATTR_SEPT_VE_DISABLE is
already long.
Huang, Kai March 21, 2024, 9:57 p.m. UTC | #5
> +/*
> + * TDX SEAMCALL API function leaves
> + */
> +#define TDH_VP_ENTER			0
> +#define TDH_MNG_ADDCX			1
> +#define TDH_MEM_PAGE_ADD		2
> +#define TDH_MEM_SEPT_ADD		3
> +#define TDH_VP_ADDCX			4
> +#define TDH_MEM_PAGE_RELOCATE		5

I don't think the "RELOCATE" is needed in this patchset?

> +#define TDH_MEM_PAGE_AUG		6
> +#define TDH_MEM_RANGE_BLOCK		7
> +#define TDH_MNG_KEY_CONFIG		8
> +#define TDH_MNG_CREATE			9
> +#define TDH_VP_CREATE			10
> +#define TDH_MNG_RD			11
> +#define TDH_MR_EXTEND			16
> +#define TDH_MR_FINALIZE			17
> +#define TDH_VP_FLUSH			18
> +#define TDH_MNG_VPFLUSHDONE		19
> +#define TDH_MNG_KEY_FREEID		20
> +#define TDH_MNG_INIT			21
> +#define TDH_VP_INIT			22
> +#define TDH_MEM_SEPT_RD			25
> +#define TDH_VP_RD			26
> +#define TDH_MNG_KEY_RECLAIMID		27
> +#define TDH_PHYMEM_PAGE_RECLAIM		28
> +#define TDH_MEM_PAGE_REMOVE		29
> +#define TDH_MEM_SEPT_REMOVE		30
> +#define TDH_SYS_RD			34
> +#define TDH_MEM_TRACK			38
> +#define TDH_MEM_RANGE_UNBLOCK		39
> +#define TDH_PHYMEM_CACHE_WB		40
> +#define TDH_PHYMEM_PAGE_WBINVD		41
> +#define TDH_VP_WR			43
> +#define TDH_SYS_LP_SHUTDOWN		44

And LP_SHUTDOWN is certainly not needed.

Could you check whether there are others that are not needed?

Perhaps we should just include macros that got used, but anyway.

[...]

> +
> +/*
> + * TD_PARAMS is provided as an input to TDH_MNG_INIT, the size of which is 1024B.
> + */

Why is this comment applied to TDX_MAX_VCPUS?

> +#define TDX_MAX_VCPUS	(~(u16)0)

And is (~(16)0) an architectural value defined by TDX spec, or just SW 
value that you just put here for convenience?

I mean, is it possible that different version of TDX module have 
different implementation of MAX_CPU, e.g., module 1.0 only supports X 
but module 1.5 increases to Y where Y > X?

Anyway, looks you can safely move this to the patch to enable CAP_MAX_CPU?

> +
> +struct td_params {
> +	u64 attributes;
> +	u64 xfam;
> +	u16 max_vcpus;
> +	u8 reserved0[6];
> +
> +	u64 eptp_controls;
> +	u64 exec_controls;
> +	u16 tsc_frequency;
> +	u8  reserved1[38];
> +
> +	u64 mrconfigid[6];
> +	u64 mrowner[6];
> +	u64 mrownerconfig[6];
> +	u64 reserved2[4];
> +
> +	union {
> +		DECLARE_FLEX_ARRAY(struct tdx_cpuid_value, cpuid_values);
> +		u8 reserved3[768];

I am not sure you need the 'reseved3[768]', unless you need to make 
sieof(struct td_params) return 1024?

> +	};
> +} __packed __aligned(1024); > +

[...]

> +
> +#define TDX_MD_ELEMENT_SIZE_8BITS      0
> +#define TDX_MD_ELEMENT_SIZE_16BITS     1
> +#define TDX_MD_ELEMENT_SIZE_32BITS     2
> +#define TDX_MD_ELEMENT_SIZE_64BITS     3
> +
> +union tdx_md_field_id {
> +	struct {
> +		u64 field                       : 24;
> +		u64 reserved0                   : 8;
> +		u64 element_size_code           : 2;
> +		u64 last_element_in_field       : 4;
> +		u64 reserved1                   : 3;
> +		u64 inc_size                    : 1;
> +		u64 write_mask_valid            : 1;
> +		u64 context                     : 3;
> +		u64 reserved2                   : 1;
> +		u64 class                       : 6;
> +		u64 reserved3                   : 1;
> +		u64 non_arch                    : 1;
> +	};
> +	u64 raw;
> +};

Could you clarify why we need such detailed definition?  For metadata 
element size you can use simple '&' and '<<' to get the result.

> +
> +#define TDX_MD_ELEMENT_SIZE_CODE(_field_id)			\
> +	({ union tdx_md_field_id _fid = { .raw = (_field_id)};  \
> +		_fid.element_size_code; })
> +
> +#endif /* __KVM_X86_TDX_ARCH_H */
Yuan Yao March 22, 2024, 7:06 a.m. UTC | #6
On Mon, Feb 26, 2024 at 12:25:29AM -0800, isaku.yamahata@intel.com wrote:
> From: Isaku Yamahata <isaku.yamahata@intel.com>
>
> Define architectural definitions for KVM to issue the TDX SEAMCALLs.
>
> Structures and values that are architecturally defined in the TDX module
> specifications the chapter of ABI Reference.
>
> Co-developed-by: Sean Christopherson <sean.j.christopherson@intel.com>
> Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
> Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
> Reviewed-by: Xiaoyao Li <xiaoyao.li@intel.com>
> ---
> v19:
> - drop tdvmcall constants by Xiaoyao
>
> v18:
> - Add metadata field id
>
> Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> ---
>  arch/x86/kvm/vmx/tdx_arch.h | 265 ++++++++++++++++++++++++++++++++++++
>  1 file changed, 265 insertions(+)
>  create mode 100644 arch/x86/kvm/vmx/tdx_arch.h
>
> diff --git a/arch/x86/kvm/vmx/tdx_arch.h b/arch/x86/kvm/vmx/tdx_arch.h
> new file mode 100644
> index 000000000000..e2c1a6f429d7
> --- /dev/null
> +++ b/arch/x86/kvm/vmx/tdx_arch.h
> @@ -0,0 +1,265 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/* architectural constants/data definitions for TDX SEAMCALLs */
> +
> +#ifndef __KVM_X86_TDX_ARCH_H
> +#define __KVM_X86_TDX_ARCH_H
> +
> +#include <linux/types.h>
> +
> +/*
> + * TDX SEAMCALL API function leaves
> + */
> +#define TDH_VP_ENTER			0
> +#define TDH_MNG_ADDCX			1
> +#define TDH_MEM_PAGE_ADD		2
> +#define TDH_MEM_SEPT_ADD		3
> +#define TDH_VP_ADDCX			4
> +#define TDH_MEM_PAGE_RELOCATE		5
> +#define TDH_MEM_PAGE_AUG		6
> +#define TDH_MEM_RANGE_BLOCK		7
> +#define TDH_MNG_KEY_CONFIG		8
> +#define TDH_MNG_CREATE			9
> +#define TDH_VP_CREATE			10
> +#define TDH_MNG_RD			11
> +#define TDH_MR_EXTEND			16
> +#define TDH_MR_FINALIZE			17
> +#define TDH_VP_FLUSH			18
> +#define TDH_MNG_VPFLUSHDONE		19
> +#define TDH_MNG_KEY_FREEID		20
> +#define TDH_MNG_INIT			21
> +#define TDH_VP_INIT			22
> +#define TDH_MEM_SEPT_RD			25
> +#define TDH_VP_RD			26
> +#define TDH_MNG_KEY_RECLAIMID		27
> +#define TDH_PHYMEM_PAGE_RECLAIM		28
> +#define TDH_MEM_PAGE_REMOVE		29
> +#define TDH_MEM_SEPT_REMOVE		30
> +#define TDH_SYS_RD			34
> +#define TDH_MEM_TRACK			38
> +#define TDH_MEM_RANGE_UNBLOCK		39
> +#define TDH_PHYMEM_CACHE_WB		40
> +#define TDH_PHYMEM_PAGE_WBINVD		41
> +#define TDH_VP_WR			43
> +#define TDH_SYS_LP_SHUTDOWN		44
> +
> +/* TDX control structure (TDR/TDCS/TDVPS) field access codes */
> +#define TDX_NON_ARCH			BIT_ULL(63)
> +#define TDX_CLASS_SHIFT			56
> +#define TDX_FIELD_MASK			GENMASK_ULL(31, 0)
> +
> +#define __BUILD_TDX_FIELD(non_arch, class, field)	\
> +	(((non_arch) ? TDX_NON_ARCH : 0) |		\
> +	 ((u64)(class) << TDX_CLASS_SHIFT) |		\
> +	 ((u64)(field) & TDX_FIELD_MASK))
> +
> +#define BUILD_TDX_FIELD(class, field)			\
> +	__BUILD_TDX_FIELD(false, (class), (field))
> +
> +#define BUILD_TDX_FIELD_NON_ARCH(class, field)		\
> +	__BUILD_TDX_FIELD(true, (class), (field))
> +
> +
> +/* Class code for TD */
> +#define TD_CLASS_EXECUTION_CONTROLS	17ULL
> +
> +/* Class code for TDVPS */
> +#define TDVPS_CLASS_VMCS		0ULL
> +#define TDVPS_CLASS_GUEST_GPR		16ULL
> +#define TDVPS_CLASS_OTHER_GUEST		17ULL
> +#define TDVPS_CLASS_MANAGEMENT		32ULL
> +
> +enum tdx_tdcs_execution_control {
> +	TD_TDCS_EXEC_TSC_OFFSET = 10,
> +};
> +
> +/* @field is any of enum tdx_tdcs_execution_control */
> +#define TDCS_EXEC(field)		BUILD_TDX_FIELD(TD_CLASS_EXECUTION_CONTROLS, (field))
> +
> +/* @field is the VMCS field encoding */
> +#define TDVPS_VMCS(field)		BUILD_TDX_FIELD(TDVPS_CLASS_VMCS, (field))
> +
> +enum tdx_vcpu_guest_other_state {
> +	TD_VCPU_STATE_DETAILS_NON_ARCH = 0x100,
> +};
> +
> +union tdx_vcpu_state_details {
> +	struct {
> +		u64 vmxip	: 1;
> +		u64 reserved	: 63;
> +	};
> +	u64 full;
> +};
> +
> +/* @field is any of enum tdx_guest_other_state */
> +#define TDVPS_STATE(field)		BUILD_TDX_FIELD(TDVPS_CLASS_OTHER_GUEST, (field))
> +#define TDVPS_STATE_NON_ARCH(field)	BUILD_TDX_FIELD_NON_ARCH(TDVPS_CLASS_OTHER_GUEST, (field))
> +
> +/* Management class fields */
> +enum tdx_vcpu_guest_management {
> +	TD_VCPU_PEND_NMI = 11,
> +};
> +
> +/* @field is any of enum tdx_vcpu_guest_management */
> +#define TDVPS_MANAGEMENT(field)		BUILD_TDX_FIELD(TDVPS_CLASS_MANAGEMENT, (field))
> +
> +#define TDX_EXTENDMR_CHUNKSIZE		256
> +
> +struct tdx_cpuid_value {
> +	u32 eax;
> +	u32 ebx;
> +	u32 ecx;
> +	u32 edx;
> +} __packed;
> +
> +#define TDX_TD_ATTRIBUTE_DEBUG		BIT_ULL(0)

This series doesn't really touch off-TD things, so you can remove this.

> +#define TDX_TD_ATTR_SEPT_VE_DISABLE	BIT_ULL(28)
> +#define TDX_TD_ATTRIBUTE_PKS		BIT_ULL(30)
> +#define TDX_TD_ATTRIBUTE_KL		BIT_ULL(31)
> +#define TDX_TD_ATTRIBUTE_PERFMON	BIT_ULL(63)
> +
> +/*
> + * TD_PARAMS is provided as an input to TDH_MNG_INIT, the size of which is 1024B.
> + */
> +#define TDX_MAX_VCPUS	(~(u16)0)
> +
> +struct td_params {
> +	u64 attributes;
> +	u64 xfam;
> +	u16 max_vcpus;
> +	u8 reserved0[6];
> +
> +	u64 eptp_controls;
> +	u64 exec_controls;
> +	u16 tsc_frequency;
> +	u8  reserved1[38];
> +
> +	u64 mrconfigid[6];
> +	u64 mrowner[6];
> +	u64 mrownerconfig[6];
> +	u64 reserved2[4];
> +
> +	union {
> +		DECLARE_FLEX_ARRAY(struct tdx_cpuid_value, cpuid_values);
> +		u8 reserved3[768];
> +	};
> +} __packed __aligned(1024);
> +
> +/*
> + * Guest uses MAX_PA for GPAW when set.
> + * 0: GPA.SHARED bit is GPA[47]
> + * 1: GPA.SHARED bit is GPA[51]
> + */
> +#define TDX_EXEC_CONTROL_MAX_GPAW      BIT_ULL(0)
> +
> +/*
> + * TDH.VP.ENTER, TDG.VP.VMCALL preserves RBP
> + * 0: RBP can be used for TDG.VP.VMCALL input. RBP is clobbered.
> + * 1: RBP can't be used for TDG.VP.VMCALL input. RBP is preserved.
> + */
> +#define TDX_CONTROL_FLAG_NO_RBP_MOD	BIT_ULL(2)
> +
> +
> +/*
> + * TDX requires the frequency to be defined in units of 25MHz, which is the
> + * frequency of the core crystal clock on TDX-capable platforms, i.e. the TDX
> + * module can only program frequencies that are multiples of 25MHz.  The
> + * frequency must be between 100mhz and 10ghz (inclusive).
> + */
> +#define TDX_TSC_KHZ_TO_25MHZ(tsc_in_khz)	((tsc_in_khz) / (25 * 1000))
> +#define TDX_TSC_25MHZ_TO_KHZ(tsc_in_25mhz)	((tsc_in_25mhz) * (25 * 1000))
> +#define TDX_MIN_TSC_FREQUENCY_KHZ		(100 * 1000)
> +#define TDX_MAX_TSC_FREQUENCY_KHZ		(10 * 1000 * 1000)
> +
> +union tdx_sept_entry {
> +	struct {
> +		u64 r		:  1;
> +		u64 w		:  1;
> +		u64 x		:  1;
> +		u64 mt		:  3;
> +		u64 ipat	:  1;
> +		u64 leaf	:  1;
> +		u64 a		:  1;
> +		u64 d		:  1;
> +		u64 xu		:  1;
> +		u64 ignored0	:  1;
> +		u64 pfn		: 40;
> +		u64 reserved	:  5;
> +		u64 vgp		:  1;
> +		u64 pwa		:  1;
> +		u64 ignored1	:  1;
> +		u64 sss		:  1;
> +		u64 spp		:  1;
> +		u64 ignored2	:  1;
> +		u64 sve		:  1;
> +	};
> +	u64 raw;
> +};
> +
> +enum tdx_sept_entry_state {
> +	TDX_SEPT_FREE = 0,
> +	TDX_SEPT_BLOCKED = 1,
> +	TDX_SEPT_PENDING = 2,
> +	TDX_SEPT_PENDING_BLOCKED = 3,
> +	TDX_SEPT_PRESENT = 4,
> +};
> +
> +union tdx_sept_level_state {
> +	struct {
> +		u64 level	:  3;
> +		u64 reserved0	:  5;
> +		u64 state	:  8;
> +		u64 reserved1	: 48;
> +	};
> +	u64 raw;
> +};
> +
> +/*
> + * Global scope metadata field ID.
> + * See Table "Global Scope Metadata", TDX module 1.5 ABI spec.
> + */
> +#define MD_FIELD_ID_SYS_ATTRIBUTES		0x0A00000200000000ULL
> +#define MD_FIELD_ID_FEATURES0			0x0A00000300000008ULL
> +#define MD_FIELD_ID_ATTRS_FIXED0		0x1900000300000000ULL
> +#define MD_FIELD_ID_ATTRS_FIXED1		0x1900000300000001ULL
> +#define MD_FIELD_ID_XFAM_FIXED0			0x1900000300000002ULL
> +#define MD_FIELD_ID_XFAM_FIXED1			0x1900000300000003ULL
> +
> +#define MD_FIELD_ID_TDCS_BASE_SIZE		0x9800000100000100ULL
> +#define MD_FIELD_ID_TDVPS_BASE_SIZE		0x9800000100000200ULL
> +
> +#define MD_FIELD_ID_NUM_CPUID_CONFIG		0x9900000100000004ULL
> +#define MD_FIELD_ID_CPUID_CONFIG_LEAVES		0x9900000300000400ULL
> +#define MD_FIELD_ID_CPUID_CONFIG_VALUES		0x9900000300000500ULL
> +
> +#define MD_FIELD_ID_FEATURES0_NO_RBP_MOD	BIT_ULL(18)
> +
> +#define TDX_MAX_NR_CPUID_CONFIGS       37
> +
> +#define TDX_MD_ELEMENT_SIZE_8BITS      0
> +#define TDX_MD_ELEMENT_SIZE_16BITS     1
> +#define TDX_MD_ELEMENT_SIZE_32BITS     2
> +#define TDX_MD_ELEMENT_SIZE_64BITS     3
> +
> +union tdx_md_field_id {
> +	struct {
> +		u64 field                       : 24;
> +		u64 reserved0                   : 8;
> +		u64 element_size_code           : 2;
> +		u64 last_element_in_field       : 4;
> +		u64 reserved1                   : 3;
> +		u64 inc_size                    : 1;
> +		u64 write_mask_valid            : 1;
> +		u64 context                     : 3;
> +		u64 reserved2                   : 1;
> +		u64 class                       : 6;
> +		u64 reserved3                   : 1;
> +		u64 non_arch                    : 1;
> +	};
> +	u64 raw;
> +};
> +
> +#define TDX_MD_ELEMENT_SIZE_CODE(_field_id)			\
> +	({ union tdx_md_field_id _fid = { .raw = (_field_id)};  \
> +		_fid.element_size_code; })
> +
> +#endif /* __KVM_X86_TDX_ARCH_H */
> --
> 2.25.1
>
>
Isaku Yamahata March 22, 2024, 11:15 p.m. UTC | #7
On Fri, Mar 22, 2024 at 10:57:53AM +1300,
"Huang, Kai" <kai.huang@intel.com> wrote:

> 
> > +/*
> > + * TDX SEAMCALL API function leaves
> > + */
> > +#define TDH_VP_ENTER			0
> > +#define TDH_MNG_ADDCX			1
> > +#define TDH_MEM_PAGE_ADD		2
> > +#define TDH_MEM_SEPT_ADD		3
> > +#define TDH_VP_ADDCX			4
> > +#define TDH_MEM_PAGE_RELOCATE		5
> 
> I don't think the "RELOCATE" is needed in this patchset?
> 
> > +#define TDH_MEM_PAGE_AUG		6
> > +#define TDH_MEM_RANGE_BLOCK		7
> > +#define TDH_MNG_KEY_CONFIG		8
> > +#define TDH_MNG_CREATE			9
> > +#define TDH_VP_CREATE			10
> > +#define TDH_MNG_RD			11
> > +#define TDH_MR_EXTEND			16
> > +#define TDH_MR_FINALIZE			17
> > +#define TDH_VP_FLUSH			18
> > +#define TDH_MNG_VPFLUSHDONE		19
> > +#define TDH_MNG_KEY_FREEID		20
> > +#define TDH_MNG_INIT			21
> > +#define TDH_VP_INIT			22
> > +#define TDH_MEM_SEPT_RD			25
> > +#define TDH_VP_RD			26
> > +#define TDH_MNG_KEY_RECLAIMID		27
> > +#define TDH_PHYMEM_PAGE_RECLAIM		28
> > +#define TDH_MEM_PAGE_REMOVE		29
> > +#define TDH_MEM_SEPT_REMOVE		30
> > +#define TDH_SYS_RD			34
> > +#define TDH_MEM_TRACK			38
> > +#define TDH_MEM_RANGE_UNBLOCK		39
> > +#define TDH_PHYMEM_CACHE_WB		40
> > +#define TDH_PHYMEM_PAGE_WBINVD		41
> > +#define TDH_VP_WR			43
> > +#define TDH_SYS_LP_SHUTDOWN		44
> 
> And LP_SHUTDOWN is certainly not needed.
> 
> Could you check whether there are others that are not needed?
> 
> Perhaps we should just include macros that got used, but anyway.

Ok, let's break this patch into other patches that uses the constants first.


> > +/*
> > + * TD_PARAMS is provided as an input to TDH_MNG_INIT, the size of which is 1024B.
> > + */
> 
> Why is this comment applied to TDX_MAX_VCPUS?
> 
> > +#define TDX_MAX_VCPUS	(~(u16)0)
> 
> And is (~(16)0) an architectural value defined by TDX spec, or just SW value
> that you just put here for convenience?
> 
> I mean, is it possible that different version of TDX module have different
> implementation of MAX_CPU, e.g., module 1.0 only supports X but module 1.5
> increases to Y where Y > X?

This is architectural because it the field width is 16 bits.  Each version
of TDX module may have their own limitation with metadata, MAX_VCPUS_PER_TD.


> Anyway, looks you can safely move this to the patch to enable CAP_MAX_CPU?

Yes.


> > +
> > +struct td_params {
> > +	u64 attributes;
> > +	u64 xfam;
> > +	u16 max_vcpus;
> > +	u8 reserved0[6];
> > +
> > +	u64 eptp_controls;
> > +	u64 exec_controls;
> > +	u16 tsc_frequency;
> > +	u8  reserved1[38];
> > +
> > +	u64 mrconfigid[6];
> > +	u64 mrowner[6];
> > +	u64 mrownerconfig[6];
> > +	u64 reserved2[4];
> > +
> > +	union {
> > +		DECLARE_FLEX_ARRAY(struct tdx_cpuid_value, cpuid_values);
> > +		u8 reserved3[768];
> 
> I am not sure you need the 'reseved3[768]', unless you need to make
> sieof(struct td_params) return 1024?

I'm trying to make it 1024 because the spec defines the struct size is 1024.
Maybe I can add BUILD_BUG_ON(sizeof(struct td_params) != 1024);


> > +#define TDX_MD_ELEMENT_SIZE_8BITS      0
> > +#define TDX_MD_ELEMENT_SIZE_16BITS     1
> > +#define TDX_MD_ELEMENT_SIZE_32BITS     2
> > +#define TDX_MD_ELEMENT_SIZE_64BITS     3
> > +
> > +union tdx_md_field_id {
> > +	struct {
> > +		u64 field                       : 24;
> > +		u64 reserved0                   : 8;
> > +		u64 element_size_code           : 2;
> > +		u64 last_element_in_field       : 4;
> > +		u64 reserved1                   : 3;
> > +		u64 inc_size                    : 1;
> > +		u64 write_mask_valid            : 1;
> > +		u64 context                     : 3;
> > +		u64 reserved2                   : 1;
> > +		u64 class                       : 6;
> > +		u64 reserved3                   : 1;
> > +		u64 non_arch                    : 1;
> > +	};
> > +	u64 raw;
> > +};
> 
> Could you clarify why we need such detailed definition?  For metadata
> element size you can use simple '&' and '<<' to get the result.

Now your TDX host patch has the definition in arch/x86/include/asm/tdx.h,
I'll eliminate this one here and use your definition.
Isaku Yamahata March 22, 2024, 11:17 p.m. UTC | #8
On Fri, Mar 22, 2024 at 03:06:35PM +0800,
Yuan Yao <yuan.yao@linux.intel.com> wrote:

> On Mon, Feb 26, 2024 at 12:25:29AM -0800, isaku.yamahata@intel.com wrote:
> > From: Isaku Yamahata <isaku.yamahata@intel.com>
> >
> > Define architectural definitions for KVM to issue the TDX SEAMCALLs.
> >
> > Structures and values that are architecturally defined in the TDX module
> > specifications the chapter of ABI Reference.
> >
> > Co-developed-by: Sean Christopherson <sean.j.christopherson@intel.com>
> > Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
> > Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> > Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
> > Reviewed-by: Xiaoyao Li <xiaoyao.li@intel.com>
> > ---
> > v19:
> > - drop tdvmcall constants by Xiaoyao
> >
> > v18:
> > - Add metadata field id
> >
> > Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> > ---
> >  arch/x86/kvm/vmx/tdx_arch.h | 265 ++++++++++++++++++++++++++++++++++++
> >  1 file changed, 265 insertions(+)
> >  create mode 100644 arch/x86/kvm/vmx/tdx_arch.h
> >
> > diff --git a/arch/x86/kvm/vmx/tdx_arch.h b/arch/x86/kvm/vmx/tdx_arch.h
> > new file mode 100644
> > index 000000000000..e2c1a6f429d7
> > --- /dev/null
> > +++ b/arch/x86/kvm/vmx/tdx_arch.h
> > @@ -0,0 +1,265 @@
> > +/* SPDX-License-Identifier: GPL-2.0 */
> > +/* architectural constants/data definitions for TDX SEAMCALLs */
> > +
> > +#ifndef __KVM_X86_TDX_ARCH_H
> > +#define __KVM_X86_TDX_ARCH_H
> > +
> > +#include <linux/types.h>
> > +
> > +/*
> > + * TDX SEAMCALL API function leaves
> > + */
> > +#define TDH_VP_ENTER			0
> > +#define TDH_MNG_ADDCX			1
> > +#define TDH_MEM_PAGE_ADD		2
> > +#define TDH_MEM_SEPT_ADD		3
> > +#define TDH_VP_ADDCX			4
> > +#define TDH_MEM_PAGE_RELOCATE		5
> > +#define TDH_MEM_PAGE_AUG		6
> > +#define TDH_MEM_RANGE_BLOCK		7
> > +#define TDH_MNG_KEY_CONFIG		8
> > +#define TDH_MNG_CREATE			9
> > +#define TDH_VP_CREATE			10
> > +#define TDH_MNG_RD			11
> > +#define TDH_MR_EXTEND			16
> > +#define TDH_MR_FINALIZE			17
> > +#define TDH_VP_FLUSH			18
> > +#define TDH_MNG_VPFLUSHDONE		19
> > +#define TDH_MNG_KEY_FREEID		20
> > +#define TDH_MNG_INIT			21
> > +#define TDH_VP_INIT			22
> > +#define TDH_MEM_SEPT_RD			25
> > +#define TDH_VP_RD			26
> > +#define TDH_MNG_KEY_RECLAIMID		27
> > +#define TDH_PHYMEM_PAGE_RECLAIM		28
> > +#define TDH_MEM_PAGE_REMOVE		29
> > +#define TDH_MEM_SEPT_REMOVE		30
> > +#define TDH_SYS_RD			34
> > +#define TDH_MEM_TRACK			38
> > +#define TDH_MEM_RANGE_UNBLOCK		39
> > +#define TDH_PHYMEM_CACHE_WB		40
> > +#define TDH_PHYMEM_PAGE_WBINVD		41
> > +#define TDH_VP_WR			43
> > +#define TDH_SYS_LP_SHUTDOWN		44
> > +
> > +/* TDX control structure (TDR/TDCS/TDVPS) field access codes */
> > +#define TDX_NON_ARCH			BIT_ULL(63)
> > +#define TDX_CLASS_SHIFT			56
> > +#define TDX_FIELD_MASK			GENMASK_ULL(31, 0)
> > +
> > +#define __BUILD_TDX_FIELD(non_arch, class, field)	\
> > +	(((non_arch) ? TDX_NON_ARCH : 0) |		\
> > +	 ((u64)(class) << TDX_CLASS_SHIFT) |		\
> > +	 ((u64)(field) & TDX_FIELD_MASK))
> > +
> > +#define BUILD_TDX_FIELD(class, field)			\
> > +	__BUILD_TDX_FIELD(false, (class), (field))
> > +
> > +#define BUILD_TDX_FIELD_NON_ARCH(class, field)		\
> > +	__BUILD_TDX_FIELD(true, (class), (field))
> > +
> > +
> > +/* Class code for TD */
> > +#define TD_CLASS_EXECUTION_CONTROLS	17ULL
> > +
> > +/* Class code for TDVPS */
> > +#define TDVPS_CLASS_VMCS		0ULL
> > +#define TDVPS_CLASS_GUEST_GPR		16ULL
> > +#define TDVPS_CLASS_OTHER_GUEST		17ULL
> > +#define TDVPS_CLASS_MANAGEMENT		32ULL
> > +
> > +enum tdx_tdcs_execution_control {
> > +	TD_TDCS_EXEC_TSC_OFFSET = 10,
> > +};
> > +
> > +/* @field is any of enum tdx_tdcs_execution_control */
> > +#define TDCS_EXEC(field)		BUILD_TDX_FIELD(TD_CLASS_EXECUTION_CONTROLS, (field))
> > +
> > +/* @field is the VMCS field encoding */
> > +#define TDVPS_VMCS(field)		BUILD_TDX_FIELD(TDVPS_CLASS_VMCS, (field))
> > +
> > +enum tdx_vcpu_guest_other_state {
> > +	TD_VCPU_STATE_DETAILS_NON_ARCH = 0x100,
> > +};
> > +
> > +union tdx_vcpu_state_details {
> > +	struct {
> > +		u64 vmxip	: 1;
> > +		u64 reserved	: 63;
> > +	};
> > +	u64 full;
> > +};
> > +
> > +/* @field is any of enum tdx_guest_other_state */
> > +#define TDVPS_STATE(field)		BUILD_TDX_FIELD(TDVPS_CLASS_OTHER_GUEST, (field))
> > +#define TDVPS_STATE_NON_ARCH(field)	BUILD_TDX_FIELD_NON_ARCH(TDVPS_CLASS_OTHER_GUEST, (field))
> > +
> > +/* Management class fields */
> > +enum tdx_vcpu_guest_management {
> > +	TD_VCPU_PEND_NMI = 11,
> > +};
> > +
> > +/* @field is any of enum tdx_vcpu_guest_management */
> > +#define TDVPS_MANAGEMENT(field)		BUILD_TDX_FIELD(TDVPS_CLASS_MANAGEMENT, (field))
> > +
> > +#define TDX_EXTENDMR_CHUNKSIZE		256
> > +
> > +struct tdx_cpuid_value {
> > +	u32 eax;
> > +	u32 ebx;
> > +	u32 ecx;
> > +	u32 edx;
> > +} __packed;
> > +
> > +#define TDX_TD_ATTRIBUTE_DEBUG		BIT_ULL(0)
> 
> This series doesn't really touch off-TD things, so you can remove this.

Yes. I'll clean up to delete unused ones including this.
Sean Christopherson April 3, 2024, 3:04 p.m. UTC | #9
On Mon, Feb 26, 2024, isaku.yamahata@intel.com wrote:
> +union tdx_vcpu_state_details {
> +	struct {
> +		u64 vmxip	: 1;
> +		u64 reserved	: 63;
> +	};
> +	u64 full;
> +};

No unions please.  KVM uses unions in a few places where they are the lesser of
all evils, but in general, unions are frowned upon.  Bitfields in particular are
strongly discourage, as they are a nightmare to read/review and tend to generate
bad code.

E.g. for this one, something like (names aren't great)

static inline bool tdx_has_pending_virtual_interrupt(struct kvm_vcpu *vcpu)
{
	return <get "non arch field"> & TDX_VCPU_STATE_VMXIP;
}

> +union tdx_sept_entry {
> +	struct {
> +		u64 r		:  1;
> +		u64 w		:  1;
> +		u64 x		:  1;
> +		u64 mt		:  3;
> +		u64 ipat	:  1;
> +		u64 leaf	:  1;
> +		u64 a		:  1;
> +		u64 d		:  1;
> +		u64 xu		:  1;
> +		u64 ignored0	:  1;
> +		u64 pfn		: 40;
> +		u64 reserved	:  5;
> +		u64 vgp		:  1;
> +		u64 pwa		:  1;
> +		u64 ignored1	:  1;
> +		u64 sss		:  1;
> +		u64 spp		:  1;
> +		u64 ignored2	:  1;
> +		u64 sve		:  1;

Yeah, NAK to these unions.  They are crappy duplicates of existing definitions,
e.g. it took me a few seconds to realize SVE is SUPPRESS_VE, which is far too
long.

> +	};
> +	u64 raw;
> +};
> +enum tdx_sept_entry_state {
> +	TDX_SEPT_FREE = 0,
> +	TDX_SEPT_BLOCKED = 1,
> +	TDX_SEPT_PENDING = 2,
> +	TDX_SEPT_PENDING_BLOCKED = 3,
> +	TDX_SEPT_PRESENT = 4,
> +};
> +
> +union tdx_sept_level_state {
> +	struct {
> +		u64 level	:  3;
> +		u64 reserved0	:  5;
> +		u64 state	:  8;
> +		u64 reserved1	: 48;
> +	};
> +	u64 raw;
> +};

Similar thing here.  Depending on what happens with the SEAMCALL argument mess,
the code can look somethign like:

static u8 tdx_get_sept_level(struct tdx_module_args *out)
{
	return out->rdx & TDX_SEPT_LEVEL_MASK;
}

static u8 tdx_get_sept_state(struct tdx_module_args *out)
{
	return (out->rdx & TDX_SEPT_STATE_MASK) >> TDX_SEPT_STATE_SHIFT;
}

> +union tdx_md_field_id {
> +	struct {
> +		u64 field                       : 24;
> +		u64 reserved0                   : 8;
> +		u64 element_size_code           : 2;
> +		u64 last_element_in_field       : 4;
> +		u64 reserved1                   : 3;
> +		u64 inc_size                    : 1;
> +		u64 write_mask_valid            : 1;
> +		u64 context                     : 3;
> +		u64 reserved2                   : 1;
> +		u64 class                       : 6;
> +		u64 reserved3                   : 1;
> +		u64 non_arch                    : 1;
> +	};
> +	u64 raw;
> +};
> +
> +#define TDX_MD_ELEMENT_SIZE_CODE(_field_id)			\
> +	({ union tdx_md_field_id _fid = { .raw = (_field_id)};  \
> +		_fid.element_size_code; })

Yeah, no thanks.  MASK + SHIFT will do just fine.
Isaku Yamahata April 3, 2024, 4:30 p.m. UTC | #10
On Wed, Apr 03, 2024 at 08:04:03AM -0700,
Sean Christopherson <seanjc@google.com> wrote:

> On Mon, Feb 26, 2024, isaku.yamahata@intel.com wrote:
> > +union tdx_vcpu_state_details {
> > +	struct {
> > +		u64 vmxip	: 1;
> > +		u64 reserved	: 63;
> > +	};
> > +	u64 full;
> > +};
> 
> No unions please.  KVM uses unions in a few places where they are the lesser of
> all evils, but in general, unions are frowned upon.  Bitfields in particular are
> strongly discourage, as they are a nightmare to read/review and tend to generate
> bad code.
> 
> E.g. for this one, something like (names aren't great)
> 
> static inline bool tdx_has_pending_virtual_interrupt(struct kvm_vcpu *vcpu)
> {
> 	return <get "non arch field"> & TDX_VCPU_STATE_VMXIP;
> }


Sure, let me replace them with mask and shift.
Huang, Kai April 16, 2024, 12:55 a.m. UTC | #11
On 5/03/2024 9:21 pm, Isaku Yamahata wrote:
> On Fri, Mar 01, 2024 at 03:25:31PM +0800,
> Yan Zhao <yan.y.zhao@intel.com> wrote:
> 
>>> + * TD_PARAMS is provided as an input to TDH_MNG_INIT, the size of which is 1024B.
>>> + */
>>> +#define TDX_MAX_VCPUS	(~(u16)0)
>> This value will be treated as -1 in tdx_vm_init(),
>> 	"kvm->max_vcpus = min(kvm->max_vcpus, TDX_MAX_VCPUS);"
>>
>> This will lead to kvm->max_vcpus being -1 by default.
>> Is this by design or just an error?
>> If it's by design, why not set kvm->max_vcpus = -1 in tdx_vm_init() directly.
>> If an unexpected error, may below is better?
>>
>> #define TDX_MAX_VCPUS   (int)((u16)(~0UL))
>> or
>> #define TDX_MAX_VCPUS 65536
> 
> You're right. I'll use ((int)U16_MAX).
> As TDX 1.5 introduced metadata MAX_VCPUS_PER_TD, I'll update to get the value
> and trim it further. Something following.
> 

[...]

>   
> +	u16 max_vcpus_per_td;
> +

[...]

> -	kvm->max_vcpus = min(kvm->max_vcpus, TDX_MAX_VCPUS);
> +	kvm->max_vcpus = min3(kvm->max_vcpus, tdx_info->max_vcpus_per_td,
> +			     TDX_MAX_VCPUS);
>   

[...]

> -#define TDX_MAX_VCPUS	(~(u16)0)
> +#define TDX_MAX_VCPUS	((int)U16_MAX)

Why do you even need TDX_MAX_VCPUS, given it cannot exceed U16_MAX and 
you will have the 'u16 max_vcpus_per_td' anyway?

IIUC, in KVM_ENABLE_CAP(KVM_CAP_MAX_VCPUS), we can overwrite the 
kvm->max_vcpus to the 'max_vcpus' provided by the userspace, and make 
sure it doesn't exceed tdx_info->max_vcpus_per_td.

Anything I am missing?
Isaku Yamahata April 16, 2024, 4:28 p.m. UTC | #12
On Tue, Apr 16, 2024 at 12:55:33PM +1200,
"Huang, Kai" <kai.huang@intel.com> wrote:

> 
> 
> On 5/03/2024 9:21 pm, Isaku Yamahata wrote:
> > On Fri, Mar 01, 2024 at 03:25:31PM +0800,
> > Yan Zhao <yan.y.zhao@intel.com> wrote:
> > 
> > > > + * TD_PARAMS is provided as an input to TDH_MNG_INIT, the size of which is 1024B.
> > > > + */
> > > > +#define TDX_MAX_VCPUS	(~(u16)0)
> > > This value will be treated as -1 in tdx_vm_init(),
> > > 	"kvm->max_vcpus = min(kvm->max_vcpus, TDX_MAX_VCPUS);"
> > > 
> > > This will lead to kvm->max_vcpus being -1 by default.
> > > Is this by design or just an error?
> > > If it's by design, why not set kvm->max_vcpus = -1 in tdx_vm_init() directly.
> > > If an unexpected error, may below is better?
> > > 
> > > #define TDX_MAX_VCPUS   (int)((u16)(~0UL))
> > > or
> > > #define TDX_MAX_VCPUS 65536
> > 
> > You're right. I'll use ((int)U16_MAX).
> > As TDX 1.5 introduced metadata MAX_VCPUS_PER_TD, I'll update to get the value
> > and trim it further. Something following.
> > 
> 
> [...]
> 
> > +	u16 max_vcpus_per_td;
> > +
> 
> [...]
> 
> > -	kvm->max_vcpus = min(kvm->max_vcpus, TDX_MAX_VCPUS);
> > +	kvm->max_vcpus = min3(kvm->max_vcpus, tdx_info->max_vcpus_per_td,
> > +			     TDX_MAX_VCPUS);
> 
> [...]
> 
> > -#define TDX_MAX_VCPUS	(~(u16)0)
> > +#define TDX_MAX_VCPUS	((int)U16_MAX)
> 
> Why do you even need TDX_MAX_VCPUS, given it cannot exceed U16_MAX and you
> will have the 'u16 max_vcpus_per_td' anyway?
> 
> IIUC, in KVM_ENABLE_CAP(KVM_CAP_MAX_VCPUS), we can overwrite the
> kvm->max_vcpus to the 'max_vcpus' provided by the userspace, and make sure
> it doesn't exceed tdx_info->max_vcpus_per_td.
> 
> Anything I am missing?

With the latest TDX 1.5 module, we don't need TDX_MAX_VCPUS.

The metadata MD_FIELD_ID_MAX_VCPUS_PER_TD was introduced at the middle version
of TDX 1.5. (I don't remember the exact version.), the logic was something
like as follows.  Now if we fail to read the metadata, disable TDX.

read metadata MD_FIELD_ID_MAX_VCPUS_PER_TD;
if success
  tdx_info->max_vcpu_per_td = the value read metadata
else
  tdx_info->max_vcpu_per_td = TDX_MAX_VCPUS;
Huang, Kai April 16, 2024, 10:06 p.m. UTC | #13
On 17/04/2024 4:28 am, Yamahata, Isaku wrote:
> On Tue, Apr 16, 2024 at 12:55:33PM +1200,
> "Huang, Kai" <kai.huang@intel.com> wrote:
> 
>>
>>
>> On 5/03/2024 9:21 pm, Isaku Yamahata wrote:
>>> On Fri, Mar 01, 2024 at 03:25:31PM +0800,
>>> Yan Zhao <yan.y.zhao@intel.com> wrote:
>>>
>>>>> + * TD_PARAMS is provided as an input to TDH_MNG_INIT, the size of which is 1024B.
>>>>> + */
>>>>> +#define TDX_MAX_VCPUS	(~(u16)0)
>>>> This value will be treated as -1 in tdx_vm_init(),
>>>> 	"kvm->max_vcpus = min(kvm->max_vcpus, TDX_MAX_VCPUS);"
>>>>
>>>> This will lead to kvm->max_vcpus being -1 by default.
>>>> Is this by design or just an error?
>>>> If it's by design, why not set kvm->max_vcpus = -1 in tdx_vm_init() directly.
>>>> If an unexpected error, may below is better?
>>>>
>>>> #define TDX_MAX_VCPUS   (int)((u16)(~0UL))
>>>> or
>>>> #define TDX_MAX_VCPUS 65536
>>>
>>> You're right. I'll use ((int)U16_MAX).
>>> As TDX 1.5 introduced metadata MAX_VCPUS_PER_TD, I'll update to get the value
>>> and trim it further. Something following.
>>>
>>
>> [...]
>>
>>> +	u16 max_vcpus_per_td;
>>> +
>>
>> [...]
>>
>>> -	kvm->max_vcpus = min(kvm->max_vcpus, TDX_MAX_VCPUS);
>>> +	kvm->max_vcpus = min3(kvm->max_vcpus, tdx_info->max_vcpus_per_td,
>>> +			     TDX_MAX_VCPUS);
>>
>> [...]
>>
>>> -#define TDX_MAX_VCPUS	(~(u16)0)
>>> +#define TDX_MAX_VCPUS	((int)U16_MAX)
>>
>> Why do you even need TDX_MAX_VCPUS, given it cannot exceed U16_MAX and you
>> will have the 'u16 max_vcpus_per_td' anyway?
>>
>> IIUC, in KVM_ENABLE_CAP(KVM_CAP_MAX_VCPUS), we can overwrite the
>> kvm->max_vcpus to the 'max_vcpus' provided by the userspace, and make sure
>> it doesn't exceed tdx_info->max_vcpus_per_td.
>>
>> Anything I am missing?
> 
> With the latest TDX 1.5 module, we don't need TDX_MAX_VCPUS.
> 
> The metadata MD_FIELD_ID_MAX_VCPUS_PER_TD was introduced at the middle version
> of TDX 1.5. (I don't remember the exact version.), the logic was something
> like as follows.  Now if we fail to read the metadata, disable TDX.
> 
> read metadata MD_FIELD_ID_MAX_VCPUS_PER_TD;
> if success
>    tdx_info->max_vcpu_per_td = the value read metadata
> else
>    tdx_info->max_vcpu_per_td = TDX_MAX_VCPUS;
> 

OK.  But even the SEAMCALL can fail, we can just use U16_MAX directly 
when it fails given we can see clearly the type of max_vcpu_per_td is 'u16'.

if success
	tdx_info->max_vcpu_per_td = the value read metadata
else
	tdx_info->max_vcpu_per_td = U16_MAX;

So I don't see why TDX_MAX_VCPUS is needed (especially in tdx_arch.h as 
it is not an architectural value but just some value chosen for our 
convenience).
diff mbox series

Patch

diff --git a/arch/x86/kvm/vmx/tdx_arch.h b/arch/x86/kvm/vmx/tdx_arch.h
new file mode 100644
index 000000000000..e2c1a6f429d7
--- /dev/null
+++ b/arch/x86/kvm/vmx/tdx_arch.h
@@ -0,0 +1,265 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+/* architectural constants/data definitions for TDX SEAMCALLs */
+
+#ifndef __KVM_X86_TDX_ARCH_H
+#define __KVM_X86_TDX_ARCH_H
+
+#include <linux/types.h>
+
+/*
+ * TDX SEAMCALL API function leaves
+ */
+#define TDH_VP_ENTER			0
+#define TDH_MNG_ADDCX			1
+#define TDH_MEM_PAGE_ADD		2
+#define TDH_MEM_SEPT_ADD		3
+#define TDH_VP_ADDCX			4
+#define TDH_MEM_PAGE_RELOCATE		5
+#define TDH_MEM_PAGE_AUG		6
+#define TDH_MEM_RANGE_BLOCK		7
+#define TDH_MNG_KEY_CONFIG		8
+#define TDH_MNG_CREATE			9
+#define TDH_VP_CREATE			10
+#define TDH_MNG_RD			11
+#define TDH_MR_EXTEND			16
+#define TDH_MR_FINALIZE			17
+#define TDH_VP_FLUSH			18
+#define TDH_MNG_VPFLUSHDONE		19
+#define TDH_MNG_KEY_FREEID		20
+#define TDH_MNG_INIT			21
+#define TDH_VP_INIT			22
+#define TDH_MEM_SEPT_RD			25
+#define TDH_VP_RD			26
+#define TDH_MNG_KEY_RECLAIMID		27
+#define TDH_PHYMEM_PAGE_RECLAIM		28
+#define TDH_MEM_PAGE_REMOVE		29
+#define TDH_MEM_SEPT_REMOVE		30
+#define TDH_SYS_RD			34
+#define TDH_MEM_TRACK			38
+#define TDH_MEM_RANGE_UNBLOCK		39
+#define TDH_PHYMEM_CACHE_WB		40
+#define TDH_PHYMEM_PAGE_WBINVD		41
+#define TDH_VP_WR			43
+#define TDH_SYS_LP_SHUTDOWN		44
+
+/* TDX control structure (TDR/TDCS/TDVPS) field access codes */
+#define TDX_NON_ARCH			BIT_ULL(63)
+#define TDX_CLASS_SHIFT			56
+#define TDX_FIELD_MASK			GENMASK_ULL(31, 0)
+
+#define __BUILD_TDX_FIELD(non_arch, class, field)	\
+	(((non_arch) ? TDX_NON_ARCH : 0) |		\
+	 ((u64)(class) << TDX_CLASS_SHIFT) |		\
+	 ((u64)(field) & TDX_FIELD_MASK))
+
+#define BUILD_TDX_FIELD(class, field)			\
+	__BUILD_TDX_FIELD(false, (class), (field))
+
+#define BUILD_TDX_FIELD_NON_ARCH(class, field)		\
+	__BUILD_TDX_FIELD(true, (class), (field))
+
+
+/* Class code for TD */
+#define TD_CLASS_EXECUTION_CONTROLS	17ULL
+
+/* Class code for TDVPS */
+#define TDVPS_CLASS_VMCS		0ULL
+#define TDVPS_CLASS_GUEST_GPR		16ULL
+#define TDVPS_CLASS_OTHER_GUEST		17ULL
+#define TDVPS_CLASS_MANAGEMENT		32ULL
+
+enum tdx_tdcs_execution_control {
+	TD_TDCS_EXEC_TSC_OFFSET = 10,
+};
+
+/* @field is any of enum tdx_tdcs_execution_control */
+#define TDCS_EXEC(field)		BUILD_TDX_FIELD(TD_CLASS_EXECUTION_CONTROLS, (field))
+
+/* @field is the VMCS field encoding */
+#define TDVPS_VMCS(field)		BUILD_TDX_FIELD(TDVPS_CLASS_VMCS, (field))
+
+enum tdx_vcpu_guest_other_state {
+	TD_VCPU_STATE_DETAILS_NON_ARCH = 0x100,
+};
+
+union tdx_vcpu_state_details {
+	struct {
+		u64 vmxip	: 1;
+		u64 reserved	: 63;
+	};
+	u64 full;
+};
+
+/* @field is any of enum tdx_guest_other_state */
+#define TDVPS_STATE(field)		BUILD_TDX_FIELD(TDVPS_CLASS_OTHER_GUEST, (field))
+#define TDVPS_STATE_NON_ARCH(field)	BUILD_TDX_FIELD_NON_ARCH(TDVPS_CLASS_OTHER_GUEST, (field))
+
+/* Management class fields */
+enum tdx_vcpu_guest_management {
+	TD_VCPU_PEND_NMI = 11,
+};
+
+/* @field is any of enum tdx_vcpu_guest_management */
+#define TDVPS_MANAGEMENT(field)		BUILD_TDX_FIELD(TDVPS_CLASS_MANAGEMENT, (field))
+
+#define TDX_EXTENDMR_CHUNKSIZE		256
+
+struct tdx_cpuid_value {
+	u32 eax;
+	u32 ebx;
+	u32 ecx;
+	u32 edx;
+} __packed;
+
+#define TDX_TD_ATTRIBUTE_DEBUG		BIT_ULL(0)
+#define TDX_TD_ATTR_SEPT_VE_DISABLE	BIT_ULL(28)
+#define TDX_TD_ATTRIBUTE_PKS		BIT_ULL(30)
+#define TDX_TD_ATTRIBUTE_KL		BIT_ULL(31)
+#define TDX_TD_ATTRIBUTE_PERFMON	BIT_ULL(63)
+
+/*
+ * TD_PARAMS is provided as an input to TDH_MNG_INIT, the size of which is 1024B.
+ */
+#define TDX_MAX_VCPUS	(~(u16)0)
+
+struct td_params {
+	u64 attributes;
+	u64 xfam;
+	u16 max_vcpus;
+	u8 reserved0[6];
+
+	u64 eptp_controls;
+	u64 exec_controls;
+	u16 tsc_frequency;
+	u8  reserved1[38];
+
+	u64 mrconfigid[6];
+	u64 mrowner[6];
+	u64 mrownerconfig[6];
+	u64 reserved2[4];
+
+	union {
+		DECLARE_FLEX_ARRAY(struct tdx_cpuid_value, cpuid_values);
+		u8 reserved3[768];
+	};
+} __packed __aligned(1024);
+
+/*
+ * Guest uses MAX_PA for GPAW when set.
+ * 0: GPA.SHARED bit is GPA[47]
+ * 1: GPA.SHARED bit is GPA[51]
+ */
+#define TDX_EXEC_CONTROL_MAX_GPAW      BIT_ULL(0)
+
+/*
+ * TDH.VP.ENTER, TDG.VP.VMCALL preserves RBP
+ * 0: RBP can be used for TDG.VP.VMCALL input. RBP is clobbered.
+ * 1: RBP can't be used for TDG.VP.VMCALL input. RBP is preserved.
+ */
+#define TDX_CONTROL_FLAG_NO_RBP_MOD	BIT_ULL(2)
+
+
+/*
+ * TDX requires the frequency to be defined in units of 25MHz, which is the
+ * frequency of the core crystal clock on TDX-capable platforms, i.e. the TDX
+ * module can only program frequencies that are multiples of 25MHz.  The
+ * frequency must be between 100mhz and 10ghz (inclusive).
+ */
+#define TDX_TSC_KHZ_TO_25MHZ(tsc_in_khz)	((tsc_in_khz) / (25 * 1000))
+#define TDX_TSC_25MHZ_TO_KHZ(tsc_in_25mhz)	((tsc_in_25mhz) * (25 * 1000))
+#define TDX_MIN_TSC_FREQUENCY_KHZ		(100 * 1000)
+#define TDX_MAX_TSC_FREQUENCY_KHZ		(10 * 1000 * 1000)
+
+union tdx_sept_entry {
+	struct {
+		u64 r		:  1;
+		u64 w		:  1;
+		u64 x		:  1;
+		u64 mt		:  3;
+		u64 ipat	:  1;
+		u64 leaf	:  1;
+		u64 a		:  1;
+		u64 d		:  1;
+		u64 xu		:  1;
+		u64 ignored0	:  1;
+		u64 pfn		: 40;
+		u64 reserved	:  5;
+		u64 vgp		:  1;
+		u64 pwa		:  1;
+		u64 ignored1	:  1;
+		u64 sss		:  1;
+		u64 spp		:  1;
+		u64 ignored2	:  1;
+		u64 sve		:  1;
+	};
+	u64 raw;
+};
+
+enum tdx_sept_entry_state {
+	TDX_SEPT_FREE = 0,
+	TDX_SEPT_BLOCKED = 1,
+	TDX_SEPT_PENDING = 2,
+	TDX_SEPT_PENDING_BLOCKED = 3,
+	TDX_SEPT_PRESENT = 4,
+};
+
+union tdx_sept_level_state {
+	struct {
+		u64 level	:  3;
+		u64 reserved0	:  5;
+		u64 state	:  8;
+		u64 reserved1	: 48;
+	};
+	u64 raw;
+};
+
+/*
+ * Global scope metadata field ID.
+ * See Table "Global Scope Metadata", TDX module 1.5 ABI spec.
+ */
+#define MD_FIELD_ID_SYS_ATTRIBUTES		0x0A00000200000000ULL
+#define MD_FIELD_ID_FEATURES0			0x0A00000300000008ULL
+#define MD_FIELD_ID_ATTRS_FIXED0		0x1900000300000000ULL
+#define MD_FIELD_ID_ATTRS_FIXED1		0x1900000300000001ULL
+#define MD_FIELD_ID_XFAM_FIXED0			0x1900000300000002ULL
+#define MD_FIELD_ID_XFAM_FIXED1			0x1900000300000003ULL
+
+#define MD_FIELD_ID_TDCS_BASE_SIZE		0x9800000100000100ULL
+#define MD_FIELD_ID_TDVPS_BASE_SIZE		0x9800000100000200ULL
+
+#define MD_FIELD_ID_NUM_CPUID_CONFIG		0x9900000100000004ULL
+#define MD_FIELD_ID_CPUID_CONFIG_LEAVES		0x9900000300000400ULL
+#define MD_FIELD_ID_CPUID_CONFIG_VALUES		0x9900000300000500ULL
+
+#define MD_FIELD_ID_FEATURES0_NO_RBP_MOD	BIT_ULL(18)
+
+#define TDX_MAX_NR_CPUID_CONFIGS       37
+
+#define TDX_MD_ELEMENT_SIZE_8BITS      0
+#define TDX_MD_ELEMENT_SIZE_16BITS     1
+#define TDX_MD_ELEMENT_SIZE_32BITS     2
+#define TDX_MD_ELEMENT_SIZE_64BITS     3
+
+union tdx_md_field_id {
+	struct {
+		u64 field                       : 24;
+		u64 reserved0                   : 8;
+		u64 element_size_code           : 2;
+		u64 last_element_in_field       : 4;
+		u64 reserved1                   : 3;
+		u64 inc_size                    : 1;
+		u64 write_mask_valid            : 1;
+		u64 context                     : 3;
+		u64 reserved2                   : 1;
+		u64 class                       : 6;
+		u64 reserved3                   : 1;
+		u64 non_arch                    : 1;
+	};
+	u64 raw;
+};
+
+#define TDX_MD_ELEMENT_SIZE_CODE(_field_id)			\
+	({ union tdx_md_field_id _fid = { .raw = (_field_id)};  \
+		_fid.element_size_code; })
+
+#endif /* __KVM_X86_TDX_ARCH_H */