diff mbox series

[v3,09/21] x86/virt/tdx: Get information about TDX module and convertible memory

Message ID 145620795852bf24ba2124a3f8234fd4aaac19d4.1649219184.git.kai.huang@intel.com (mailing list archive)
State New, archived
Headers show
Series TDX host kernel support | expand

Commit Message

Huang, Kai April 6, 2022, 4:49 a.m. UTC
TDX provides increased levels of memory confidentiality and integrity.
This requires special hardware support for features like memory
encryption and storage of memory integrity checksums.  Not all memory
satisfies these requirements.

As a result, TDX introduced the concept of a "Convertible Memory Region"
(CMR).  During boot, the firmware builds a list of all of the memory
ranges which can provide the TDX security guarantees.  The list of these
ranges, along with TDX module information, is available to the kernel by
querying the TDX module via TDH.SYS.INFO SEAMCALL.

Host kernel can choose whether or not to use all convertible memory
regions as TDX memory.  Before TDX module is ready to create any TD
guests, all TDX memory regions that host kernel intends to use must be
configured to the TDX module, using specific data structures defined by
TDX architecture.  Constructing those structures requires information of
both TDX module and the Convertible Memory Regions.  Call TDH.SYS.INFO
to get this information as preparation to construct those structures.

Signed-off-by: Kai Huang <kai.huang@intel.com>
---
 arch/x86/virt/vmx/tdx/tdx.c | 131 ++++++++++++++++++++++++++++++++++++
 arch/x86/virt/vmx/tdx/tdx.h |  61 +++++++++++++++++
 2 files changed, 192 insertions(+)

Comments

Kuppuswamy Sathyanarayanan April 25, 2022, 2:58 a.m. UTC | #1
On 4/5/22 9:49 PM, Kai Huang wrote:
> TDX provides increased levels of memory confidentiality and integrity.
> This requires special hardware support for features like memory
> encryption and storage of memory integrity checksums.  Not all memory
> satisfies these requirements.
> 
> As a result, TDX introduced the concept of a "Convertible Memory Region"
> (CMR).  During boot, the firmware builds a list of all of the memory
> ranges which can provide the TDX security guarantees.  The list of these
> ranges, along with TDX module information, is available to the kernel by
> querying the TDX module via TDH.SYS.INFO SEAMCALL.
> 
> Host kernel can choose whether or not to use all convertible memory
> regions as TDX memory.  Before TDX module is ready to create any TD
> guests, all TDX memory regions that host kernel intends to use must be
> configured to the TDX module, using specific data structures defined by
> TDX architecture.  Constructing those structures requires information of
> both TDX module and the Convertible Memory Regions.  Call TDH.SYS.INFO
> to get this information as preparation to construct those structures.
> 
> Signed-off-by: Kai Huang <kai.huang@intel.com>
> ---

Looks good. Some minor comments.

>   arch/x86/virt/vmx/tdx/tdx.c | 131 ++++++++++++++++++++++++++++++++++++
>   arch/x86/virt/vmx/tdx/tdx.h |  61 +++++++++++++++++
>   2 files changed, 192 insertions(+)
> 
> diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c
> index ef2718423f0f..482e6d858181 100644
> --- a/arch/x86/virt/vmx/tdx/tdx.c
> +++ b/arch/x86/virt/vmx/tdx/tdx.c
> @@ -80,6 +80,11 @@ static DEFINE_MUTEX(tdx_module_lock);
>   
>   static struct p_seamldr_info p_seamldr_info;
>   
> +/* Base address of CMR array needs to be 512 bytes aligned. */
> +static struct cmr_info tdx_cmr_array[MAX_CMRS] __aligned(CMR_INFO_ARRAY_ALIGNMENT);
> +static int tdx_cmr_num;
> +static struct tdsysinfo_struct tdx_sysinfo;
> +
>   static bool __seamrr_enabled(void)
>   {
>   	return (seamrr_mask & SEAMRR_ENABLED_BITS) == SEAMRR_ENABLED_BITS;
> @@ -468,6 +473,127 @@ static int tdx_module_init_cpus(void)
>   	return seamcall_on_each_cpu(&sc);
>   }
>   
> +static inline bool cmr_valid(struct cmr_info *cmr)
> +{
> +	return !!cmr->size;
> +}
> +
> +static void print_cmrs(struct cmr_info *cmr_array, int cmr_num,
> +		       const char *name)
> +{
> +	int i;
> +
> +	for (i = 0; i < cmr_num; i++) {
> +		struct cmr_info *cmr = &cmr_array[i];
> +
> +		pr_info("%s : [0x%llx, 0x%llx)\n", name,
> +				cmr->base, cmr->base + cmr->size);
> +	}

I am not sure if it is ok to print this info by default or pr_debug
would be better. I will let maintainers decide about it.

> +}
> +
> +static int sanitize_cmrs(struct cmr_info *cmr_array, int cmr_num)

Since this function only deals with tdx_cmr_array, why pass it
as argument?

> +{
> +	int i, j;
> +
> +	/*
> +	 * Intel TDX module spec, 20.7.3 CMR_INFO:
> +	 *
> +	 *   TDH.SYS.INFO leaf function returns a MAX_CMRS (32) entry
> +	 *   array of CMR_INFO entries. The CMRs are sorted from the
> +	 *   lowest base address to the highest base address, and they
> +	 *   are non-overlapping.
> +	 *
> +	 * This implies that BIOS may generate invalid empty entries
> +	 * if total CMRs are less than 32.  Skip them manually.
> +	 */
> +	for (i = 0; i < cmr_num; i++) {
> +		struct cmr_info *cmr = &cmr_array[i];
> +		struct cmr_info *prev_cmr = NULL;

Why not keep declarations together at the top of the function?

> +
> +		/* Skip further invalid CMRs */
> +		if (!cmr_valid(cmr))
> +			break;
> +
> +		if (i > 0)
> +			prev_cmr = &cmr_array[i - 1];
> +
> +		/*
> +		 * It is a TDX firmware bug if CMRs are not
> +		 * in address ascending order.
> +		 */
> +		if (prev_cmr && ((prev_cmr->base + prev_cmr->size) >
> +					cmr->base)) {
> +			pr_err("Firmware bug: CMRs not in address ascending order.\n");
> +			return -EFAULT;
> +		}

Since above condition is only true for i > 0 case, why not combine them
together if (i > 0) {...}

> +	}
> +
> +	/*
> +	 * Also a sane BIOS should never generate invalid CMR(s) between
> +	 * two valid CMRs.  Sanity check this and simply return error in
> +	 * this case.
> +	 *
> +	 * By reaching here @i is the index of the first invalid CMR (or
> +	 * cmr_num).  Starting with next entry of @i since it has already
> +	 * been checked.
> +	 */
> +	for (j = i + 1; j < cmr_num; j++)
> +		if (cmr_valid(&cmr_array[j])) {
> +			pr_err("Firmware bug: invalid CMR(s) among valid CMRs.\n");
> +			return -EFAULT;
> +		}
> +
> +	/*
> +	 * Trim all tail invalid empty CMRs.  BIOS should generate at
> +	 * least one valid CMR, otherwise it's a TDX firmware bug.
> +	 */
> +	tdx_cmr_num = i;
> +	if (!tdx_cmr_num) {
> +		pr_err("Firmware bug: No valid CMR.\n");
> +		return -EFAULT;
> +	}
> +
> +	/* Print kernel sanitized CMRs */
> +	print_cmrs(tdx_cmr_array, tdx_cmr_num, "Kernel-sanitized-CMR");
> +
> +	return 0;
> +}
> +
Huang, Kai April 26, 2022, 12:05 a.m. UTC | #2
> 
> > +}
> > +
> > +static int sanitize_cmrs(struct cmr_info *cmr_array, int cmr_num)
> 
> Since this function only deals with tdx_cmr_array, why pass it
> as argument?

I received comments to use cmr_num as argument and pass tdx_cmr_num to
sanitize_cmrs() and finalize it at the end of this function.  In this case I
think it's better to pass tdx_cmr_array as argument.  It also saves some typing
(tdx_cmr_array vs cmr_array) in sanitize_cmrs().

> 
> > +{
> > +	int i, j;
> > +
> > +	/*
> > +	 * Intel TDX module spec, 20.7.3 CMR_INFO:
> > +	 *
> > +	 *   TDH.SYS.INFO leaf function returns a MAX_CMRS (32) entry
> > +	 *   array of CMR_INFO entries. The CMRs are sorted from the
> > +	 *   lowest base address to the highest base address, and they
> > +	 *   are non-overlapping.
> > +	 *
> > +	 * This implies that BIOS may generate invalid empty entries
> > +	 * if total CMRs are less than 32.  Skip them manually.
> > +	 */
> > +	for (i = 0; i < cmr_num; i++) {
> > +		struct cmr_info *cmr = &cmr_array[i];
> > +		struct cmr_info *prev_cmr = NULL;
> 
> Why not keep declarations together at the top of the function?

Why? They are only used in this for-loop.

> 
> > +
> > +		/* Skip further invalid CMRs */
> > +		if (!cmr_valid(cmr))
> > +			break;
> > +
> > +		if (i > 0)
> > +			prev_cmr = &cmr_array[i - 1];
> > +
> > +		/*
> > +		 * It is a TDX firmware bug if CMRs are not
> > +		 * in address ascending order.
> > +		 */
> > +		if (prev_cmr && ((prev_cmr->base + prev_cmr->size) >
> > +					cmr->base)) {
> > +			pr_err("Firmware bug: CMRs not in address ascending order.\n");
> > +			return -EFAULT;
> > +		}
> 
> Since above condition is only true for i > 0 case, why not combine them
> together if (i > 0) {...}

It will make an additional ident for the above if() {} to check prev_cmr and
cmr.  I don't see it is better?
Dave Hansen April 27, 2022, 10:15 p.m. UTC | #3
On 4/5/22 21:49, Kai Huang wrote:
> TDX provides increased levels of memory confidentiality and integrity.
> This requires special hardware support for features like memory
> encryption and storage of memory integrity checksums.  Not all memory
> satisfies these requirements.
> 
> As a result, TDX introduced the concept of a "Convertible Memory Region"
> (CMR).  During boot, the firmware builds a list of all of the memory
> ranges which can provide the TDX security guarantees.  The list of these
> ranges, along with TDX module information, is available to the kernel by
> querying the TDX module via TDH.SYS.INFO SEAMCALL.
> 
> Host kernel can choose whether or not to use all convertible memory
> regions as TDX memory.  Before TDX module is ready to create any TD
> guests, all TDX memory regions that host kernel intends to use must be
> configured to the TDX module, using specific data structures defined by
> TDX architecture.  Constructing those structures requires information of
> both TDX module and the Convertible Memory Regions.  Call TDH.SYS.INFO
> to get this information as preparation to construct those structures.
> 
> Signed-off-by: Kai Huang <kai.huang@intel.com>
> ---
>  arch/x86/virt/vmx/tdx/tdx.c | 131 ++++++++++++++++++++++++++++++++++++
>  arch/x86/virt/vmx/tdx/tdx.h |  61 +++++++++++++++++
>  2 files changed, 192 insertions(+)
> 
> diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c
> index ef2718423f0f..482e6d858181 100644
> --- a/arch/x86/virt/vmx/tdx/tdx.c
> +++ b/arch/x86/virt/vmx/tdx/tdx.c
> @@ -80,6 +80,11 @@ static DEFINE_MUTEX(tdx_module_lock);
>  
>  static struct p_seamldr_info p_seamldr_info;
>  
> +/* Base address of CMR array needs to be 512 bytes aligned. */
> +static struct cmr_info tdx_cmr_array[MAX_CMRS] __aligned(CMR_INFO_ARRAY_ALIGNMENT);
> +static int tdx_cmr_num;
> +static struct tdsysinfo_struct tdx_sysinfo;

I really dislike mixing hardware and software structures.  Please make
it clear which of these are fully software-defined and which are part of
the hardware ABI.

>  static bool __seamrr_enabled(void)
>  {
>  	return (seamrr_mask & SEAMRR_ENABLED_BITS) == SEAMRR_ENABLED_BITS;
> @@ -468,6 +473,127 @@ static int tdx_module_init_cpus(void)
>  	return seamcall_on_each_cpu(&sc);
>  }
>  
> +static inline bool cmr_valid(struct cmr_info *cmr)
> +{
> +	return !!cmr->size;
> +}
> +
> +static void print_cmrs(struct cmr_info *cmr_array, int cmr_num,
> +		       const char *name)
> +{
> +	int i;
> +
> +	for (i = 0; i < cmr_num; i++) {
> +		struct cmr_info *cmr = &cmr_array[i];
> +
> +		pr_info("%s : [0x%llx, 0x%llx)\n", name,
> +				cmr->base, cmr->base + cmr->size);
> +	}
> +}
> +
> +static int sanitize_cmrs(struct cmr_info *cmr_array, int cmr_num)
> +{
> +	int i, j;
> +
> +	/*
> +	 * Intel TDX module spec, 20.7.3 CMR_INFO:
> +	 *
> +	 *   TDH.SYS.INFO leaf function returns a MAX_CMRS (32) entry
> +	 *   array of CMR_INFO entries. The CMRs are sorted from the
> +	 *   lowest base address to the highest base address, and they
> +	 *   are non-overlapping.
> +	 *
> +	 * This implies that BIOS may generate invalid empty entries
> +	 * if total CMRs are less than 32.  Skip them manually.
> +	 */
> +	for (i = 0; i < cmr_num; i++) {
> +		struct cmr_info *cmr = &cmr_array[i];
> +		struct cmr_info *prev_cmr = NULL;
> +
> +		/* Skip further invalid CMRs */
> +		if (!cmr_valid(cmr))
> +			break;
> +
> +		if (i > 0)
> +			prev_cmr = &cmr_array[i - 1];
> +
> +		/*
> +		 * It is a TDX firmware bug if CMRs are not
> +		 * in address ascending order.
> +		 */
> +		if (prev_cmr && ((prev_cmr->base + prev_cmr->size) >
> +					cmr->base)) {
> +			pr_err("Firmware bug: CMRs not in address ascending order.\n");
> +			return -EFAULT;

-EFAULT is a really weird return code to use for this.  I'd use -EINVAL.

> +		}
> +	}
> +
> +	/*
> +	 * Also a sane BIOS should never generate invalid CMR(s) between
> +	 * two valid CMRs.  Sanity check this and simply return error in
> +	 * this case.
> +	 *
> +	 * By reaching here @i is the index of the first invalid CMR (or
> +	 * cmr_num).  Starting with next entry of @i since it has already
> +	 * been checked.
> +	 */
> +	for (j = i + 1; j < cmr_num; j++)
> +		if (cmr_valid(&cmr_array[j])) {
> +			pr_err("Firmware bug: invalid CMR(s) among valid CMRs.\n");
> +			return -EFAULT;
> +		}

Please add brackets for the for().

> +	/*
> +	 * Trim all tail invalid empty CMRs.  BIOS should generate at
> +	 * least one valid CMR, otherwise it's a TDX firmware bug.
> +	 */
> +	tdx_cmr_num = i;
> +	if (!tdx_cmr_num) {
> +		pr_err("Firmware bug: No valid CMR.\n");
> +		return -EFAULT;
> +	}
> +
> +	/* Print kernel sanitized CMRs */
> +	print_cmrs(tdx_cmr_array, tdx_cmr_num, "Kernel-sanitized-CMR");
> +
> +	return 0;
> +}
> +
> +static int tdx_get_sysinfo(void)
> +{
> +	struct tdx_module_output out;
> +	u64 tdsysinfo_sz, cmr_num;
> +	int ret;
> +
> +	BUILD_BUG_ON(sizeof(struct tdsysinfo_struct) != TDSYSINFO_STRUCT_SIZE);
> +
> +	ret = seamcall(TDH_SYS_INFO, __pa(&tdx_sysinfo), TDSYSINFO_STRUCT_SIZE,
> +			__pa(tdx_cmr_array), MAX_CMRS, NULL, &out);
> +	if (ret)
> +		return ret;
> +
> +	/*
> +	 * If TDH.SYS.CONFIG succeeds, RDX contains the actual bytes
> +	 * written to @tdx_sysinfo and R9 contains the actual entries
> +	 * written to @tdx_cmr_array.  Sanity check them.
> +	 */
> +	tdsysinfo_sz = out.rdx;
> +	cmr_num = out.r9;

Please vertically align things like this:

	tdsysinfo_sz = out.rdx;
	cmr_num	     = out.r9;

> +	if (WARN_ON_ONCE((tdsysinfo_sz > sizeof(tdx_sysinfo)) || !tdsysinfo_sz ||
> +				(cmr_num > MAX_CMRS) || !cmr_num))
> +		return -EFAULT;

Sanity checking is good, but this makes me wonder how much is too much.
 I don't see a lot of code for instance checking if sys_write() writes
more than how much it was supposed to.

Why are these sanity checks necessary here?  Is the TDX module expected
to be *THAT* buggy?  The thing that's providing, oh, basically all of
the security guarantees of this architecture.  It's overflowing the
buffers you hand it?

> +	pr_info("TDX module: vendor_id 0x%x, major_version %u, minor_version %u, build_date %u, build_num %u",
> +		tdx_sysinfo.vendor_id, tdx_sysinfo.major_version,
> +		tdx_sysinfo.minor_version, tdx_sysinfo.build_date,
> +		tdx_sysinfo.build_num);
> +
> +	/* Print BIOS provided CMRs */
> +	print_cmrs(tdx_cmr_array, cmr_num, "BIOS-CMR");
> +
> +	return sanitize_cmrs(tdx_cmr_array, cmr_num);
> +}

Does sanitize_cmrs() sanitize anything?  It looks to me like it *checks*
the CMRs.  But, sanitizing is an active operation that writes to the
data being sanitized.  This looks read-only to me.  check_cmrs() would
be a better name for a passive check.

>  static int init_tdx_module(void)
>  {
>  	int ret;
> @@ -482,6 +608,11 @@ static int init_tdx_module(void)
>  	if (ret)
>  		goto out;
>  
> +	/* Get TDX module information and CMRs */
> +	ret = tdx_get_sysinfo();
> +	if (ret)
> +		goto out;

Couldn't we get rid of that comment if you did something like:

	ret = tdx_get_sysinfo(&tdx_cmr_array, &tdx_sysinfo);

and preferably make the variables function-local.

>  	/*
>  	 * Return -EFAULT until all steps of TDX module
>  	 * initialization are done.
> diff --git a/arch/x86/virt/vmx/tdx/tdx.h b/arch/x86/virt/vmx/tdx/tdx.h
> index b8cfdd6e12f3..2f21c45df6ac 100644
> --- a/arch/x86/virt/vmx/tdx/tdx.h
> +++ b/arch/x86/virt/vmx/tdx/tdx.h
> @@ -29,6 +29,66 @@ struct p_seamldr_info {
>  	u8	reserved2[88];
>  } __packed __aligned(P_SEAMLDR_INFO_ALIGNMENT);
>  
> +struct cmr_info {
> +	u64	base;
> +	u64	size;
> +} __packed;
> +
> +#define MAX_CMRS			32
> +#define CMR_INFO_ARRAY_ALIGNMENT	512
> +
> +struct cpuid_config {
> +	u32	leaf;
> +	u32	sub_leaf;
> +	u32	eax;
> +	u32	ebx;
> +	u32	ecx;
> +	u32	edx;
> +} __packed;
> +
> +#define TDSYSINFO_STRUCT_SIZE		1024
> +#define TDSYSINFO_STRUCT_ALIGNMENT	1024
> +
> +struct tdsysinfo_struct {
> +	/* TDX-SEAM Module Info */
> +	u32	attributes;
> +	u32	vendor_id;
> +	u32	build_date;
> +	u16	build_num;
> +	u16	minor_version;
> +	u16	major_version;
> +	u8	reserved0[14];
> +	/* Memory Info */
> +	u16	max_tdmrs;
> +	u16	max_reserved_per_tdmr;
> +	u16	pamt_entry_size;
> +	u8	reserved1[10];
> +	/* Control Struct Info */
> +	u16	tdcs_base_size;
> +	u8	reserved2[2];
> +	u16	tdvps_base_size;
> +	u8	tdvps_xfam_dependent_size;
> +	u8	reserved3[9];
> +	/* TD Capabilities */
> +	u64	attributes_fixed0;
> +	u64	attributes_fixed1;
> +	u64	xfam_fixed0;
> +	u64	xfam_fixed1;
> +	u8	reserved4[32];
> +	u32	num_cpuid_config;
> +	/*
> +	 * The actual number of CPUID_CONFIG depends on above
> +	 * 'num_cpuid_config'.  The size of 'struct tdsysinfo_struct'
> +	 * is 1024B defined by TDX architecture.  Use a union with
> +	 * specific padding to make 'sizeof(struct tdsysinfo_struct)'
> +	 * equal to 1024.
> +	 */
> +	union {
> +		struct cpuid_config	cpuid_configs[0];
> +		u8			reserved5[892];
> +	};
> +} __packed __aligned(TDSYSINFO_STRUCT_ALIGNMENT);
> +
>  /*
>   * P-SEAMLDR SEAMCALL leaf function
>   */
> @@ -38,6 +98,7 @@ struct p_seamldr_info {
>  /*
>   * TDX module SEAMCALL leaf functions
>   */
> +#define TDH_SYS_INFO		32
>  #define TDH_SYS_INIT		33
>  #define TDH_SYS_LP_INIT		35
>  #define TDH_SYS_LP_SHUTDOWN	44
Huang, Kai April 28, 2022, 12:15 a.m. UTC | #4
On Wed, 2022-04-27 at 15:15 -0700, Dave Hansen wrote:
> On 4/5/22 21:49, Kai Huang wrote:
> > TDX provides increased levels of memory confidentiality and integrity.
> > This requires special hardware support for features like memory
> > encryption and storage of memory integrity checksums.  Not all memory
> > satisfies these requirements.
> > 
> > As a result, TDX introduced the concept of a "Convertible Memory Region"
> > (CMR).  During boot, the firmware builds a list of all of the memory
> > ranges which can provide the TDX security guarantees.  The list of these
> > ranges, along with TDX module information, is available to the kernel by
> > querying the TDX module via TDH.SYS.INFO SEAMCALL.
> > 
> > Host kernel can choose whether or not to use all convertible memory
> > regions as TDX memory.  Before TDX module is ready to create any TD
> > guests, all TDX memory regions that host kernel intends to use must be
> > configured to the TDX module, using specific data structures defined by
> > TDX architecture.  Constructing those structures requires information of
> > both TDX module and the Convertible Memory Regions.  Call TDH.SYS.INFO
> > to get this information as preparation to construct those structures.
> > 
> > Signed-off-by: Kai Huang <kai.huang@intel.com>
> > ---
> >  arch/x86/virt/vmx/tdx/tdx.c | 131 ++++++++++++++++++++++++++++++++++++
> >  arch/x86/virt/vmx/tdx/tdx.h |  61 +++++++++++++++++
> >  2 files changed, 192 insertions(+)
> > 
> > diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c
> > index ef2718423f0f..482e6d858181 100644
> > --- a/arch/x86/virt/vmx/tdx/tdx.c
> > +++ b/arch/x86/virt/vmx/tdx/tdx.c
> > @@ -80,6 +80,11 @@ static DEFINE_MUTEX(tdx_module_lock);
> >  
> >  static struct p_seamldr_info p_seamldr_info;
> >  
> > +/* Base address of CMR array needs to be 512 bytes aligned. */
> > +static struct cmr_info tdx_cmr_array[MAX_CMRS] __aligned(CMR_INFO_ARRAY_ALIGNMENT);
> > +static int tdx_cmr_num;
> > +static struct tdsysinfo_struct tdx_sysinfo;
> 
> I really dislike mixing hardware and software structures.  Please make
> it clear which of these are fully software-defined and which are part of
> the hardware ABI.

Both 'struct tdsysinfo_struct' and 'struct cmr_info' are hardware structures. 
They are defined in tdx.h which has a comment saying the data structures below
this comment is hardware structures:

	+/*
	+ * TDX architectural data structures
	+ */

It is introduced in the P-SEAMLDR patch.

Should I explicitly add comments around the variables saying they are used by
hardware, something like:

	/*
	 * Data structures used by TDH.SYS.INFO SEAMCALL to return CMRs and
	 * TDX module system information.
	 */

?
 
> 
> >  static bool __seamrr_enabled(void)
> >  {
> >  	return (seamrr_mask & SEAMRR_ENABLED_BITS) == SEAMRR_ENABLED_BITS;
> > @@ -468,6 +473,127 @@ static int tdx_module_init_cpus(void)
> >  	return seamcall_on_each_cpu(&sc);
> >  }
> >  
> > +static inline bool cmr_valid(struct cmr_info *cmr)
> > +{
> > +	return !!cmr->size;
> > +}
> > +
> > +static void print_cmrs(struct cmr_info *cmr_array, int cmr_num,
> > +		       const char *name)
> > +{
> > +	int i;
> > +
> > +	for (i = 0; i < cmr_num; i++) {
> > +		struct cmr_info *cmr = &cmr_array[i];
> > +
> > +		pr_info("%s : [0x%llx, 0x%llx)\n", name,
> > +				cmr->base, cmr->base + cmr->size);
> > +	}
> > +}
> > +
> > +static int sanitize_cmrs(struct cmr_info *cmr_array, int cmr_num)
> > +{
> > +	int i, j;
> > +
> > +	/*
> > +	 * Intel TDX module spec, 20.7.3 CMR_INFO:
> > +	 *
> > +	 *   TDH.SYS.INFO leaf function returns a MAX_CMRS (32) entry
> > +	 *   array of CMR_INFO entries. The CMRs are sorted from the
> > +	 *   lowest base address to the highest base address, and they
> > +	 *   are non-overlapping.
> > +	 *
> > +	 * This implies that BIOS may generate invalid empty entries
> > +	 * if total CMRs are less than 32.  Skip them manually.
> > +	 */
> > +	for (i = 0; i < cmr_num; i++) {
> > +		struct cmr_info *cmr = &cmr_array[i];
> > +		struct cmr_info *prev_cmr = NULL;
> > +
> > +		/* Skip further invalid CMRs */
> > +		if (!cmr_valid(cmr))
> > +			break;
> > +
> > +		if (i > 0)
> > +			prev_cmr = &cmr_array[i - 1];
> > +
> > +		/*
> > +		 * It is a TDX firmware bug if CMRs are not
> > +		 * in address ascending order.
> > +		 */
> > +		if (prev_cmr && ((prev_cmr->base + prev_cmr->size) >
> > +					cmr->base)) {
> > +			pr_err("Firmware bug: CMRs not in address ascending order.\n");
> > +			return -EFAULT;
> 
> -EFAULT is a really weird return code to use for this.  I'd use -EINVAL.

OK thanks.

> 
> > +		}
> > +	}
> > +
> > +	/*
> > +	 * Also a sane BIOS should never generate invalid CMR(s) between
> > +	 * two valid CMRs.  Sanity check this and simply return error in
> > +	 * this case.
> > +	 *
> > +	 * By reaching here @i is the index of the first invalid CMR (or
> > +	 * cmr_num).  Starting with next entry of @i since it has already
> > +	 * been checked.
> > +	 */
> > +	for (j = i + 1; j < cmr_num; j++)
> > +		if (cmr_valid(&cmr_array[j])) {
> > +			pr_err("Firmware bug: invalid CMR(s) among valid CMRs.\n");
> > +			return -EFAULT;
> > +		}
> 
> Please add brackets for the for().

OK.

> 
> > +	/*
> > +	 * Trim all tail invalid empty CMRs.  BIOS should generate at
> > +	 * least one valid CMR, otherwise it's a TDX firmware bug.
> > +	 */
> > +	tdx_cmr_num = i;
> > +	if (!tdx_cmr_num) {
> > +		pr_err("Firmware bug: No valid CMR.\n");
> > +		return -EFAULT;
> > +	}
> > +
> > +	/* Print kernel sanitized CMRs */
> > +	print_cmrs(tdx_cmr_array, tdx_cmr_num, "Kernel-sanitized-CMR");
> > +
> > +	return 0;
> > +}
> > +
> > +static int tdx_get_sysinfo(void)
> > +{
> > +	struct tdx_module_output out;
> > +	u64 tdsysinfo_sz, cmr_num;
> > +	int ret;
> > +
> > +	BUILD_BUG_ON(sizeof(struct tdsysinfo_struct) != TDSYSINFO_STRUCT_SIZE);
> > +
> > +	ret = seamcall(TDH_SYS_INFO, __pa(&tdx_sysinfo), TDSYSINFO_STRUCT_SIZE,
> > +			__pa(tdx_cmr_array), MAX_CMRS, NULL, &out);
> > +	if (ret)
> > +		return ret;
> > +
> > +	/*
> > +	 * If TDH.SYS.CONFIG succeeds, RDX contains the actual bytes
> > +	 * written to @tdx_sysinfo and R9 contains the actual entries
> > +	 * written to @tdx_cmr_array.  Sanity check them.
> > +	 */
> > +	tdsysinfo_sz = out.rdx;
> > +	cmr_num = out.r9;
> 
> Please vertically align things like this:
> 
> 	tdsysinfo_sz = out.rdx;
> 	cmr_num	     = out.r9;

OK.

> 
> > +	if (WARN_ON_ONCE((tdsysinfo_sz > sizeof(tdx_sysinfo)) || !tdsysinfo_sz ||
> > +				(cmr_num > MAX_CMRS) || !cmr_num))
> > +		return -EFAULT;
> 
> Sanity checking is good, but this makes me wonder how much is too much.
>  I don't see a lot of code for instance checking if sys_write() writes
> more than how much it was supposed to.
> 
> Why are these sanity checks necessary here?  Is the TDX module expected
> to be *THAT* buggy?  The thing that's providing, oh, basically all of
> the security guarantees of this architecture.  It's overflowing the
> buffers you hand it?

I think this check can be removed.  Will remove.

> 
> > +	pr_info("TDX module: vendor_id 0x%x, major_version %u, minor_version %u, build_date %u, build_num %u",
> > +		tdx_sysinfo.vendor_id, tdx_sysinfo.major_version,
> > +		tdx_sysinfo.minor_version, tdx_sysinfo.build_date,
> > +		tdx_sysinfo.build_num);
> > +
> > +	/* Print BIOS provided CMRs */
> > +	print_cmrs(tdx_cmr_array, cmr_num, "BIOS-CMR");
> > +
> > +	return sanitize_cmrs(tdx_cmr_array, cmr_num);
> > +}
> 
> Does sanitize_cmrs() sanitize anything?  It looks to me like it *checks*
> the CMRs.  But, sanitizing is an active operation that writes to the
> data being sanitized.  This looks read-only to me.  check_cmrs() would
> be a better name for a passive check.

Sure will change to check_cmrs().

> 
> >  static int init_tdx_module(void)
> >  {
> >  	int ret;
> > @@ -482,6 +608,11 @@ static int init_tdx_module(void)
> >  	if (ret)
> >  		goto out;
> >  
> > +	/* Get TDX module information and CMRs */
> > +	ret = tdx_get_sysinfo();
> > +	if (ret)
> > +		goto out;
> 
> Couldn't we get rid of that comment if you did something like:
> 
> 	ret = tdx_get_sysinfo(&tdx_cmr_array, &tdx_sysinfo);

Yes will do.

> 
> and preferably make the variables function-local.

'tdx_sysinfo' will be used by KVM too.
Dave Hansen April 28, 2022, 2:06 p.m. UTC | #5
On 4/27/22 17:15, Kai Huang wrote:
> On Wed, 2022-04-27 at 15:15 -0700, Dave Hansen wrote:
>> On 4/5/22 21:49, Kai Huang wrote:
>>> TDX provides increased levels of memory confidentiality and integrity.
>>> This requires special hardware support for features like memory
>>> encryption and storage of memory integrity checksums.  Not all memory
>>> satisfies these requirements.
>>>
>>> As a result, TDX introduced the concept of a "Convertible Memory Region"
>>> (CMR).  During boot, the firmware builds a list of all of the memory
>>> ranges which can provide the TDX security guarantees.  The list of these
>>> ranges, along with TDX module information, is available to the kernel by
>>> querying the TDX module via TDH.SYS.INFO SEAMCALL.
>>>
>>> Host kernel can choose whether or not to use all convertible memory
>>> regions as TDX memory.  Before TDX module is ready to create any TD
>>> guests, all TDX memory regions that host kernel intends to use must be
>>> configured to the TDX module, using specific data structures defined by
>>> TDX architecture.  Constructing those structures requires information of
>>> both TDX module and the Convertible Memory Regions.  Call TDH.SYS.INFO
>>> to get this information as preparation to construct those structures.
>>>
>>> Signed-off-by: Kai Huang <kai.huang@intel.com>
>>> ---
>>>  arch/x86/virt/vmx/tdx/tdx.c | 131 ++++++++++++++++++++++++++++++++++++
>>>  arch/x86/virt/vmx/tdx/tdx.h |  61 +++++++++++++++++
>>>  2 files changed, 192 insertions(+)
>>>
>>> diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c
>>> index ef2718423f0f..482e6d858181 100644
>>> --- a/arch/x86/virt/vmx/tdx/tdx.c
>>> +++ b/arch/x86/virt/vmx/tdx/tdx.c
>>> @@ -80,6 +80,11 @@ static DEFINE_MUTEX(tdx_module_lock);
>>>  
>>>  static struct p_seamldr_info p_seamldr_info;
>>>  
>>> +/* Base address of CMR array needs to be 512 bytes aligned. */
>>> +static struct cmr_info tdx_cmr_array[MAX_CMRS] __aligned(CMR_INFO_ARRAY_ALIGNMENT);
>>> +static int tdx_cmr_num;
>>> +static struct tdsysinfo_struct tdx_sysinfo;
>>
>> I really dislike mixing hardware and software structures.  Please make
>> it clear which of these are fully software-defined and which are part of
>> the hardware ABI.
> 
> Both 'struct tdsysinfo_struct' and 'struct cmr_info' are hardware structures. 
> They are defined in tdx.h which has a comment saying the data structures below
> this comment is hardware structures:
> 
> 	+/*
> 	+ * TDX architectural data structures
> 	+ */
> 
> It is introduced in the P-SEAMLDR patch.
> 
> Should I explicitly add comments around the variables saying they are used by
> hardware, something like:
> 
> 	/*
> 	 * Data structures used by TDH.SYS.INFO SEAMCALL to return CMRs and
> 	 * TDX module system information.
> 	 */

I think we know they are data structures. :)

But, saying:

	/* Used in TDH.SYS.INFO SEAMCALL ABI: */

*is* actually helpful.  It (probably) tells us where in the spec we can
find the definition and tells how it gets used.  Plus, it tells us this
isn't a software data structure.

>>> +	/* Get TDX module information and CMRs */
>>> +	ret = tdx_get_sysinfo();
>>> +	if (ret)
>>> +		goto out;
>>
>> Couldn't we get rid of that comment if you did something like:
>>
>> 	ret = tdx_get_sysinfo(&tdx_cmr_array, &tdx_sysinfo);
> 
> Yes will do.
> 
>> and preferably make the variables function-local.
> 
> 'tdx_sysinfo' will be used by KVM too.

In other words, it's not a part of this series so I can't review whether
this statement is correct or whether there's a better way to hand this
information over to KVM.

This (minor) nugget influencing the design also isn't even commented or
addressed in the changelog.
Huang, Kai April 28, 2022, 11:14 p.m. UTC | #6
On Thu, 2022-04-28 at 07:06 -0700, Dave Hansen wrote:
> On 4/27/22 17:15, Kai Huang wrote:
> > On Wed, 2022-04-27 at 15:15 -0700, Dave Hansen wrote:
> > > On 4/5/22 21:49, Kai Huang wrote:
> > > > TDX provides increased levels of memory confidentiality and integrity.
> > > > This requires special hardware support for features like memory
> > > > encryption and storage of memory integrity checksums.  Not all memory
> > > > satisfies these requirements.
> > > > 
> > > > As a result, TDX introduced the concept of a "Convertible Memory Region"
> > > > (CMR).  During boot, the firmware builds a list of all of the memory
> > > > ranges which can provide the TDX security guarantees.  The list of these
> > > > ranges, along with TDX module information, is available to the kernel by
> > > > querying the TDX module via TDH.SYS.INFO SEAMCALL.
> > > > 
> > > > Host kernel can choose whether or not to use all convertible memory
> > > > regions as TDX memory.  Before TDX module is ready to create any TD
> > > > guests, all TDX memory regions that host kernel intends to use must be
> > > > configured to the TDX module, using specific data structures defined by
> > > > TDX architecture.  Constructing those structures requires information of
> > > > both TDX module and the Convertible Memory Regions.  Call TDH.SYS.INFO
> > > > to get this information as preparation to construct those structures.
> > > > 
> > > > Signed-off-by: Kai Huang <kai.huang@intel.com>
> > > > ---
> > > >  arch/x86/virt/vmx/tdx/tdx.c | 131 ++++++++++++++++++++++++++++++++++++
> > > >  arch/x86/virt/vmx/tdx/tdx.h |  61 +++++++++++++++++
> > > >  2 files changed, 192 insertions(+)
> > > > 
> > > > diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c
> > > > index ef2718423f0f..482e6d858181 100644
> > > > --- a/arch/x86/virt/vmx/tdx/tdx.c
> > > > +++ b/arch/x86/virt/vmx/tdx/tdx.c
> > > > @@ -80,6 +80,11 @@ static DEFINE_MUTEX(tdx_module_lock);
> > > >  
> > > >  static struct p_seamldr_info p_seamldr_info;
> > > >  
> > > > +/* Base address of CMR array needs to be 512 bytes aligned. */
> > > > +static struct cmr_info tdx_cmr_array[MAX_CMRS] __aligned(CMR_INFO_ARRAY_ALIGNMENT);
> > > > +static int tdx_cmr_num;
> > > > +static struct tdsysinfo_struct tdx_sysinfo;
> > > 
> > > I really dislike mixing hardware and software structures.  Please make
> > > it clear which of these are fully software-defined and which are part of
> > > the hardware ABI.
> > 
> > Both 'struct tdsysinfo_struct' and 'struct cmr_info' are hardware structures. 
> > They are defined in tdx.h which has a comment saying the data structures below
> > this comment is hardware structures:
> > 
> > 	+/*
> > 	+ * TDX architectural data structures
> > 	+ */
> > 
> > It is introduced in the P-SEAMLDR patch.
> > 
> > Should I explicitly add comments around the variables saying they are used by
> > hardware, something like:
> > 
> > 	/*
> > 	 * Data structures used by TDH.SYS.INFO SEAMCALL to return CMRs and
> > 	 * TDX module system information.
> > 	 */
> 
> I think we know they are data structures. :)
> 
> But, saying:
> 
> 	/* Used in TDH.SYS.INFO SEAMCALL ABI: */
> 
> *is* actually helpful.  It (probably) tells us where in the spec we can
> find the definition and tells how it gets used.  Plus, it tells us this
> isn't a software data structure.

Right.  I'll use your above comment.

> 
> > > > +	/* Get TDX module information and CMRs */
> > > > +	ret = tdx_get_sysinfo();
> > > > +	if (ret)
> > > > +		goto out;
> > > 
> > > Couldn't we get rid of that comment if you did something like:
> > > 
> > > 	ret = tdx_get_sysinfo(&tdx_cmr_array, &tdx_sysinfo);
> > 
> > Yes will do.
> > 
> > > and preferably make the variables function-local.
> > 
> > 'tdx_sysinfo' will be used by KVM too.
> 
> In other words, it's not a part of this series so I can't review whether
> this statement is correct or whether there's a better way to hand this
> information over to KVM.
> 
> This (minor) nugget influencing the design also isn't even commented or
> addressed in the changelog.

TDSYSINFO_STRUCT is 1024B and CMR array is 512B, so I don't think it should be
in the stack.  I can change to use dynamic allocation at the beginning and free
it at the end of the function.  KVM support patches can change it to static
variable in the file.

Or I can add a sentence saying KVM will need to use 'tdx_tdsysinfo' so use
static variable.  However currently KVM doesn't use CMR so no justification for
CMR array.

But I am thinking about memory hotplug interaction with TDX module
initialization.  That may use CMR info.  Let me send out proposal and close that
first to see whether this series needs to use CMR info out of this function.
Dave Hansen April 29, 2022, 5:47 p.m. UTC | #7
On 4/28/22 16:14, Kai Huang wrote:
> On Thu, 2022-04-28 at 07:06 -0700, Dave Hansen wrote:
>> On 4/27/22 17:15, Kai Huang wrote:
>>>> Couldn't we get rid of that comment if you did something like:
>>>>
>>>> 	ret = tdx_get_sysinfo(&tdx_cmr_array, &tdx_sysinfo);
>>>
>>> Yes will do.
>>>
>>>> and preferably make the variables function-local.
>>>
>>> 'tdx_sysinfo' will be used by KVM too.
>>
>> In other words, it's not a part of this series so I can't review whether
>> this statement is correct or whether there's a better way to hand this
>> information over to KVM.
>>
>> This (minor) nugget influencing the design also isn't even commented or
>> addressed in the changelog.
> 
> TDSYSINFO_STRUCT is 1024B and CMR array is 512B, so I don't think it should be
> in the stack.  I can change to use dynamic allocation at the beginning and free
> it at the end of the function.  KVM support patches can change it to static
> variable in the file.

2k of stack is big, but it isn't a deal breaker for something that's not
nested anywhere and that's only called once in a pretty controlled
setting and not in interrupt context.  I wouldn't cry about it.
Huang, Kai May 2, 2022, 5:04 a.m. UTC | #8
On Fri, 2022-04-29 at 10:47 -0700, Dave Hansen wrote:
> On 4/28/22 16:14, Kai Huang wrote:
> > On Thu, 2022-04-28 at 07:06 -0700, Dave Hansen wrote:
> > > On 4/27/22 17:15, Kai Huang wrote:
> > > > > Couldn't we get rid of that comment if you did something like:
> > > > > 
> > > > > 	ret = tdx_get_sysinfo(&tdx_cmr_array, &tdx_sysinfo);
> > > > 
> > > > Yes will do.
> > > > 
> > > > > and preferably make the variables function-local.
> > > > 
> > > > 'tdx_sysinfo' will be used by KVM too.
> > > 
> > > In other words, it's not a part of this series so I can't review whether
> > > this statement is correct or whether there's a better way to hand this
> > > information over to KVM.
> > > 
> > > This (minor) nugget influencing the design also isn't even commented or
> > > addressed in the changelog.
> > 
> > TDSYSINFO_STRUCT is 1024B and CMR array is 512B, so I don't think it should be
> > in the stack.  I can change to use dynamic allocation at the beginning and free
> > it at the end of the function.  KVM support patches can change it to static
> > variable in the file.
> 
> 2k of stack is big, but it isn't a deal breaker for something that's not
> nested anywhere and that's only called once in a pretty controlled
> setting and not in interrupt context.  I wouldn't cry about it.

OK.  I'll change to use function local variables for both of them.
Sagi Shahar May 18, 2022, 10:30 p.m. UTC | #9
On Wed, Apr 27, 2022 at 5:15 PM Kai Huang <kai.huang@intel.com> wrote:
>
> On Wed, 2022-04-27 at 15:15 -0700, Dave Hansen wrote:
> > On 4/5/22 21:49, Kai Huang wrote:
> > > TDX provides increased levels of memory confidentiality and integrity.
> > > This requires special hardware support for features like memory
> > > encryption and storage of memory integrity checksums.  Not all memory
> > > satisfies these requirements.
> > >
> > > As a result, TDX introduced the concept of a "Convertible Memory Region"
> > > (CMR).  During boot, the firmware builds a list of all of the memory
> > > ranges which can provide the TDX security guarantees.  The list of these
> > > ranges, along with TDX module information, is available to the kernel by
> > > querying the TDX module via TDH.SYS.INFO SEAMCALL.
> > >
> > > Host kernel can choose whether or not to use all convertible memory
> > > regions as TDX memory.  Before TDX module is ready to create any TD
> > > guests, all TDX memory regions that host kernel intends to use must be
> > > configured to the TDX module, using specific data structures defined by
> > > TDX architecture.  Constructing those structures requires information of
> > > both TDX module and the Convertible Memory Regions.  Call TDH.SYS.INFO
> > > to get this information as preparation to construct those structures.
> > >
> > > Signed-off-by: Kai Huang <kai.huang@intel.com>
> > > ---
> > >  arch/x86/virt/vmx/tdx/tdx.c | 131 ++++++++++++++++++++++++++++++++++++
> > >  arch/x86/virt/vmx/tdx/tdx.h |  61 +++++++++++++++++
> > >  2 files changed, 192 insertions(+)
> > >
> > > diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c
> > > index ef2718423f0f..482e6d858181 100644
> > > --- a/arch/x86/virt/vmx/tdx/tdx.c
> > > +++ b/arch/x86/virt/vmx/tdx/tdx.c
> > > @@ -80,6 +80,11 @@ static DEFINE_MUTEX(tdx_module_lock);
> > >
> > >  static struct p_seamldr_info p_seamldr_info;
> > >
> > > +/* Base address of CMR array needs to be 512 bytes aligned. */
> > > +static struct cmr_info tdx_cmr_array[MAX_CMRS] __aligned(CMR_INFO_ARRAY_ALIGNMENT);
> > > +static int tdx_cmr_num;
> > > +static struct tdsysinfo_struct tdx_sysinfo;
> >
> > I really dislike mixing hardware and software structures.  Please make
> > it clear which of these are fully software-defined and which are part of
> > the hardware ABI.
>
> Both 'struct tdsysinfo_struct' and 'struct cmr_info' are hardware structures.
> They are defined in tdx.h which has a comment saying the data structures below
> this comment is hardware structures:
>
>         +/*
>         + * TDX architectural data structures
>         + */
>
> It is introduced in the P-SEAMLDR patch.
>
> Should I explicitly add comments around the variables saying they are used by
> hardware, something like:
>
>         /*
>          * Data structures used by TDH.SYS.INFO SEAMCALL to return CMRs and
>          * TDX module system information.
>          */
>
> ?
>
> >
> > >  static bool __seamrr_enabled(void)
> > >  {
> > >     return (seamrr_mask & SEAMRR_ENABLED_BITS) == SEAMRR_ENABLED_BITS;
> > > @@ -468,6 +473,127 @@ static int tdx_module_init_cpus(void)
> > >     return seamcall_on_each_cpu(&sc);
> > >  }
> > >
> > > +static inline bool cmr_valid(struct cmr_info *cmr)
> > > +{
> > > +   return !!cmr->size;
> > > +}
> > > +
> > > +static void print_cmrs(struct cmr_info *cmr_array, int cmr_num,
> > > +                  const char *name)
> > > +{
> > > +   int i;
> > > +
> > > +   for (i = 0; i < cmr_num; i++) {
> > > +           struct cmr_info *cmr = &cmr_array[i];
> > > +
> > > +           pr_info("%s : [0x%llx, 0x%llx)\n", name,
> > > +                           cmr->base, cmr->base + cmr->size);
> > > +   }
> > > +}
> > > +
> > > +static int sanitize_cmrs(struct cmr_info *cmr_array, int cmr_num)
> > > +{
> > > +   int i, j;
> > > +
> > > +   /*
> > > +    * Intel TDX module spec, 20.7.3 CMR_INFO:
> > > +    *
> > > +    *   TDH.SYS.INFO leaf function returns a MAX_CMRS (32) entry
> > > +    *   array of CMR_INFO entries. The CMRs are sorted from the
> > > +    *   lowest base address to the highest base address, and they
> > > +    *   are non-overlapping.
> > > +    *
> > > +    * This implies that BIOS may generate invalid empty entries
> > > +    * if total CMRs are less than 32.  Skip them manually.
> > > +    */
> > > +   for (i = 0; i < cmr_num; i++) {
> > > +           struct cmr_info *cmr = &cmr_array[i];
> > > +           struct cmr_info *prev_cmr = NULL;
> > > +
> > > +           /* Skip further invalid CMRs */
> > > +           if (!cmr_valid(cmr))
> > > +                   break;
> > > +
> > > +           if (i > 0)
> > > +                   prev_cmr = &cmr_array[i - 1];
> > > +
> > > +           /*
> > > +            * It is a TDX firmware bug if CMRs are not
> > > +            * in address ascending order.
> > > +            */
> > > +           if (prev_cmr && ((prev_cmr->base + prev_cmr->size) >
> > > +                                   cmr->base)) {
> > > +                   pr_err("Firmware bug: CMRs not in address ascending order.\n");
> > > +                   return -EFAULT;
> >
> > -EFAULT is a really weird return code to use for this.  I'd use -EINVAL.
>
> OK thanks.
>
> >
> > > +           }
> > > +   }
> > > +
> > > +   /*
> > > +    * Also a sane BIOS should never generate invalid CMR(s) between
> > > +    * two valid CMRs.  Sanity check this and simply return error in
> > > +    * this case.
> > > +    *
> > > +    * By reaching here @i is the index of the first invalid CMR (or
> > > +    * cmr_num).  Starting with next entry of @i since it has already
> > > +    * been checked.
> > > +    */
> > > +   for (j = i + 1; j < cmr_num; j++)
> > > +           if (cmr_valid(&cmr_array[j])) {
> > > +                   pr_err("Firmware bug: invalid CMR(s) among valid CMRs.\n");
> > > +                   return -EFAULT;
> > > +           }
> >
> > Please add brackets for the for().
>
> OK.
>
> >
> > > +   /*
> > > +    * Trim all tail invalid empty CMRs.  BIOS should generate at
> > > +    * least one valid CMR, otherwise it's a TDX firmware bug.
> > > +    */
> > > +   tdx_cmr_num = i;
> > > +   if (!tdx_cmr_num) {
> > > +           pr_err("Firmware bug: No valid CMR.\n");
> > > +           return -EFAULT;
> > > +   }
> > > +
> > > +   /* Print kernel sanitized CMRs */
> > > +   print_cmrs(tdx_cmr_array, tdx_cmr_num, "Kernel-sanitized-CMR");
> > > +
> > > +   return 0;
> > > +}
> > > +
> > > +static int tdx_get_sysinfo(void)
> > > +{
> > > +   struct tdx_module_output out;
> > > +   u64 tdsysinfo_sz, cmr_num;
> > > +   int ret;
> > > +
> > > +   BUILD_BUG_ON(sizeof(struct tdsysinfo_struct) != TDSYSINFO_STRUCT_SIZE);
> > > +
> > > +   ret = seamcall(TDH_SYS_INFO, __pa(&tdx_sysinfo), TDSYSINFO_STRUCT_SIZE,
> > > +                   __pa(tdx_cmr_array), MAX_CMRS, NULL, &out);
> > > +   if (ret)
> > > +           return ret;
> > > +
> > > +   /*
> > > +    * If TDH.SYS.CONFIG succeeds, RDX contains the actual bytes
> > > +    * written to @tdx_sysinfo and R9 contains the actual entries
> > > +    * written to @tdx_cmr_array.  Sanity check them.
> > > +    */
> > > +   tdsysinfo_sz = out.rdx;
> > > +   cmr_num = out.r9;
> >
> > Please vertically align things like this:
> >
> >       tdsysinfo_sz = out.rdx;
> >       cmr_num      = out.r9;
>
> OK.
>
> >
> > > +   if (WARN_ON_ONCE((tdsysinfo_sz > sizeof(tdx_sysinfo)) || !tdsysinfo_sz ||
> > > +                           (cmr_num > MAX_CMRS) || !cmr_num))
> > > +           return -EFAULT;
> >
> > Sanity checking is good, but this makes me wonder how much is too much.
> >  I don't see a lot of code for instance checking if sys_write() writes
> > more than how much it was supposed to.
> >
> > Why are these sanity checks necessary here?  Is the TDX module expected
> > to be *THAT* buggy?  The thing that's providing, oh, basically all of
> > the security guarantees of this architecture.  It's overflowing the
> > buffers you hand it?
>
> I think this check can be removed.  Will remove.
>
> >
> > > +   pr_info("TDX module: vendor_id 0x%x, major_version %u, minor_version %u, build_date %u, build_num %u",
> > > +           tdx_sysinfo.vendor_id, tdx_sysinfo.major_version,
> > > +           tdx_sysinfo.minor_version, tdx_sysinfo.build_date,
> > > +           tdx_sysinfo.build_num);
> > > +
> > > +   /* Print BIOS provided CMRs */
> > > +   print_cmrs(tdx_cmr_array, cmr_num, "BIOS-CMR");
> > > +

sanitize_cmrs already prints the cmrs in case of success. So for valid
cmrs we are going to print them twice.
Would it be better to only print cmrs here in case sanitize_cmrs fails?

> > > +   return sanitize_cmrs(tdx_cmr_array, cmr_num);
> > > +}
> >
> > Does sanitize_cmrs() sanitize anything?  It looks to me like it *checks*
> > the CMRs.  But, sanitizing is an active operation that writes to the
> > data being sanitized.  This looks read-only to me.  check_cmrs() would
> > be a better name for a passive check.
>
> Sure will change to check_cmrs().
>
> >
> > >  static int init_tdx_module(void)
> > >  {
> > >     int ret;
> > > @@ -482,6 +608,11 @@ static int init_tdx_module(void)
> > >     if (ret)
> > >             goto out;
> > >
> > > +   /* Get TDX module information and CMRs */
> > > +   ret = tdx_get_sysinfo();
> > > +   if (ret)
> > > +           goto out;
> >
> > Couldn't we get rid of that comment if you did something like:
> >
> >       ret = tdx_get_sysinfo(&tdx_cmr_array, &tdx_sysinfo);
>
> Yes will do.
>
> >
> > and preferably make the variables function-local.
>
> 'tdx_sysinfo' will be used by KVM too.
>
>
>
> --
> Thanks,
> -Kai
>
>

Sagi
Huang, Kai May 18, 2022, 11:56 p.m. UTC | #10
On Wed, 2022-05-18 at 15:30 -0700, Sagi Shahar wrote:
> > > 
> > > > +   pr_info("TDX module: vendor_id 0x%x, major_version %u, minor_version
> > > > %u, build_date %u, build_num %u",
> > > > +           tdx_sysinfo.vendor_id, tdx_sysinfo.major_version,
> > > > +           tdx_sysinfo.minor_version, tdx_sysinfo.build_date,
> > > > +           tdx_sysinfo.build_num);
> > > > +
> > > > +   /* Print BIOS provided CMRs */
> > > > +   print_cmrs(tdx_cmr_array, cmr_num, "BIOS-CMR");
> > > > +
> 
> sanitize_cmrs already prints the cmrs in case of success. So for valid
> cmrs we are going to print them twice.
> Would it be better to only print cmrs here in case sanitize_cmrs fails?

The "BIOS-CMR" will always have 32 entries.  It includes all the *empty* CMRs
after the valid CMRs, so the two are different.  But yes it seems there's no
need to always print "BIOS-CMR".
Huang, Kai May 25, 2022, 4:47 a.m. UTC | #11
On Fri, 2022-04-29 at 10:47 -0700, Dave Hansen wrote:
> On 4/28/22 16:14, Kai Huang wrote:
> > On Thu, 2022-04-28 at 07:06 -0700, Dave Hansen wrote:
> > > On 4/27/22 17:15, Kai Huang wrote:
> > > > > Couldn't we get rid of that comment if you did something like:
> > > > > 
> > > > > 	ret = tdx_get_sysinfo(&tdx_cmr_array, &tdx_sysinfo);
> > > > 
> > > > Yes will do.
> > > > 
> > > > > and preferably make the variables function-local.
> > > > 
> > > > 'tdx_sysinfo' will be used by KVM too.
> > > 
> > > In other words, it's not a part of this series so I can't review whether
> > > this statement is correct or whether there's a better way to hand this
> > > information over to KVM.
> > > 
> > > This (minor) nugget influencing the design also isn't even commented or
> > > addressed in the changelog.
> > 
> > TDSYSINFO_STRUCT is 1024B and CMR array is 512B, so I don't think it should be
> > in the stack.  I can change to use dynamic allocation at the beginning and free
> > it at the end of the function.  KVM support patches can change it to static
> > variable in the file.
> 
> 2k of stack is big, but it isn't a deal breaker for something that's not
> nested anywhere and that's only called once in a pretty controlled
> setting and not in interrupt context.  I wouldn't cry about it.

Hi Dave,

I got below complaining when I use local variable for TDSYSINFO_STRUCT and CMR
array:

arch/x86/virt/vmx/tdx/tdx.c:383:1: warning: the frame size of 3072 bytes is
larger than 1024 bytes [-Wframe-larger-than=]
  383 | }

So I don't think we can use local variable for them.  I'll still use static
variables to avoid dynamic allocation.  In the commit message, I'll explain they
are too big to put into the stack, and KVM will need to use TDSYSINFO_STRUCT
reported by TDX module anyway.

Let me know if you don't agree?
Huang, Kai May 25, 2022, 4:57 a.m. UTC | #12
On Wed, 2022-05-25 at 16:47 +1200, Kai Huang wrote:
> On Fri, 2022-04-29 at 10:47 -0700, Dave Hansen wrote:
> > On 4/28/22 16:14, Kai Huang wrote:
> > > On Thu, 2022-04-28 at 07:06 -0700, Dave Hansen wrote:
> > > > On 4/27/22 17:15, Kai Huang wrote:
> > > > > > Couldn't we get rid of that comment if you did something like:
> > > > > > 
> > > > > > 	ret = tdx_get_sysinfo(&tdx_cmr_array, &tdx_sysinfo);
> > > > > 
> > > > > Yes will do.
> > > > > 
> > > > > > and preferably make the variables function-local.
> > > > > 
> > > > > 'tdx_sysinfo' will be used by KVM too.
> > > > 
> > > > In other words, it's not a part of this series so I can't review whether
> > > > this statement is correct or whether there's a better way to hand this
> > > > information over to KVM.
> > > > 
> > > > This (minor) nugget influencing the design also isn't even commented or
> > > > addressed in the changelog.
> > > 
> > > TDSYSINFO_STRUCT is 1024B and CMR array is 512B, so I don't think it should be
> > > in the stack.  I can change to use dynamic allocation at the beginning and free
> > > it at the end of the function.  KVM support patches can change it to static
> > > variable in the file.
> > 
> > 2k of stack is big, but it isn't a deal breaker for something that's not
> > nested anywhere and that's only called once in a pretty controlled
> > setting and not in interrupt context.  I wouldn't cry about it.
> 
> Hi Dave,
> 
> I got below complaining when I use local variable for TDSYSINFO_STRUCT and CMR
> array:
> 
> arch/x86/virt/vmx/tdx/tdx.c:383:1: warning: the frame size of 3072 bytes is
> larger than 1024 bytes [-Wframe-larger-than=]
>   383 | }
> 
> So I don't think we can use local variable for them.  I'll still use static
> variables to avoid dynamic allocation.  In the commit message, I'll explain they
> are too big to put into the stack, and KVM will need to use TDSYSINFO_STRUCT
> reported by TDX module anyway.
> 
> Let me know if you don't agree?

Btw, CMR array alone can be put into the stack.  It will never be used by KVM,
so I'll put CMR array as local variable, but keep tdx_sysinfo as static
variable.
Huang, Kai May 25, 2022, 4 p.m. UTC | #13
On Wed, 2022-05-25 at 16:57 +1200, Kai Huang wrote:
> On Wed, 2022-05-25 at 16:47 +1200, Kai Huang wrote:
> > On Fri, 2022-04-29 at 10:47 -0700, Dave Hansen wrote:
> > > On 4/28/22 16:14, Kai Huang wrote:
> > > > On Thu, 2022-04-28 at 07:06 -0700, Dave Hansen wrote:
> > > > > On 4/27/22 17:15, Kai Huang wrote:
> > > > > > > Couldn't we get rid of that comment if you did something like:
> > > > > > > 
> > > > > > > 	ret = tdx_get_sysinfo(&tdx_cmr_array, &tdx_sysinfo);
> > > > > > 
> > > > > > Yes will do.
> > > > > > 
> > > > > > > and preferably make the variables function-local.
> > > > > > 
> > > > > > 'tdx_sysinfo' will be used by KVM too.
> > > > > 
> > > > > In other words, it's not a part of this series so I can't review whether
> > > > > this statement is correct or whether there's a better way to hand this
> > > > > information over to KVM.
> > > > > 
> > > > > This (minor) nugget influencing the design also isn't even commented or
> > > > > addressed in the changelog.
> > > > 
> > > > TDSYSINFO_STRUCT is 1024B and CMR array is 512B, so I don't think it should be
> > > > in the stack.  I can change to use dynamic allocation at the beginning and free
> > > > it at the end of the function.  KVM support patches can change it to static
> > > > variable in the file.
> > > 
> > > 2k of stack is big, but it isn't a deal breaker for something that's not
> > > nested anywhere and that's only called once in a pretty controlled
> > > setting and not in interrupt context.  I wouldn't cry about it.
> > 
> > Hi Dave,
> > 
> > I got below complaining when I use local variable for TDSYSINFO_STRUCT and CMR
> > array:
> > 
> > arch/x86/virt/vmx/tdx/tdx.c:383:1: warning: the frame size of 3072 bytes is
> > larger than 1024 bytes [-Wframe-larger-than=]
> >   383 | }
> > 
> > So I don't think we can use local variable for them.  I'll still use static
> > variables to avoid dynamic allocation.  In the commit message, I'll explain they
> > are too big to put into the stack, and KVM will need to use TDSYSINFO_STRUCT
> > reported by TDX module anyway.
> > 
> > Let me know if you don't agree?
> 
> Btw, CMR array alone can be put into the stack.  It will never be used by KVM,
> so I'll put CMR array as local variable, but keep tdx_sysinfo as static
> variable.
> 

Sorry for multiple emails about this.  If I put CMR array to the stack, I still
sometimes get the build warning.  So will use static variables.

Also, constructing TDMRs internally needs to use tdx_sysinfo (max_tdmrs,
pamt_entry_size, max_rsvd_per_tdmr), so with static variable they don't need to
be passed around as function arguments.
diff mbox series

Patch

diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c
index ef2718423f0f..482e6d858181 100644
--- a/arch/x86/virt/vmx/tdx/tdx.c
+++ b/arch/x86/virt/vmx/tdx/tdx.c
@@ -80,6 +80,11 @@  static DEFINE_MUTEX(tdx_module_lock);
 
 static struct p_seamldr_info p_seamldr_info;
 
+/* Base address of CMR array needs to be 512 bytes aligned. */
+static struct cmr_info tdx_cmr_array[MAX_CMRS] __aligned(CMR_INFO_ARRAY_ALIGNMENT);
+static int tdx_cmr_num;
+static struct tdsysinfo_struct tdx_sysinfo;
+
 static bool __seamrr_enabled(void)
 {
 	return (seamrr_mask & SEAMRR_ENABLED_BITS) == SEAMRR_ENABLED_BITS;
@@ -468,6 +473,127 @@  static int tdx_module_init_cpus(void)
 	return seamcall_on_each_cpu(&sc);
 }
 
+static inline bool cmr_valid(struct cmr_info *cmr)
+{
+	return !!cmr->size;
+}
+
+static void print_cmrs(struct cmr_info *cmr_array, int cmr_num,
+		       const char *name)
+{
+	int i;
+
+	for (i = 0; i < cmr_num; i++) {
+		struct cmr_info *cmr = &cmr_array[i];
+
+		pr_info("%s : [0x%llx, 0x%llx)\n", name,
+				cmr->base, cmr->base + cmr->size);
+	}
+}
+
+static int sanitize_cmrs(struct cmr_info *cmr_array, int cmr_num)
+{
+	int i, j;
+
+	/*
+	 * Intel TDX module spec, 20.7.3 CMR_INFO:
+	 *
+	 *   TDH.SYS.INFO leaf function returns a MAX_CMRS (32) entry
+	 *   array of CMR_INFO entries. The CMRs are sorted from the
+	 *   lowest base address to the highest base address, and they
+	 *   are non-overlapping.
+	 *
+	 * This implies that BIOS may generate invalid empty entries
+	 * if total CMRs are less than 32.  Skip them manually.
+	 */
+	for (i = 0; i < cmr_num; i++) {
+		struct cmr_info *cmr = &cmr_array[i];
+		struct cmr_info *prev_cmr = NULL;
+
+		/* Skip further invalid CMRs */
+		if (!cmr_valid(cmr))
+			break;
+
+		if (i > 0)
+			prev_cmr = &cmr_array[i - 1];
+
+		/*
+		 * It is a TDX firmware bug if CMRs are not
+		 * in address ascending order.
+		 */
+		if (prev_cmr && ((prev_cmr->base + prev_cmr->size) >
+					cmr->base)) {
+			pr_err("Firmware bug: CMRs not in address ascending order.\n");
+			return -EFAULT;
+		}
+	}
+
+	/*
+	 * Also a sane BIOS should never generate invalid CMR(s) between
+	 * two valid CMRs.  Sanity check this and simply return error in
+	 * this case.
+	 *
+	 * By reaching here @i is the index of the first invalid CMR (or
+	 * cmr_num).  Starting with next entry of @i since it has already
+	 * been checked.
+	 */
+	for (j = i + 1; j < cmr_num; j++)
+		if (cmr_valid(&cmr_array[j])) {
+			pr_err("Firmware bug: invalid CMR(s) among valid CMRs.\n");
+			return -EFAULT;
+		}
+
+	/*
+	 * Trim all tail invalid empty CMRs.  BIOS should generate at
+	 * least one valid CMR, otherwise it's a TDX firmware bug.
+	 */
+	tdx_cmr_num = i;
+	if (!tdx_cmr_num) {
+		pr_err("Firmware bug: No valid CMR.\n");
+		return -EFAULT;
+	}
+
+	/* Print kernel sanitized CMRs */
+	print_cmrs(tdx_cmr_array, tdx_cmr_num, "Kernel-sanitized-CMR");
+
+	return 0;
+}
+
+static int tdx_get_sysinfo(void)
+{
+	struct tdx_module_output out;
+	u64 tdsysinfo_sz, cmr_num;
+	int ret;
+
+	BUILD_BUG_ON(sizeof(struct tdsysinfo_struct) != TDSYSINFO_STRUCT_SIZE);
+
+	ret = seamcall(TDH_SYS_INFO, __pa(&tdx_sysinfo), TDSYSINFO_STRUCT_SIZE,
+			__pa(tdx_cmr_array), MAX_CMRS, NULL, &out);
+	if (ret)
+		return ret;
+
+	/*
+	 * If TDH.SYS.CONFIG succeeds, RDX contains the actual bytes
+	 * written to @tdx_sysinfo and R9 contains the actual entries
+	 * written to @tdx_cmr_array.  Sanity check them.
+	 */
+	tdsysinfo_sz = out.rdx;
+	cmr_num = out.r9;
+	if (WARN_ON_ONCE((tdsysinfo_sz > sizeof(tdx_sysinfo)) || !tdsysinfo_sz ||
+				(cmr_num > MAX_CMRS) || !cmr_num))
+		return -EFAULT;
+
+	pr_info("TDX module: vendor_id 0x%x, major_version %u, minor_version %u, build_date %u, build_num %u",
+		tdx_sysinfo.vendor_id, tdx_sysinfo.major_version,
+		tdx_sysinfo.minor_version, tdx_sysinfo.build_date,
+		tdx_sysinfo.build_num);
+
+	/* Print BIOS provided CMRs */
+	print_cmrs(tdx_cmr_array, cmr_num, "BIOS-CMR");
+
+	return sanitize_cmrs(tdx_cmr_array, cmr_num);
+}
+
 static int init_tdx_module(void)
 {
 	int ret;
@@ -482,6 +608,11 @@  static int init_tdx_module(void)
 	if (ret)
 		goto out;
 
+	/* Get TDX module information and CMRs */
+	ret = tdx_get_sysinfo();
+	if (ret)
+		goto out;
+
 	/*
 	 * Return -EFAULT until all steps of TDX module
 	 * initialization are done.
diff --git a/arch/x86/virt/vmx/tdx/tdx.h b/arch/x86/virt/vmx/tdx/tdx.h
index b8cfdd6e12f3..2f21c45df6ac 100644
--- a/arch/x86/virt/vmx/tdx/tdx.h
+++ b/arch/x86/virt/vmx/tdx/tdx.h
@@ -29,6 +29,66 @@  struct p_seamldr_info {
 	u8	reserved2[88];
 } __packed __aligned(P_SEAMLDR_INFO_ALIGNMENT);
 
+struct cmr_info {
+	u64	base;
+	u64	size;
+} __packed;
+
+#define MAX_CMRS			32
+#define CMR_INFO_ARRAY_ALIGNMENT	512
+
+struct cpuid_config {
+	u32	leaf;
+	u32	sub_leaf;
+	u32	eax;
+	u32	ebx;
+	u32	ecx;
+	u32	edx;
+} __packed;
+
+#define TDSYSINFO_STRUCT_SIZE		1024
+#define TDSYSINFO_STRUCT_ALIGNMENT	1024
+
+struct tdsysinfo_struct {
+	/* TDX-SEAM Module Info */
+	u32	attributes;
+	u32	vendor_id;
+	u32	build_date;
+	u16	build_num;
+	u16	minor_version;
+	u16	major_version;
+	u8	reserved0[14];
+	/* Memory Info */
+	u16	max_tdmrs;
+	u16	max_reserved_per_tdmr;
+	u16	pamt_entry_size;
+	u8	reserved1[10];
+	/* Control Struct Info */
+	u16	tdcs_base_size;
+	u8	reserved2[2];
+	u16	tdvps_base_size;
+	u8	tdvps_xfam_dependent_size;
+	u8	reserved3[9];
+	/* TD Capabilities */
+	u64	attributes_fixed0;
+	u64	attributes_fixed1;
+	u64	xfam_fixed0;
+	u64	xfam_fixed1;
+	u8	reserved4[32];
+	u32	num_cpuid_config;
+	/*
+	 * The actual number of CPUID_CONFIG depends on above
+	 * 'num_cpuid_config'.  The size of 'struct tdsysinfo_struct'
+	 * is 1024B defined by TDX architecture.  Use a union with
+	 * specific padding to make 'sizeof(struct tdsysinfo_struct)'
+	 * equal to 1024.
+	 */
+	union {
+		struct cpuid_config	cpuid_configs[0];
+		u8			reserved5[892];
+	};
+} __packed __aligned(TDSYSINFO_STRUCT_ALIGNMENT);
+
 /*
  * P-SEAMLDR SEAMCALL leaf function
  */
@@ -38,6 +98,7 @@  struct p_seamldr_info {
 /*
  * TDX module SEAMCALL leaf functions
  */
+#define TDH_SYS_INFO		32
 #define TDH_SYS_INIT		33
 #define TDH_SYS_LP_INIT		35
 #define TDH_SYS_LP_SHUTDOWN	44