diff mbox series

[v3,6/6] x86/vmware: Add TDX hypercall support

Message ID 20231219215751.9445-7-alexey.makhalov@broadcom.com (mailing list archive)
State Superseded
Headers show
Series VMware hypercalls enhancements | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Guessed tree name to be net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 1117 this patch: 1117
netdev/cc_maintainers warning 1 maintainers not CCed: amakhalov@vmware.com
netdev/build_clang fail Errors and warnings before: 12 this patch: 17
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn fail Errors and warnings before: 1145 this patch: 1152
netdev/checkpatch warning CHECK: Prefer kernel type 'u32' over 'uint32_t' CHECK: extern prototypes should be avoided in .h files
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Alexey Makhalov Dec. 19, 2023, 9:57 p.m. UTC
From: Alexey Makhalov <amakhalov@vmware.com>

VMware hypercalls use I/O port, VMCALL or VMMCALL instructions.
Add __tdx_hypercall path to support TDX guests.

No change in high bandwidth hypercalls, as only low bandwidth
ones are supported for TDX guests.

Co-developed-by: Tim Merrifield <timothym@vmware.com>
Signed-off-by: Tim Merrifield <timothym@vmware.com>
Signed-off-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Nadav Amit <namit@vmware.com>
---
 arch/x86/include/asm/vmware.h | 83 +++++++++++++++++++++++++++++++++++
 arch/x86/kernel/cpu/vmware.c  | 24 ++++++++++
 2 files changed, 107 insertions(+)

Comments

Kirill A . Shutemov Dec. 19, 2023, 11:23 p.m. UTC | #1
On Tue, Dec 19, 2023 at 01:57:51PM -0800, Alexey Makhalov wrote:
> diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
> index 3aa1adaed18f..ef07ab7a07e1 100644
> --- a/arch/x86/kernel/cpu/vmware.c
> +++ b/arch/x86/kernel/cpu/vmware.c
> @@ -428,6 +428,30 @@ static bool __init vmware_legacy_x2apic_available(void)
>  		(eax & BIT(VCPU_LEGACY_X2APIC));
>  }
>  
> +#ifdef CONFIG_INTEL_TDX_GUEST
> +unsigned long vmware_tdx_hypercall(unsigned long cmd,
> +				   struct tdx_module_args *args)
> +{
> +	if (!hypervisor_is_type(X86_HYPER_VMWARE))
> +		return 0;
> +
> +	if (cmd & ~VMWARE_CMD_MASK) {
> +		pr_warn("Out of range command %x\n", cmd);
> +		return 0;

Is zero success? Shouldn't it be an error?

> +	}
> +
> +	args->r10 = VMWARE_TDX_VENDOR_LEAF;
> +	args->r11 = VMWARE_TDX_HCALL_FUNC;
> +	args->r12 = VMWARE_HYPERVISOR_MAGIC;
> +	args->r13 = cmd;
> +
> +	__tdx_hypercall(args);
> +
> +	return args->r12;
> +}
> +EXPORT_SYMBOL_GPL(vmware_tdx_hypercall);
> +#endif
> +
>  #ifdef CONFIG_AMD_MEM_ENCRYPT
>  static void vmware_sev_es_hcall_prepare(struct ghcb *ghcb,
>  					struct pt_regs *regs)
> -- 
> 2.39.0
>
Alexey Makhalov Dec. 20, 2023, 12:27 a.m. UTC | #2
On 12/19/23 3:23 PM, kirill.shutemov@linux.intel.com wrote:
> On Tue, Dec 19, 2023 at 01:57:51PM -0800, Alexey Makhalov wrote:
>> diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
>> index 3aa1adaed18f..ef07ab7a07e1 100644
>> --- a/arch/x86/kernel/cpu/vmware.c
>> +++ b/arch/x86/kernel/cpu/vmware.c
>> @@ -428,6 +428,30 @@ static bool __init vmware_legacy_x2apic_available(void)
>>   		(eax & BIT(VCPU_LEGACY_X2APIC));
>>   }
>>   
>> +#ifdef CONFIG_INTEL_TDX_GUEST
>> +unsigned long vmware_tdx_hypercall(unsigned long cmd,
>> +				   struct tdx_module_args *args)
>> +{
>> +	if (!hypervisor_is_type(X86_HYPER_VMWARE))
>> +		return 0;
>> +
>> +	if (cmd & ~VMWARE_CMD_MASK) {
>> +		pr_warn("Out of range command %x\n", cmd);
>> +		return 0;
> 
> Is zero success? Shouldn't it be an error?

VMware hypercalls do not have a standard way of signalling an error.
To generalize expectations from the caller perspective of any existing 
hypercalls: error (including hypercall is not supported or disabled) is 
when return value is 0 and out1/2 are unchanged or equal to in1/in2.

All existing vmware_hypercall callers will gracefully handle returned 0.
But they should never hit this path, as 0 bail out was introduced as a 
protection for the case where exported vmware_tdx_hypercall is used by 
random caller (not following VMware hypercall ABI).

> 
>> +	}
>> +
>> +	args->r10 = VMWARE_TDX_VENDOR_LEAF;
>> +	args->r11 = VMWARE_TDX_HCALL_FUNC;
>> +	args->r12 = VMWARE_HYPERVISOR_MAGIC;
>> +	args->r13 = cmd;
>> +
>> +	__tdx_hypercall(args);
>> +
>> +	return args->r12;
>> +}
>> +EXPORT_SYMBOL_GPL(vmware_tdx_hypercall);
>> +#endif
>> +
>>   #ifdef CONFIG_AMD_MEM_ENCRYPT
>>   static void vmware_sev_es_hcall_prepare(struct ghcb *ghcb,
>>   					struct pt_regs *regs)
>> -- 
>> 2.39.0
>>
>
Kirill A . Shutemov Dec. 20, 2023, 1 a.m. UTC | #3
On Tue, Dec 19, 2023 at 04:27:51PM -0800, Alexey Makhalov wrote:
> 
> 
> On 12/19/23 3:23 PM, kirill.shutemov@linux.intel.com wrote:
> > On Tue, Dec 19, 2023 at 01:57:51PM -0800, Alexey Makhalov wrote:
> > > diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
> > > index 3aa1adaed18f..ef07ab7a07e1 100644
> > > --- a/arch/x86/kernel/cpu/vmware.c
> > > +++ b/arch/x86/kernel/cpu/vmware.c
> > > @@ -428,6 +428,30 @@ static bool __init vmware_legacy_x2apic_available(void)
> > >   		(eax & BIT(VCPU_LEGACY_X2APIC));
> > >   }
> > > +#ifdef CONFIG_INTEL_TDX_GUEST
> > > +unsigned long vmware_tdx_hypercall(unsigned long cmd,
> > > +				   struct tdx_module_args *args)
> > > +{
> > > +	if (!hypervisor_is_type(X86_HYPER_VMWARE))
> > > +		return 0;

BTW, don't you want to warn here to? We don't expect vmware hypercalls to
be called by non-vmware guest, do we?

> > > +
> > > +	if (cmd & ~VMWARE_CMD_MASK) {
> > > +		pr_warn("Out of range command %x\n", cmd);
> > > +		return 0;
> > 
> > Is zero success? Shouldn't it be an error?
> 
> VMware hypercalls do not have a standard way of signalling an error.
> To generalize expectations from the caller perspective of any existing
> hypercalls: error (including hypercall is not supported or disabled) is when
> return value is 0 and out1/2 are unchanged or equal to in1/in2.

You are talking about signaling errors over hypercall transport. But if
kernel can see that something is wrong why cannot it signal the issue
clearly to caller. It is going to be in-kernel convention.

And to very least, it has to be pr_warn_once().
Alexey Makhalov Dec. 20, 2023, 3:02 a.m. UTC | #4
On 12/19/23 5:00 PM, kirill.shutemov@linux.intel.com wrote:
> On Tue, Dec 19, 2023 at 04:27:51PM -0800, Alexey Makhalov wrote:
>>
>>
>> On 12/19/23 3:23 PM, kirill.shutemov@linux.intel.com wrote:
>>> On Tue, Dec 19, 2023 at 01:57:51PM -0800, Alexey Makhalov wrote:
>>>> diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
>>>> index 3aa1adaed18f..ef07ab7a07e1 100644
>>>> --- a/arch/x86/kernel/cpu/vmware.c
>>>> +++ b/arch/x86/kernel/cpu/vmware.c
>>>> @@ -428,6 +428,30 @@ static bool __init vmware_legacy_x2apic_available(void)
>>>>    		(eax & BIT(VCPU_LEGACY_X2APIC));
>>>>    }
>>>> +#ifdef CONFIG_INTEL_TDX_GUEST
>>>> +unsigned long vmware_tdx_hypercall(unsigned long cmd,
>>>> +				   struct tdx_module_args *args)
>>>> +{
>>>> +	if (!hypervisor_is_type(X86_HYPER_VMWARE))
>>>> +		return 0;
> 
> BTW, don't you want to warn here to? We don't expect vmware hypercalls to
> be called by non-vmware guest, do we?

The answer is below...

> 
>>>> +
>>>> +	if (cmd & ~VMWARE_CMD_MASK) {
>>>> +		pr_warn("Out of range command %x\n", cmd);
>>>> +		return 0;
>>>
>>> Is zero success? Shouldn't it be an error?
>>
>> VMware hypercalls do not have a standard way of signalling an error.
>> To generalize expectations from the caller perspective of any existing
>> hypercalls: error (including hypercall is not supported or disabled) is when
>> return value is 0 and out1/2 are unchanged or equal to in1/in2.
> 
> You are talking about signaling errors over hypercall transport. But if
> kernel can see that something is wrong why cannot it signal the issue
> clearly to caller. It is going to be in-kernel convention.These "return 0" blocks were introduced to protect against non-vmware 
guest or arbitrary modules trying to use __tdx_hypercall via exported 
vmware_tdx_hypercall function. In this case, it will be NOOP behavior 
with no or minor side effects.

 From valid vmware_hypercall callers point of view, there is no such 
thing as a hypercall not available. Once guest detection code recognizes 
VMWare hypervisor via cpuid, it will start using hypercalls in 
accordance to per-call API.

Valid VMware guest code will never go into first return, no warning 
required.
Second return can be hit in rare cases for example during development 
phase, or, hypothetical case, when cmd was dynamically generated.
That's why we have a warning warning only for the second condition.

While speaking about it, I'm started to lean towards your 
recommendation. Yes, we can return standard error code such as -EINVAL 
or just -1 instead of "return 0" in this function. And it will be 
algorithmically correct. As if Vmware guest caller provide out of range 
cmd - it is not documented behavior.

Speaking of additional in-kernel convention for passing additional 
parameter if error happens, it does not makes sense for me because:
1. existing caller codes analyze output argument to recognize error 
error response from the hypervisor. Adding one additional check for 
in-kernel errors just for TDX path which will be never hit by valid code 
in production is an unnecessary overhead.
2. It will definitely add an overhead as an error code will require one 
more output value, or out0 should be moved from return in-register value 
to return by pointer function argument.

Summarizing, overloading vmware_tdx_hypercall return value by arg0 (from 
the hypervisor) and kernel error (-1 or any other) seems like reasonable 
change.

> 
> And to very least, it has to be pr_warn_once().
> 
Good catch! Will change it.

Thanks,
--Alexey
kernel test robot Dec. 20, 2023, 1:09 p.m. UTC | #5
Hi Alexey,

kernel test robot noticed the following build warnings:

[auto build test WARNING on drm-misc/drm-misc-next]
[also build test WARNING on dtor-input/next dtor-input/for-linus linus/master v6.7-rc6 next-20231220]
[cannot apply to tip/x86/vmware]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Alexey-Makhalov/x86-vmware-Move-common-macros-to-vmware-h/20231220-060028
base:   git://anongit.freedesktop.org/drm/drm-misc drm-misc-next
patch link:    https://lore.kernel.org/r/20231219215751.9445-7-alexey.makhalov%40broadcom.com
patch subject: [PATCH v3 6/6] x86/vmware: Add TDX hypercall support
config: x86_64-allyesconfig (https://download.01.org/0day-ci/archive/20231220/202312202020.5O1T2aSk-lkp@intel.com/config)
compiler: clang version 16.0.4 (https://github.com/llvm/llvm-project.git ae42196bc493ffe877a7e3dff8be32035dea4d07)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20231220/202312202020.5O1T2aSk-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202312202020.5O1T2aSk-lkp@intel.com/

All warnings (new ones prefixed by >>):

>> arch/x86/kernel/cpu/vmware.c:439:40: warning: format specifies type 'unsigned int' but the argument has type 'unsigned long' [-Wformat]
                   pr_warn("Out of range command %x\n", cmd);
                                                 ~~     ^~~
                                                 %lx
   include/linux/printk.h:508:37: note: expanded from macro 'pr_warn'
           printk(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__)
                                      ~~~     ^~~~~~~~~~~
   include/linux/printk.h:455:60: note: expanded from macro 'printk'
   #define printk(fmt, ...) printk_index_wrap(_printk, fmt, ##__VA_ARGS__)
                                                       ~~~    ^~~~~~~~~~~
   include/linux/printk.h:427:19: note: expanded from macro 'printk_index_wrap'
                   _p_func(_fmt, ##__VA_ARGS__);                           \
                           ~~~~    ^~~~~~~~~~~
   1 warning generated.


vim +439 arch/x86/kernel/cpu/vmware.c

   430	
   431	#ifdef CONFIG_INTEL_TDX_GUEST
   432	unsigned long vmware_tdx_hypercall(unsigned long cmd,
   433					   struct tdx_module_args *args)
   434	{
   435		if (!hypervisor_is_type(X86_HYPER_VMWARE))
   436			return 0;
   437	
   438		if (cmd & ~VMWARE_CMD_MASK) {
 > 439			pr_warn("Out of range command %x\n", cmd);
   440			return 0;
   441		}
   442	
   443		args->r10 = VMWARE_TDX_VENDOR_LEAF;
   444		args->r11 = VMWARE_TDX_HCALL_FUNC;
   445		args->r12 = VMWARE_HYPERVISOR_MAGIC;
   446		args->r13 = cmd;
   447	
   448		__tdx_hypercall(args);
   449	
   450		return args->r12;
   451	}
   452	EXPORT_SYMBOL_GPL(vmware_tdx_hypercall);
   453	#endif
   454
diff mbox series

Patch

diff --git a/arch/x86/include/asm/vmware.h b/arch/x86/include/asm/vmware.h
index 719e41260ece..cad6f5b371a8 100644
--- a/arch/x86/include/asm/vmware.h
+++ b/arch/x86/include/asm/vmware.h
@@ -34,12 +34,65 @@ 
 #define VMWARE_CMD_GETHZ		45
 #define VMWARE_CMD_GETVCPU_INFO		68
 #define VMWARE_CMD_STEALCLOCK		91
+/*
+ * Hypercall command mask:
+ *   bits[6:0] command, range [0, 127]
+ *   bits[19:16] sub-command, range [0, 15]
+ */
+#define VMWARE_CMD_MASK			0xf007fU
 
 #define CPUID_VMWARE_FEATURES_ECX_VMMCALL	BIT(0)
 #define CPUID_VMWARE_FEATURES_ECX_VMCALL	BIT(1)
 
 extern u8 vmware_hypercall_mode;
 
+#define VMWARE_TDX_VENDOR_LEAF 0x1af7e4909ULL
+#define VMWARE_TDX_HCALL_FUNC  1
+
+extern unsigned long vmware_tdx_hypercall(unsigned long cmd,
+					  struct tdx_module_args *args);
+
+/*
+ * TDCALL[TDG.VP.VMCALL] uses rax (arg0) and rcx (arg2), while the use of
+ * rbp (arg6) is discouraged by the TDX specification. Therefore, we
+ * remap those registers to r12, r13 and r14, respectively.
+ */
+static inline
+unsigned long vmware_tdx_hypercall_args(unsigned long cmd, unsigned long in1,
+					unsigned long in3, unsigned long in4,
+					unsigned long in5, unsigned long in6,
+					uint32_t *out1, uint32_t *out2,
+					uint32_t *out3, uint32_t *out4,
+					uint32_t *out5, uint32_t *out6)
+{
+	unsigned long ret;
+
+	struct tdx_module_args args = {
+		.rbx = in1,
+		.rdx = in3,
+		.rsi = in4,
+		.rdi = in5,
+		.r14 = in6,
+	};
+
+	ret = vmware_tdx_hypercall(cmd, &args);
+
+	if (out1)
+		*out1 = args.rbx;
+	if (out2)
+		*out2 = args.r13;
+	if (out3)
+		*out3 = args.rdx;
+	if (out4)
+		*out4 = args.rsi;
+	if (out5)
+		*out5 = args.rdi;
+	if (out6)
+		*out6 = args.r14;
+
+	return ret;
+}
+
 /*
  * The low bandwidth call. The low word of edx is presumed to have OUT bit
  * set. The high word of edx may contain input data from the caller.
@@ -67,6 +120,11 @@  unsigned long vmware_hypercall1(unsigned long cmd, unsigned long in1)
 {
 	unsigned long out0;
 
+	if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST))
+		return vmware_tdx_hypercall_args(cmd, in1, 0, 0, 0, 0,
+						 NULL, NULL, NULL,
+						 NULL, NULL, NULL);
+
 	asm_inline volatile (VMWARE_HYPERCALL
 		: "=a" (out0)
 		: [port] "i" (VMWARE_HYPERVISOR_PORT),
@@ -85,6 +143,11 @@  unsigned long vmware_hypercall3(unsigned long cmd, unsigned long in1,
 {
 	unsigned long out0;
 
+	if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST))
+		return vmware_tdx_hypercall_args(cmd, in1, 0, 0, 0, 0,
+						 out1, out2, NULL,
+						 NULL, NULL, NULL);
+
 	asm_inline volatile (VMWARE_HYPERCALL
 		: "=a" (out0), "=b" (*out1), "=c" (*out2)
 		: [port] "i" (VMWARE_HYPERVISOR_PORT),
@@ -104,6 +167,11 @@  unsigned long vmware_hypercall4(unsigned long cmd, unsigned long in1,
 {
 	unsigned long out0;
 
+	if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST))
+		return vmware_tdx_hypercall_args(cmd, in1, 0, 0, 0, 0,
+						 out1, out2, out3,
+						 NULL, NULL, NULL);
+
 	asm_inline volatile (VMWARE_HYPERCALL
 		: "=a" (out0), "=b" (*out1), "=c" (*out2), "=d" (*out3)
 		: [port] "i" (VMWARE_HYPERVISOR_PORT),
@@ -123,6 +191,11 @@  unsigned long vmware_hypercall5(unsigned long cmd, unsigned long in1,
 {
 	unsigned long out0;
 
+	if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST))
+		return vmware_tdx_hypercall_args(cmd, in1, in3, in4, in5, 0,
+						 NULL, out2, NULL,
+						 NULL, NULL, NULL);
+
 	asm_inline volatile (VMWARE_HYPERCALL
 		: "=a" (out0), "=c" (*out2)
 		: [port] "i" (VMWARE_HYPERVISOR_PORT),
@@ -145,6 +218,11 @@  unsigned long vmware_hypercall6(unsigned long cmd, unsigned long in1,
 {
 	unsigned long out0;
 
+	if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST))
+		return vmware_tdx_hypercall_args(cmd, in1, in3, 0, 0, 0,
+						 NULL, out2, out3,
+						 out4, out5, NULL);
+
 	asm_inline volatile (VMWARE_HYPERCALL
 		: "=a" (out0), "=c" (*out2), "=d" (*out3), "=S" (*out4),
 		  "=D" (*out5)
@@ -166,6 +244,11 @@  unsigned long vmware_hypercall7(unsigned long cmd, unsigned long in1,
 {
 	unsigned long out0;
 
+	if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST))
+		return vmware_tdx_hypercall_args(cmd, in1, in3, in4, in5, 0,
+						 out1, out2, out3,
+						 NULL, NULL, NULL);
+
 	asm_inline volatile (VMWARE_HYPERCALL
 		: "=a" (out0), "=b" (*out1), "=c" (*out2), "=d" (*out3)
 		: [port] "i" (VMWARE_HYPERVISOR_PORT),
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index 3aa1adaed18f..ef07ab7a07e1 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -428,6 +428,30 @@  static bool __init vmware_legacy_x2apic_available(void)
 		(eax & BIT(VCPU_LEGACY_X2APIC));
 }
 
+#ifdef CONFIG_INTEL_TDX_GUEST
+unsigned long vmware_tdx_hypercall(unsigned long cmd,
+				   struct tdx_module_args *args)
+{
+	if (!hypervisor_is_type(X86_HYPER_VMWARE))
+		return 0;
+
+	if (cmd & ~VMWARE_CMD_MASK) {
+		pr_warn("Out of range command %x\n", cmd);
+		return 0;
+	}
+
+	args->r10 = VMWARE_TDX_VENDOR_LEAF;
+	args->r11 = VMWARE_TDX_HCALL_FUNC;
+	args->r12 = VMWARE_HYPERVISOR_MAGIC;
+	args->r13 = cmd;
+
+	__tdx_hypercall(args);
+
+	return args->r12;
+}
+EXPORT_SYMBOL_GPL(vmware_tdx_hypercall);
+#endif
+
 #ifdef CONFIG_AMD_MEM_ENCRYPT
 static void vmware_sev_es_hcall_prepare(struct ghcb *ghcb,
 					struct pt_regs *regs)