Message ID | 20231219215751.9445-7-alexey.makhalov@broadcom.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | VMware hypercalls enhancements | expand |
On Tue, Dec 19, 2023 at 01:57:51PM -0800, Alexey Makhalov wrote: > diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c > index 3aa1adaed18f..ef07ab7a07e1 100644 > --- a/arch/x86/kernel/cpu/vmware.c > +++ b/arch/x86/kernel/cpu/vmware.c > @@ -428,6 +428,30 @@ static bool __init vmware_legacy_x2apic_available(void) > (eax & BIT(VCPU_LEGACY_X2APIC)); > } > > +#ifdef CONFIG_INTEL_TDX_GUEST > +unsigned long vmware_tdx_hypercall(unsigned long cmd, > + struct tdx_module_args *args) > +{ > + if (!hypervisor_is_type(X86_HYPER_VMWARE)) > + return 0; > + > + if (cmd & ~VMWARE_CMD_MASK) { > + pr_warn("Out of range command %x\n", cmd); > + return 0; Is zero success? Shouldn't it be an error? > + } > + > + args->r10 = VMWARE_TDX_VENDOR_LEAF; > + args->r11 = VMWARE_TDX_HCALL_FUNC; > + args->r12 = VMWARE_HYPERVISOR_MAGIC; > + args->r13 = cmd; > + > + __tdx_hypercall(args); > + > + return args->r12; > +} > +EXPORT_SYMBOL_GPL(vmware_tdx_hypercall); > +#endif > + > #ifdef CONFIG_AMD_MEM_ENCRYPT > static void vmware_sev_es_hcall_prepare(struct ghcb *ghcb, > struct pt_regs *regs) > -- > 2.39.0 >
On 12/19/23 3:23 PM, kirill.shutemov@linux.intel.com wrote: > On Tue, Dec 19, 2023 at 01:57:51PM -0800, Alexey Makhalov wrote: >> diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c >> index 3aa1adaed18f..ef07ab7a07e1 100644 >> --- a/arch/x86/kernel/cpu/vmware.c >> +++ b/arch/x86/kernel/cpu/vmware.c >> @@ -428,6 +428,30 @@ static bool __init vmware_legacy_x2apic_available(void) >> (eax & BIT(VCPU_LEGACY_X2APIC)); >> } >> >> +#ifdef CONFIG_INTEL_TDX_GUEST >> +unsigned long vmware_tdx_hypercall(unsigned long cmd, >> + struct tdx_module_args *args) >> +{ >> + if (!hypervisor_is_type(X86_HYPER_VMWARE)) >> + return 0; >> + >> + if (cmd & ~VMWARE_CMD_MASK) { >> + pr_warn("Out of range command %x\n", cmd); >> + return 0; > > Is zero success? Shouldn't it be an error? VMware hypercalls do not have a standard way of signalling an error. To generalize expectations from the caller perspective of any existing hypercalls: error (including hypercall is not supported or disabled) is when return value is 0 and out1/2 are unchanged or equal to in1/in2. All existing vmware_hypercall callers will gracefully handle returned 0. But they should never hit this path, as 0 bail out was introduced as a protection for the case where exported vmware_tdx_hypercall is used by random caller (not following VMware hypercall ABI). > >> + } >> + >> + args->r10 = VMWARE_TDX_VENDOR_LEAF; >> + args->r11 = VMWARE_TDX_HCALL_FUNC; >> + args->r12 = VMWARE_HYPERVISOR_MAGIC; >> + args->r13 = cmd; >> + >> + __tdx_hypercall(args); >> + >> + return args->r12; >> +} >> +EXPORT_SYMBOL_GPL(vmware_tdx_hypercall); >> +#endif >> + >> #ifdef CONFIG_AMD_MEM_ENCRYPT >> static void vmware_sev_es_hcall_prepare(struct ghcb *ghcb, >> struct pt_regs *regs) >> -- >> 2.39.0 >> >
On Tue, Dec 19, 2023 at 04:27:51PM -0800, Alexey Makhalov wrote: > > > On 12/19/23 3:23 PM, kirill.shutemov@linux.intel.com wrote: > > On Tue, Dec 19, 2023 at 01:57:51PM -0800, Alexey Makhalov wrote: > > > diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c > > > index 3aa1adaed18f..ef07ab7a07e1 100644 > > > --- a/arch/x86/kernel/cpu/vmware.c > > > +++ b/arch/x86/kernel/cpu/vmware.c > > > @@ -428,6 +428,30 @@ static bool __init vmware_legacy_x2apic_available(void) > > > (eax & BIT(VCPU_LEGACY_X2APIC)); > > > } > > > +#ifdef CONFIG_INTEL_TDX_GUEST > > > +unsigned long vmware_tdx_hypercall(unsigned long cmd, > > > + struct tdx_module_args *args) > > > +{ > > > + if (!hypervisor_is_type(X86_HYPER_VMWARE)) > > > + return 0; BTW, don't you want to warn here too? We don't expect VMware hypercalls to be called by a non-VMware guest, do we? > > > + > > > + if (cmd & ~VMWARE_CMD_MASK) { > > > + pr_warn("Out of range command %x\n", cmd); > > > + return 0; > > > > Is zero success? Shouldn't it be an error? > > VMware hypercalls do not have a standard way of signalling an error. > To generalize expectations from the caller perspective of any existing > hypercalls: error (including hypercall is not supported or disabled) is when > return value is 0 and out1/2 are unchanged or equal to in1/in2. You are talking about signaling errors over the hypercall transport. But if the kernel can see that something is wrong, why can't it signal the issue clearly to the caller? It is going to be an in-kernel convention. And at the very least, it has to be pr_warn_once().
On 12/19/23 5:00 PM, kirill.shutemov@linux.intel.com wrote: > On Tue, Dec 19, 2023 at 04:27:51PM -0800, Alexey Makhalov wrote: >> >> >> On 12/19/23 3:23 PM, kirill.shutemov@linux.intel.com wrote: >>> On Tue, Dec 19, 2023 at 01:57:51PM -0800, Alexey Makhalov wrote: >>>> diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c >>>> index 3aa1adaed18f..ef07ab7a07e1 100644 >>>> --- a/arch/x86/kernel/cpu/vmware.c >>>> +++ b/arch/x86/kernel/cpu/vmware.c >>>> @@ -428,6 +428,30 @@ static bool __init vmware_legacy_x2apic_available(void) >>>> (eax & BIT(VCPU_LEGACY_X2APIC)); >>>> } >>>> +#ifdef CONFIG_INTEL_TDX_GUEST >>>> +unsigned long vmware_tdx_hypercall(unsigned long cmd, >>>> + struct tdx_module_args *args) >>>> +{ >>>> + if (!hypervisor_is_type(X86_HYPER_VMWARE)) >>>> + return 0; > > BTW, don't you want to warn here to? We don't expect vmware hypercalls to > be called by non-vmware guest, do we? The answer is below... > >>>> + >>>> + if (cmd & ~VMWARE_CMD_MASK) { >>>> + pr_warn("Out of range command %x\n", cmd); >>>> + return 0; >>> >>> Is zero success? Shouldn't it be an error? >> >> VMware hypercalls do not have a standard way of signalling an error. >> To generalize expectations from the caller perspective of any existing >> hypercalls: error (including hypercall is not supported or disabled) is when >> return value is 0 and out1/2 are unchanged or equal to in1/in2. > > You are talking about signaling errors over hypercall transport. But if > kernel can see that something is wrong why cannot it signal the issue > clearly to caller. It is going to be in-kernel convention. These "return 0" blocks were introduced to protect against a non-VMware guest or arbitrary modules trying to use __tdx_hypercall via the exported vmware_tdx_hypercall function. In this case, it will be NOOP behavior with no or minor side effects. From a valid vmware_hypercall caller's point of view, there is no such thing as a hypercall not being available. 
Once guest detection code recognizes the VMware hypervisor via cpuid, it will start using hypercalls in accordance with the per-call API. Valid VMware guest code will never go into the first return, so no warning is required. The second return can be hit in rare cases, for example during the development phase or, hypothetically, when cmd was dynamically generated. That's why we have a warning only for the second condition. While speaking about it, I've started to lean towards your recommendation. Yes, we can return a standard error code such as -EINVAL or just -1 instead of "return 0" in this function. And it will be algorithmically correct, because if a VMware guest caller provides an out of range cmd, that is not documented behavior. Speaking of an additional in-kernel convention for passing an additional parameter if an error happens, it does not make sense to me because: 1. Existing caller code analyzes the output arguments to recognize an error response from the hypervisor. Adding one additional check for in-kernel errors just for the TDX path, which will never be hit by valid code in production, is an unnecessary overhead. 2. It will definitely add an overhead, as an error code will require one more output value, or out0 should be moved from the in-register return value to a return-by-pointer function argument. Summarizing, overloading the vmware_tdx_hypercall return value with arg0 (from the hypervisor) and a kernel error (-1 or any other) seems like a reasonable change. > > And to very least, it has to be pr_warn_once(). > Good catch! Will change it. Thanks, --Alexey
Hi Alexey, kernel test robot noticed the following build warnings: [auto build test WARNING on drm-misc/drm-misc-next] [also build test WARNING on dtor-input/next dtor-input/for-linus linus/master v6.7-rc6 next-20231220] [cannot apply to tip/x86/vmware] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch#_base_tree_information] url: https://github.com/intel-lab-lkp/linux/commits/Alexey-Makhalov/x86-vmware-Move-common-macros-to-vmware-h/20231220-060028 base: git://anongit.freedesktop.org/drm/drm-misc drm-misc-next patch link: https://lore.kernel.org/r/20231219215751.9445-7-alexey.makhalov%40broadcom.com patch subject: [PATCH v3 6/6] x86/vmware: Add TDX hypercall support config: x86_64-allyesconfig (https://download.01.org/0day-ci/archive/20231220/202312202020.5O1T2aSk-lkp@intel.com/config) compiler: clang version 16.0.4 (https://github.com/llvm/llvm-project.git ae42196bc493ffe877a7e3dff8be32035dea4d07) reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20231220/202312202020.5O1T2aSk-lkp@intel.com/reproduce) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202312202020.5O1T2aSk-lkp@intel.com/ All warnings (new ones prefixed by >>): >> arch/x86/kernel/cpu/vmware.c:439:40: warning: format specifies type 'unsigned int' but the argument has type 'unsigned long' [-Wformat] pr_warn("Out of range command %x\n", cmd); ~~ ^~~ %lx include/linux/printk.h:508:37: note: expanded from macro 'pr_warn' printk(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__) ~~~ ^~~~~~~~~~~ include/linux/printk.h:455:60: note: expanded from macro 'printk' #define printk(fmt, ...) 
printk_index_wrap(_printk, fmt, ##__VA_ARGS__) ~~~ ^~~~~~~~~~~ include/linux/printk.h:427:19: note: expanded from macro 'printk_index_wrap' _p_func(_fmt, ##__VA_ARGS__); \ ~~~~ ^~~~~~~~~~~ 1 warning generated. vim +439 arch/x86/kernel/cpu/vmware.c 430 431 #ifdef CONFIG_INTEL_TDX_GUEST 432 unsigned long vmware_tdx_hypercall(unsigned long cmd, 433 struct tdx_module_args *args) 434 { 435 if (!hypervisor_is_type(X86_HYPER_VMWARE)) 436 return 0; 437 438 if (cmd & ~VMWARE_CMD_MASK) { > 439 pr_warn("Out of range command %x\n", cmd); 440 return 0; 441 } 442 443 args->r10 = VMWARE_TDX_VENDOR_LEAF; 444 args->r11 = VMWARE_TDX_HCALL_FUNC; 445 args->r12 = VMWARE_HYPERVISOR_MAGIC; 446 args->r13 = cmd; 447 448 __tdx_hypercall(args); 449 450 return args->r12; 451 } 452 EXPORT_SYMBOL_GPL(vmware_tdx_hypercall); 453 #endif 454
diff --git a/arch/x86/include/asm/vmware.h b/arch/x86/include/asm/vmware.h index 719e41260ece..cad6f5b371a8 100644 --- a/arch/x86/include/asm/vmware.h +++ b/arch/x86/include/asm/vmware.h @@ -34,12 +34,65 @@ #define VMWARE_CMD_GETHZ 45 #define VMWARE_CMD_GETVCPU_INFO 68 #define VMWARE_CMD_STEALCLOCK 91 +/* + * Hypercall command mask: + * bits[6:0] command, range [0, 127] + * bits[19:16] sub-command, range [0, 15] + */ +#define VMWARE_CMD_MASK 0xf007fU #define CPUID_VMWARE_FEATURES_ECX_VMMCALL BIT(0) #define CPUID_VMWARE_FEATURES_ECX_VMCALL BIT(1) extern u8 vmware_hypercall_mode; +#define VMWARE_TDX_VENDOR_LEAF 0x1af7e4909ULL +#define VMWARE_TDX_HCALL_FUNC 1 + +extern unsigned long vmware_tdx_hypercall(unsigned long cmd, + struct tdx_module_args *args); + +/* + * TDCALL[TDG.VP.VMCALL] uses rax (arg0) and rcx (arg2), while the use of + * rbp (arg6) is discouraged by the TDX specification. Therefore, we + * remap those registers to r12, r13 and r14, respectively. + */ +static inline +unsigned long vmware_tdx_hypercall_args(unsigned long cmd, unsigned long in1, + unsigned long in3, unsigned long in4, + unsigned long in5, unsigned long in6, + uint32_t *out1, uint32_t *out2, + uint32_t *out3, uint32_t *out4, + uint32_t *out5, uint32_t *out6) +{ + unsigned long ret; + + struct tdx_module_args args = { + .rbx = in1, + .rdx = in3, + .rsi = in4, + .rdi = in5, + .r14 = in6, + }; + + ret = vmware_tdx_hypercall(cmd, &args); + + if (out1) + *out1 = args.rbx; + if (out2) + *out2 = args.r13; + if (out3) + *out3 = args.rdx; + if (out4) + *out4 = args.rsi; + if (out5) + *out5 = args.rdi; + if (out6) + *out6 = args.r14; + + return ret; +} + /* * The low bandwidth call. The low word of edx is presumed to have OUT bit * set. The high word of edx may contain input data from the caller. 
@@ -67,6 +120,11 @@ unsigned long vmware_hypercall1(unsigned long cmd, unsigned long in1) { unsigned long out0; + if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) + return vmware_tdx_hypercall_args(cmd, in1, 0, 0, 0, 0, + NULL, NULL, NULL, + NULL, NULL, NULL); + asm_inline volatile (VMWARE_HYPERCALL : "=a" (out0) : [port] "i" (VMWARE_HYPERVISOR_PORT), @@ -85,6 +143,11 @@ unsigned long vmware_hypercall3(unsigned long cmd, unsigned long in1, { unsigned long out0; + if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) + return vmware_tdx_hypercall_args(cmd, in1, 0, 0, 0, 0, + out1, out2, NULL, + NULL, NULL, NULL); + asm_inline volatile (VMWARE_HYPERCALL : "=a" (out0), "=b" (*out1), "=c" (*out2) : [port] "i" (VMWARE_HYPERVISOR_PORT), @@ -104,6 +167,11 @@ unsigned long vmware_hypercall4(unsigned long cmd, unsigned long in1, { unsigned long out0; + if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) + return vmware_tdx_hypercall_args(cmd, in1, 0, 0, 0, 0, + out1, out2, out3, + NULL, NULL, NULL); + asm_inline volatile (VMWARE_HYPERCALL : "=a" (out0), "=b" (*out1), "=c" (*out2), "=d" (*out3) : [port] "i" (VMWARE_HYPERVISOR_PORT), @@ -123,6 +191,11 @@ unsigned long vmware_hypercall5(unsigned long cmd, unsigned long in1, { unsigned long out0; + if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) + return vmware_tdx_hypercall_args(cmd, in1, in3, in4, in5, 0, + NULL, out2, NULL, + NULL, NULL, NULL); + asm_inline volatile (VMWARE_HYPERCALL : "=a" (out0), "=c" (*out2) : [port] "i" (VMWARE_HYPERVISOR_PORT), @@ -145,6 +218,11 @@ unsigned long vmware_hypercall6(unsigned long cmd, unsigned long in1, { unsigned long out0; + if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) + return vmware_tdx_hypercall_args(cmd, in1, in3, 0, 0, 0, + NULL, out2, out3, + out4, out5, NULL); + asm_inline volatile (VMWARE_HYPERCALL : "=a" (out0), "=c" (*out2), "=d" (*out3), "=S" (*out4), "=D" (*out5) @@ -166,6 +244,11 @@ unsigned long vmware_hypercall7(unsigned long cmd, unsigned long in1, { unsigned long out0; + if 
(cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) + return vmware_tdx_hypercall_args(cmd, in1, in3, in4, in5, 0, + out1, out2, out3, + NULL, NULL, NULL); + asm_inline volatile (VMWARE_HYPERCALL : "=a" (out0), "=b" (*out1), "=c" (*out2), "=d" (*out3) : [port] "i" (VMWARE_HYPERVISOR_PORT), diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index 3aa1adaed18f..ef07ab7a07e1 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -428,6 +428,30 @@ static bool __init vmware_legacy_x2apic_available(void) (eax & BIT(VCPU_LEGACY_X2APIC)); } +#ifdef CONFIG_INTEL_TDX_GUEST +unsigned long vmware_tdx_hypercall(unsigned long cmd, + struct tdx_module_args *args) +{ + if (!hypervisor_is_type(X86_HYPER_VMWARE)) + return 0; + + if (cmd & ~VMWARE_CMD_MASK) { + pr_warn("Out of range command %x\n", cmd); + return 0; + } + + args->r10 = VMWARE_TDX_VENDOR_LEAF; + args->r11 = VMWARE_TDX_HCALL_FUNC; + args->r12 = VMWARE_HYPERVISOR_MAGIC; + args->r13 = cmd; + + __tdx_hypercall(args); + + return args->r12; +} +EXPORT_SYMBOL_GPL(vmware_tdx_hypercall); +#endif + #ifdef CONFIG_AMD_MEM_ENCRYPT static void vmware_sev_es_hcall_prepare(struct ghcb *ghcb, struct pt_regs *regs)