diff mbox series

[4/6] x86,hyperv: Clean up hv_do_hypercall()

Message ID 20250414113754.285564821@infradead.org (mailing list archive)
State New
Headers show
Series objtool: Detect and warn about indirect calls in __nocfi functions | expand

Commit Message

Peter Zijlstra April 14, 2025, 11:11 a.m. UTC
What used to be a simple few instructions has turned into a giant mess
(for x86_64). Not only does it use static_branch wrong, it mixes it
with dynamic branches for no apparent reason.

Notably it uses static_branch through an out-of-line function call,
which completely defeats the purpose, since instead of a simple
JMP/NOP site, you get a CALL+RET+TEST+Jcc sequence in return, which is
absolutely idiotic.

Add to that a dynamic test of hyperv_paravisor_present, something
which is set once and never changed.

Replace all this idiocy with a single direct function call to the
right hypercall variant.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/x86/hyperv/hv_init.c       |   21 ++++++
 arch/x86/hyperv/ivm.c           |   14 ++++
 arch/x86/include/asm/mshyperv.h |  137 +++++++++++-----------------------------
 arch/x86/kernel/cpu/mshyperv.c  |   18 +++--
 4 files changed, 88 insertions(+), 102 deletions(-)

Comments

Peter Zijlstra April 14, 2025, 11:47 a.m. UTC | #1
On Mon, Apr 14, 2025 at 01:11:44PM +0200, Peter Zijlstra wrote:

> +u64 hv_pg_hypercall(u64 control, u64 param1, u64 param2)
> +{
> +	u64 hv_status;
> +
> +	if (!hv_hypercall_pg)
> +		return U64_MAX;
> +
> +	register u64 __r8 asm("r8") = param2;
> +	asm volatile (CALL_NOSPEC
> +		      : "=a" (hv_status), ASM_CALL_CONSTRAINT,
> +		        "+c" (control), "+d" (param1)
> +		      : "r" (__r8),
> +		        THUNK_TARGET(hv_hypercall_pg)
> +		      : "cc", "memory", "r9", "r10", "r11");
> +
> +	return hv_status;
> +}

I've tried making this a tail-call, but even without the
ASM_CALL_CONSTRAINT on it confuses the compiler (objtool warns on
frame-pointer builds about non-matching stack).

I could of course have written the entire thing in asm, but meh.
Uros Bizjak April 14, 2025, 2:06 p.m. UTC | #2
On 14. 04. 25 13:11, Peter Zijlstra wrote:
> What used to be a simple few instructions has turned into a giant mess
> (for x86_64). Not only does it use static_branch wrong, it mixes it
> with dynamic branches for no apparent reason.
> 
> Notably it uses static_branch through an out-of-line function call,
> which completely defeats the purpose, since instead of a simple
> JMP/NOP site, you get a CALL+RET+TEST+Jcc sequence in return, which is
> absolutely idiotic.
> 
> Add to that a dynamic test of hyperv_paravisor_present, something
> which is set once and never changed.
> 
> Replace all this idiocy with a single direct function call to the
> right hypercall variant.
> 
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> ---
>   arch/x86/hyperv/hv_init.c       |   21 ++++++
>   arch/x86/hyperv/ivm.c           |   14 ++++
>   arch/x86/include/asm/mshyperv.h |  137 +++++++++++-----------------------------
>   arch/x86/kernel/cpu/mshyperv.c  |   18 +++--
>   4 files changed, 88 insertions(+), 102 deletions(-)
> 
> --- a/arch/x86/hyperv/hv_init.c
> +++ b/arch/x86/hyperv/hv_init.c
> @@ -35,7 +35,28 @@
>   #include <linux/highmem.h>
>   
>   void *hv_hypercall_pg;
> +
> +#ifdef CONFIG_X86_64
> +u64 hv_pg_hypercall(u64 control, u64 param1, u64 param2)
> +{
> +	u64 hv_status;
> +
> +	if (!hv_hypercall_pg)
> +		return U64_MAX;
> +
> +	register u64 __r8 asm("r8") = param2;
> +	asm volatile (CALL_NOSPEC
> +		      : "=a" (hv_status), ASM_CALL_CONSTRAINT,
> +		        "+c" (control), "+d" (param1)
> +		      : "r" (__r8),

r8 is call-clobbered register, so you should use "+r" (__r8) to properly 
clobber it:

		        "+c" (control), "+d" (param1), "+r" (__r8)
		      : THUNK_TARGET(hv_hypercall_pg)

> +		      : "cc", "memory", "r9", "r10", "r11");
> +
> +	return hv_status;
> +}
> +#else
>   EXPORT_SYMBOL_GPL(hv_hypercall_pg);
> +#endif
>   
>   union hv_ghcb * __percpu *hv_ghcb_pg;
>   
> --- a/arch/x86/hyperv/ivm.c
> +++ b/arch/x86/hyperv/ivm.c
> @@ -376,6 +376,20 @@ int hv_snp_boot_ap(u32 cpu, unsigned lon
>   	return ret;
>   }
>   
> +u64 hv_snp_hypercall(u64 control, u64 param1, u64 param2)
> +{
> +	u64 hv_status;
> +
> +	register u64 __r8 asm("r8") = param2;
> +	asm volatile("vmmcall"
> +		     : "=a" (hv_status), ASM_CALL_CONSTRAINT,
> +		       "+c" (control), "+d" (param1)
> +		     : "r" (__r8)

Also here:
		        "+c" (control), "+d" (param1), "+r" (__r8)
		      :

> +		     : "cc", "memory", "r9", "r10", "r11");
> +
> +	return hv_status;
> +}

Uros.
Peter Zijlstra April 14, 2025, 2:08 p.m. UTC | #3
On Mon, Apr 14, 2025 at 04:06:41PM +0200, Uros Bizjak wrote:
> 
> 
> On 14. 04. 25 13:11, Peter Zijlstra wrote:
> > What used to be a simple few instructions has turned into a giant mess
> > (for x86_64). Not only does it use static_branch wrong, it mixes it
> > with dynamic branches for no apparent reason.
> > 
> > Notably it uses static_branch through an out-of-line function call,
> > which completely defeats the purpose, since instead of a simple
> > JMP/NOP site, you get a CALL+RET+TEST+Jcc sequence in return, which is
> > absolutely idiotic.
> > 
> > Add to that a dynamic test of hyperv_paravisor_present, something
> > which is set once and never changed.
> > 
> > Replace all this idiocy with a single direct function call to the
> > right hypercall variant.
> > 
> > Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> > ---
> >   arch/x86/hyperv/hv_init.c       |   21 ++++++
> >   arch/x86/hyperv/ivm.c           |   14 ++++
> >   arch/x86/include/asm/mshyperv.h |  137 +++++++++++-----------------------------
> >   arch/x86/kernel/cpu/mshyperv.c  |   18 +++--
> >   4 files changed, 88 insertions(+), 102 deletions(-)
> > 
> > --- a/arch/x86/hyperv/hv_init.c
> > +++ b/arch/x86/hyperv/hv_init.c
> > @@ -35,7 +35,28 @@
> >   #include <linux/highmem.h>
> >   void *hv_hypercall_pg;
> > +
> > +#ifdef CONFIG_X86_64
> > +u64 hv_pg_hypercall(u64 control, u64 param1, u64 param2)
> > +{
> > +	u64 hv_status;
> > +
> > +	if (!hv_hypercall_pg)
> > +		return U64_MAX;
> > +
> > +	register u64 __r8 asm("r8") = param2;
> > +	asm volatile (CALL_NOSPEC
> > +		      : "=a" (hv_status), ASM_CALL_CONSTRAINT,
> > +		        "+c" (control), "+d" (param1)
> > +		      : "r" (__r8),
> 
> r8 is call-clobbered register, so you should use "+r" (__r8) to properly
> clobber it:

Ah, okay.
diff mbox series

Patch

--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -35,7 +35,28 @@ 
 #include <linux/highmem.h>
 
 void *hv_hypercall_pg;
+
+#ifdef CONFIG_X86_64
+u64 hv_pg_hypercall(u64 control, u64 param1, u64 param2)
+{
+	u64 hv_status;
+
+	if (!hv_hypercall_pg)
+		return U64_MAX;
+
+	register u64 __r8 asm("r8") = param2;
+	asm volatile (CALL_NOSPEC
+		      : "=a" (hv_status), ASM_CALL_CONSTRAINT,
+		        "+c" (control), "+d" (param1)
+		      : "r" (__r8),
+		        THUNK_TARGET(hv_hypercall_pg)
+		      : "cc", "memory", "r9", "r10", "r11");
+
+	return hv_status;
+}
+#else
 EXPORT_SYMBOL_GPL(hv_hypercall_pg);
+#endif
 
 union hv_ghcb * __percpu *hv_ghcb_pg;
 
--- a/arch/x86/hyperv/ivm.c
+++ b/arch/x86/hyperv/ivm.c
@@ -376,6 +376,20 @@  int hv_snp_boot_ap(u32 cpu, unsigned lon
 	return ret;
 }
 
+u64 hv_snp_hypercall(u64 control, u64 param1, u64 param2)
+{
+	u64 hv_status;
+
+	register u64 __r8 asm("r8") = param2;
+	asm volatile("vmmcall"
+		     : "=a" (hv_status), ASM_CALL_CONSTRAINT,
+		       "+c" (control), "+d" (param1)
+		     : "r" (__r8)
+		     : "cc", "memory", "r9", "r10", "r11");
+
+	return hv_status;
+}
+
 #else
 static inline void hv_ghcb_msr_write(u64 msr, u64 value) {}
 static inline void hv_ghcb_msr_read(u64 msr, u64 *value) {}
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -6,6 +6,7 @@ 
 #include <linux/nmi.h>
 #include <linux/msi.h>
 #include <linux/io.h>
+#include <linux/static_call.h>
 #include <asm/nospec-branch.h>
 #include <asm/paravirt.h>
 #include <hyperv/hvhdk.h>
@@ -39,15 +40,20 @@  static inline unsigned char hv_get_nmi_r
 }
 
 #if IS_ENABLED(CONFIG_HYPERV)
-extern bool hyperv_paravisor_present;
-
 extern void *hv_hypercall_pg;
 
 extern union hv_ghcb * __percpu *hv_ghcb_pg;
 
 bool hv_isolation_type_snp(void);
 bool hv_isolation_type_tdx(void);
-u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2);
+
+#ifdef CONFIG_X86_64
+extern u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2);
+extern u64 hv_snp_hypercall(u64 control, u64 param1, u64 param2);
+extern u64 hv_pg_hypercall(u64 control, u64 param1, u64 param2);
+
+DECLARE_STATIC_CALL(hv_hypercall, hv_pg_hypercall);
+#endif
 
 /*
  * DEFAULT INIT GPAT and SEGMENT LIMIT value in struct VMSA
@@ -64,37 +70,15 @@  static inline u64 hv_do_hypercall(u64 co
 {
 	u64 input_address = input ? virt_to_phys(input) : 0;
 	u64 output_address = output ? virt_to_phys(output) : 0;
-	u64 hv_status;
 
 #ifdef CONFIG_X86_64
-	if (hv_isolation_type_tdx() && !hyperv_paravisor_present)
-		return hv_tdx_hypercall(control, input_address, output_address);
-
-	if (hv_isolation_type_snp() && !hyperv_paravisor_present) {
-		__asm__ __volatile__("mov %[output_address], %%r8\n"
-				     "vmmcall"
-				     : "=a" (hv_status), ASM_CALL_CONSTRAINT,
-				       "+c" (control), "+d" (input_address)
-				     : [output_address] "r" (output_address)
-				     : "cc", "memory", "r8", "r9", "r10", "r11");
-		return hv_status;
-	}
-
-	if (!hv_hypercall_pg)
-		return U64_MAX;
-
-	__asm__ __volatile__("mov %[output_address], %%r8\n"
-			     CALL_NOSPEC
-			     : "=a" (hv_status), ASM_CALL_CONSTRAINT,
-			       "+c" (control), "+d" (input_address)
-			     : [output_address] "r" (output_address),
-			       THUNK_TARGET(hv_hypercall_pg)
-			     : "cc", "memory", "r8", "r9", "r10", "r11");
+	return static_call_mod(hv_hypercall)(control, input_address, output_address);
 #else
 	u32 input_address_hi = upper_32_bits(input_address);
 	u32 input_address_lo = lower_32_bits(input_address);
 	u32 output_address_hi = upper_32_bits(output_address);
 	u32 output_address_lo = lower_32_bits(output_address);
+	u64 hv_status;
 
 	if (!hv_hypercall_pg)
 		return U64_MAX;
@@ -107,8 +91,8 @@  static inline u64 hv_do_hypercall(u64 co
 			       "D"(output_address_hi), "S"(output_address_lo),
 			       THUNK_TARGET(hv_hypercall_pg)
 			     : "cc", "memory");
-#endif /* !x86_64 */
 	return hv_status;
+#endif /* !x86_64 */
 }
 
 /* Hypercall to the L0 hypervisor */
@@ -120,41 +104,23 @@  static inline u64 hv_do_nested_hypercall
 /* Fast hypercall with 8 bytes of input and no output */
 static inline u64 _hv_do_fast_hypercall8(u64 control, u64 input1)
 {
-	u64 hv_status;
-
 #ifdef CONFIG_X86_64
-	if (hv_isolation_type_tdx() && !hyperv_paravisor_present)
-		return hv_tdx_hypercall(control, input1, 0);
-
-	if (hv_isolation_type_snp() && !hyperv_paravisor_present) {
-		__asm__ __volatile__(
-				"vmmcall"
-				: "=a" (hv_status), ASM_CALL_CONSTRAINT,
-				"+c" (control), "+d" (input1)
-				:: "cc", "r8", "r9", "r10", "r11");
-	} else {
-		__asm__ __volatile__(CALL_NOSPEC
-				     : "=a" (hv_status), ASM_CALL_CONSTRAINT,
-				       "+c" (control), "+d" (input1)
-				     : THUNK_TARGET(hv_hypercall_pg)
-				     : "cc", "r8", "r9", "r10", "r11");
-	}
+	return static_call_mod(hv_hypercall)(control, input1, 0);
 #else
-	{
-		u32 input1_hi = upper_32_bits(input1);
-		u32 input1_lo = lower_32_bits(input1);
-
-		__asm__ __volatile__ (CALL_NOSPEC
-				      : "=A"(hv_status),
-					"+c"(input1_lo),
-					ASM_CALL_CONSTRAINT
-				      :	"A" (control),
-					"b" (input1_hi),
-					THUNK_TARGET(hv_hypercall_pg)
-				      : "cc", "edi", "esi");
-	}
-#endif
+	u32 input1_hi = upper_32_bits(input1);
+	u32 input1_lo = lower_32_bits(input1);
+	u64 hv_status;
+
+	__asm__ __volatile__ (CALL_NOSPEC
+			      : "=A"(hv_status),
+			      "+c"(input1_lo),
+			      ASM_CALL_CONSTRAINT
+			      :	"A" (control),
+			      "b" (input1_hi),
+			      THUNK_TARGET(hv_hypercall_pg)
+			      : "cc", "edi", "esi");
 	return hv_status;
+#endif
 }
 
 static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
@@ -174,45 +140,24 @@  static inline u64 hv_do_fast_nested_hype
 /* Fast hypercall with 16 bytes of input */
 static inline u64 _hv_do_fast_hypercall16(u64 control, u64 input1, u64 input2)
 {
-	u64 hv_status;
-
 #ifdef CONFIG_X86_64
-	if (hv_isolation_type_tdx() && !hyperv_paravisor_present)
-		return hv_tdx_hypercall(control, input1, input2);
-
-	if (hv_isolation_type_snp() && !hyperv_paravisor_present) {
-		__asm__ __volatile__("mov %[input2], %%r8\n"
-				     "vmmcall"
-				     : "=a" (hv_status), ASM_CALL_CONSTRAINT,
-				       "+c" (control), "+d" (input1)
-				     : [input2] "r" (input2)
-				     : "cc", "r8", "r9", "r10", "r11");
-	} else {
-		__asm__ __volatile__("mov %[input2], %%r8\n"
-				     CALL_NOSPEC
-				     : "=a" (hv_status), ASM_CALL_CONSTRAINT,
-				       "+c" (control), "+d" (input1)
-				     : [input2] "r" (input2),
-				       THUNK_TARGET(hv_hypercall_pg)
-				     : "cc", "r8", "r9", "r10", "r11");
-	}
+	return static_call_mod(hv_hypercall)(control, input1, input2);
 #else
-	{
-		u32 input1_hi = upper_32_bits(input1);
-		u32 input1_lo = lower_32_bits(input1);
-		u32 input2_hi = upper_32_bits(input2);
-		u32 input2_lo = lower_32_bits(input2);
-
-		__asm__ __volatile__ (CALL_NOSPEC
-				      : "=A"(hv_status),
-					"+c"(input1_lo), ASM_CALL_CONSTRAINT
-				      :	"A" (control), "b" (input1_hi),
-					"D"(input2_hi), "S"(input2_lo),
-					THUNK_TARGET(hv_hypercall_pg)
-				      : "cc");
-	}
-#endif
+	u32 input1_hi = upper_32_bits(input1);
+	u32 input1_lo = lower_32_bits(input1);
+	u32 input2_hi = upper_32_bits(input2);
+	u32 input2_lo = lower_32_bits(input2);
+	u64 hv_status;
+
+	__asm__ __volatile__ (CALL_NOSPEC
+			      : "=A"(hv_status),
+			      "+c"(input1_lo), ASM_CALL_CONSTRAINT
+			      :	"A" (control), "b" (input1_hi),
+			      "D"(input2_hi), "S"(input2_lo),
+			      THUNK_TARGET(hv_hypercall_pg)
+			      : "cc");
 	return hv_status;
+#endif
 }
 
 static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2)
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -37,10 +37,6 @@ 
 bool hv_nested;
 struct ms_hyperv_info ms_hyperv;
 
-/* Used in modules via hv_do_hypercall(): see arch/x86/include/asm/mshyperv.h */
-bool hyperv_paravisor_present __ro_after_init;
-EXPORT_SYMBOL_GPL(hyperv_paravisor_present);
-
 #if IS_ENABLED(CONFIG_HYPERV)
 static inline unsigned int hv_get_nested_msr(unsigned int reg)
 {
@@ -287,6 +283,11 @@  static void __init x86_setup_ops_for_tsc
 	old_restore_sched_clock_state = x86_platform.restore_sched_clock_state;
 	x86_platform.restore_sched_clock_state = hv_restore_sched_clock_state;
 }
+
+#ifdef CONFIG_X86_64
+DEFINE_STATIC_CALL(hv_hypercall, hv_pg_hypercall);
+EXPORT_STATIC_CALL_TRAMP_GPL(hv_hypercall);
+#endif
 #endif /* CONFIG_HYPERV */
 
 static uint32_t  __init ms_hyperv_platform(void)
@@ -483,14 +484,16 @@  static void __init ms_hyperv_init_platfo
 			ms_hyperv.shared_gpa_boundary =
 				BIT_ULL(ms_hyperv.shared_gpa_boundary_bits);
 
-		hyperv_paravisor_present = !!ms_hyperv.paravisor_present;
-
 		pr_info("Hyper-V: Isolation Config: Group A 0x%x, Group B 0x%x\n",
 			ms_hyperv.isolation_config_a, ms_hyperv.isolation_config_b);
 
 
 		if (hv_get_isolation_type() == HV_ISOLATION_TYPE_SNP) {
 			static_branch_enable(&isolation_type_snp);
+#if defined(CONFIG_AMD_MEM_ENCRYPT) && defined(CONFIG_HYPERV)
+			if (!ms_hyperv.paravisor_present)
+				static_call_update(hv_hypercall, hv_snp_hypercall);
+#endif
 		} else if (hv_get_isolation_type() == HV_ISOLATION_TYPE_TDX) {
 			static_branch_enable(&isolation_type_tdx);
 
@@ -498,6 +501,9 @@  static void __init ms_hyperv_init_platfo
 			ms_hyperv.hints &= ~HV_X64_APIC_ACCESS_RECOMMENDED;
 
 			if (!ms_hyperv.paravisor_present) {
+#if defined(CONFIG_INTEL_TDX_GUEST) && defined(CONFIG_HYPERV)
+				static_call_update(hv_hypercall, hv_tdx_hypercall);
+#endif
 				/*
 				 * Mark the Hyper-V TSC page feature as disabled
 				 * in a TDX VM without paravisor so that the