diff mbox series

[RFC,67/73] x86/pvm: Implement cpu related PVOPS

Message ID 20240226143630.33643-68-jiangshanlai@gmail.com (mailing list archive)
State New, archived
Headers show
Series KVM: x86/PVM: Introduce a new hypervisor | expand

Commit Message

Lai Jiangshan Feb. 26, 2024, 2:36 p.m. UTC
From: Lai Jiangshan <jiangshan.ljs@antgroup.com>

The MSR read/write operations are in the hot path, so use hypercalls in
their PVOPS to enhance performance. Additionally, it is important to
ensure that load_gs_index() and load_tls() notify the hypervisor in
their PVOPS.

Signed-off-by: Lai Jiangshan <jiangshan.ljs@antgroup.com>
Signed-off-by: Hou Wenlong <houwenlong.hwl@antgroup.com>
---
 arch/x86/Kconfig      |  1 +
 arch/x86/kernel/pvm.c | 85 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 86 insertions(+)
diff mbox series

Patch

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 32a2ab49752b..60e28727580a 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -855,6 +855,7 @@  config PVM_GUEST
 	bool "PVM Guest support"
 	depends on X86_64 && KVM_GUEST && X86_PIE && !KASAN
 	select PAGE_TABLE_ISOLATION
+	select PARAVIRT_XXL
 	select RANDOMIZE_MEMORY
 	select RELOCATABLE_UNCOMPRESSED_KERNEL
 	default n
diff --git a/arch/x86/kernel/pvm.c b/arch/x86/kernel/pvm.c
index d39550a8159f..12a35bef9bb8 100644
--- a/arch/x86/kernel/pvm.c
+++ b/arch/x86/kernel/pvm.c
@@ -73,6 +73,81 @@  static __always_inline long pvm_hypercall3(unsigned int nr, unsigned long p1,
 	return ret;
 }
 
+static void pvm_load_gs_index(unsigned int sel)
+{
+	if (sel & 4) {	/* selector bit 2 (table indicator) set: LDT selector */
+		pr_warn_once("pvm guest doesn't support LDT\n");
+		this_cpu_write(pvm_vcpu_struct.user_gsbase, 0);	/* no hypercall here */
+	} else {
+		unsigned long base;
+
+		preempt_disable();	/* keep hypercall and per-CPU store on one CPU */
+		base = pvm_hypercall1(PVM_HC_LOAD_GS, sel);
+		__this_cpu_write(pvm_vcpu_struct.user_gsbase, base);
+		preempt_enable();
+	}
+}
+
+static unsigned long long pvm_read_msr_safe(unsigned int msr, int *err)
+{
+	/* Only the two bases below are handled here; they always report success. */
+	if (msr != MSR_FS_BASE && msr != MSR_KERNEL_GS_BASE)
+		return native_read_msr_safe(msr, err);
+
+	*err = 0;
+	if (msr == MSR_FS_BASE)
+		return rdfsbase();
+
+	/* MSR_KERNEL_GS_BASE: served from the per-CPU pvm_vcpu_struct. */
+	return this_cpu_read(pvm_vcpu_struct.user_gsbase);
+}
+
+static unsigned long long pvm_read_msr(unsigned int msr)
+{
+	/* FS base is read directly with rdfsbase(). */
+	if (msr == MSR_FS_BASE)
+		return rdfsbase();
+	/* MSR_KERNEL_GS_BASE is served from the per-CPU pvm_vcpu_struct. */
+	if (msr == MSR_KERNEL_GS_BASE)
+		return this_cpu_read(pvm_vcpu_struct.user_gsbase);
+	/* Every other MSR is read through the PVM_HC_RDMSR hypercall. */
+	return pvm_hypercall1(PVM_HC_RDMSR, msr);
+}
+
+static int notrace pvm_write_msr_safe(unsigned int msr, u32 low, u32 high)
+{
+	/* Combine the two 32-bit halves into the full 64-bit MSR value. */
+	unsigned long val = low | ((u64)high << 32);
+
+	if (msr == MSR_FS_BASE) {
+		wrfsbase(val);
+	} else if (msr == MSR_KERNEL_GS_BASE) {
+		/* Stored in the per-CPU pvm_vcpu_struct; no hypercall is made. */
+		this_cpu_write(pvm_vcpu_struct.user_gsbase, val);
+	} else {
+		return pvm_hypercall2(PVM_HC_WRMSR, msr, val);
+	}
+	return 0;
+}
+
+static void notrace pvm_write_msr(unsigned int msr, u32 low, u32 high)
+{
+	(void)pvm_write_msr_safe(msr, low, high);	/* status is not reported */
+}
+
+static void pvm_load_tls(struct thread_struct *t, unsigned int cpu)
+{
+	struct desc_struct *gdt = get_cpu_gdt_rw(cpu);
+	unsigned long *tls = (unsigned long *)gdt + GDT_ENTRY_TLS_MIN;
+
+	/* Skip everything when the GDT TLS slots already match this thread's. */
+	if (!memcmp(&gdt[GDT_ENTRY_TLS_MIN], &t->tls_array[0], sizeof(t->tls_array)))
+		return;
+	native_load_tls(t, cpu);
+	/* After the native load, pass three words from the TLS slots to the hypervisor. */
+	pvm_hypercall3(PVM_HC_LOAD_TLS, tls[0], tls[1], tls[2]);
+}
+
 void __init pvm_early_event(struct pt_regs *regs)
 {
 	int vector = regs->orig_ax >> 32;
@@ -302,6 +377,16 @@  void __init pvm_early_setup(void)
 	setup_force_cpu_cap(X86_FEATURE_KVM_PVM_GUEST);
 	setup_force_cpu_cap(X86_FEATURE_PV_GUEST);
 
+	/* PVM takes care of %gs when switching to usermode for us */
+	pv_ops.cpu.load_gs_index = pvm_load_gs_index;
+	pv_ops.cpu.cpuid = pvm_cpuid;
+
+	pv_ops.cpu.read_msr = pvm_read_msr;
+	pv_ops.cpu.write_msr = pvm_write_msr;
+	pv_ops.cpu.read_msr_safe = pvm_read_msr_safe;
+	pv_ops.cpu.write_msr_safe = pvm_write_msr_safe;
+	pv_ops.cpu.load_tls = pvm_load_tls;
+
 	wrmsrl(MSR_PVM_VCPU_STRUCT, __pa(this_cpu_ptr(&pvm_vcpu_struct)));
 	wrmsrl(MSR_PVM_EVENT_ENTRY, (unsigned long)(void *)pvm_early_kernel_event_entry - 256);
 	wrmsrl(MSR_PVM_SUPERVISOR_REDZONE, PVM_SUPERVISOR_REDZONE_SIZE);