[RFC,18/73] KVM: x86/PVM: Implement VM/VCPU initialization related callbacks

Message ID 20240226143630.33643-19-jiangshanlai@gmail.com
State New, archived
Series KVM: x86/PVM: Introduce a new hypervisor

Commit Message

Lai Jiangshan Feb. 26, 2024, 2:35 p.m. UTC
From: Lai Jiangshan <jiangshan.ljs@antgroup.com>

In the vm_init() callback, the cloned host root page table is recorded
in the 'kvm' structure so that host PGD entries can be cloned during
SP (shadow page) allocation. In the vcpu_create() callback, the pfn
cache for the 'PVCS' is initialized; it is deactivated in the
vcpu_free() callback. Additionally, the vcpu_reset() callback performs
both the common x86 reset and the PVM-specific reset.

Signed-off-by: Lai Jiangshan <jiangshan.ljs@antgroup.com>
Signed-off-by: Hou Wenlong <houwenlong.hwl@antgroup.com>
---
 arch/x86/kvm/pvm/pvm.c | 120 +++++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/pvm/pvm.h |  34 ++++++++++++
 2 files changed, 154 insertions(+)
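For orientation, the sketch below shows, from the userspace side, when the new
callbacks are reached: creating a VM and a vCPU through the standard KVM ioctls
is what ends up invoking .vm_init, .vcpu_create and the initial .vcpu_reset.
This is a minimal illustration only, not part of the patch; it assumes a host
kernel with this series applied and PVM active, and uses machine type 0 for
KVM_CREATE_VM as a placeholder assumption (any PVM-specific VM type would be
defined elsewhere in the series).

	/*
	 * Minimal userspace sketch (not part of this patch): exercising the
	 * standard KVM ioctls on a host running this PVM series.
	 */
	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	int main(void)
	{
		int kvm = open("/dev/kvm", O_RDWR);
		if (kvm < 0)
			return 1;

		/* KVM_CREATE_VM -> kvm_arch_init_vm() -> .vm_init (pvm_vm_init) */
		int vm = ioctl(kvm, KVM_CREATE_VM, 0);
		if (vm < 0)
			return 1;

		/*
		 * KVM_CREATE_VCPU -> kvm_arch_vcpu_create() -> .vcpu_create
		 * (pvm_vcpu_create), followed by kvm_vcpu_reset(vcpu, false)
		 * -> .vcpu_reset (pvm_vcpu_reset).
		 */
		int vcpu = ioctl(vm, KVM_CREATE_VCPU, 0);
		printf("vcpu fd: %d\n", vcpu);

		return vcpu < 0 ? 1 : 0;
	}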

Patch

diff --git a/arch/x86/kvm/pvm/pvm.c b/arch/x86/kvm/pvm/pvm.c
index 83aa2c9f42f6..d4cc52bf6b3f 100644
--- a/arch/x86/kvm/pvm/pvm.c
+++ b/arch/x86/kvm/pvm/pvm.c
@@ -55,6 +55,117 @@  static bool cpu_has_pvm_wbinvd_exit(void)
 	return true;
 }
 
+static void reset_segment(struct kvm_segment *var, int seg)
+{
+	memset(var, 0, sizeof(*var));
+	var->limit = 0xffff;
+	var->present = 1;
+
+	switch (seg) {
+	case VCPU_SREG_CS:
+		var->s = 1;
+		var->type = 0xb; /* Code Segment */
+		var->selector = 0xf000;
+		var->base = 0xffff0000;
+		break;
+	case VCPU_SREG_LDTR:
+		var->s = 0;
+		var->type = DESC_LDT;
+		break;
+	case VCPU_SREG_TR:
+		var->s = 0;
+		var->type = DESC_TSS | 0x2; // TSS32 busy
+		break;
+	default:
+		var->s = 1;
+		var->type = 3; /* Read/Write Data Segment */
+		break;
+	}
+}
+
+static void __pvm_vcpu_reset(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_pvm *pvm = to_pvm(vcpu);
+
+	if (is_intel)
+		vcpu->arch.microcode_version = 0x100000000ULL;
+	else
+		vcpu->arch.microcode_version = 0x01000065;
+
+	pvm->msr_ia32_feature_control_valid_bits = FEAT_CTL_LOCKED;
+}
+
+static void pvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
+{
+	struct vcpu_pvm *pvm = to_pvm(vcpu);
+	int i;
+
+	kvm_gpc_deactivate(&pvm->pvcs_gpc);
+
+	if (!init_event)
+		__pvm_vcpu_reset(vcpu);
+
+	/*
+	 * For PVM, cpuid faulting relies on hardware capability, but it is set
+	 * as supported by default in kvm_arch_vcpu_create(). Therefore, it
+	 * should be cleared if the host doesn't support it.
+	 */
+	if (!boot_cpu_has(X86_FEATURE_CPUID_FAULT))
+		vcpu->arch.msr_platform_info &= ~MSR_PLATFORM_INFO_CPUID_FAULT;
+
+	// X86 resets
+	for (i = 0; i < ARRAY_SIZE(pvm->segments); i++)
+		reset_segment(&pvm->segments[i], i);
+	kvm_set_cr8(vcpu, 0);
+	pvm->idt_ptr.address = 0;
+	pvm->idt_ptr.size = 0xffff;
+	pvm->gdt_ptr.address = 0;
+	pvm->gdt_ptr.size = 0xffff;
+
+	// PVM resets
+	pvm->switch_flags = SWITCH_FLAGS_INIT;
+	pvm->hw_cs = __USER_CS;
+	pvm->hw_ss = __USER_DS;
+	pvm->int_shadow = 0;
+	pvm->nmi_mask = false;
+
+	pvm->msr_vcpu_struct = 0;
+	pvm->msr_supervisor_rsp = 0;
+	pvm->msr_event_entry = 0;
+	pvm->msr_retu_rip_plus2 = 0;
+	pvm->msr_rets_rip_plus2 = 0;
+	pvm->msr_switch_cr3 = 0;
+}
+
+static int pvm_vcpu_create(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_pvm *pvm = to_pvm(vcpu);
+
+	BUILD_BUG_ON(offsetof(struct vcpu_pvm, vcpu) != 0);
+
+	pvm->switch_flags = SWITCH_FLAGS_INIT;
+	kvm_gpc_init(&pvm->pvcs_gpc, vcpu->kvm, vcpu, KVM_GUEST_AND_HOST_USE_PFN);
+
+	return 0;
+}
+
+static void pvm_vcpu_free(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_pvm *pvm = to_pvm(vcpu);
+
+	kvm_gpc_deactivate(&pvm->pvcs_gpc);
+}
+
+static void pvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
+{
+}
+
+static int pvm_vm_init(struct kvm *kvm)
+{
+	kvm->arch.host_mmu_root_pgd = host_mmu_root_pgd;
+	return 0;
+}
+
 static int hardware_enable(void)
 {
 	/* Nothing to do */
@@ -169,6 +280,15 @@  static struct kvm_x86_ops pvm_x86_ops __initdata = {
 
 	.has_wbinvd_exit = cpu_has_pvm_wbinvd_exit,
 
+	.vm_size = sizeof(struct kvm_pvm),
+	.vm_init = pvm_vm_init,
+
+	.vcpu_create = pvm_vcpu_create,
+	.vcpu_free = pvm_vcpu_free,
+	.vcpu_reset = pvm_vcpu_reset,
+
+	.vcpu_after_set_cpuid = pvm_vcpu_after_set_cpuid,
+
 	.nested_ops = &pvm_nested_ops,
 
 	.setup_mce = pvm_setup_mce,
diff --git a/arch/x86/kvm/pvm/pvm.h b/arch/x86/kvm/pvm/pvm.h
index 6149cf5975a4..599bbbb284dc 100644
--- a/arch/x86/kvm/pvm/pvm.h
+++ b/arch/x86/kvm/pvm/pvm.h
@@ -3,6 +3,9 @@ 
 #define __KVM_X86_PVM_H
 
 #include <linux/kvm_host.h>
+#include <asm/switcher.h>
+
+#define SWITCH_FLAGS_INIT	(SWITCH_FLAGS_SMOD)
 
 #define PT_L4_SHIFT		39
 #define PT_L4_SIZE		(1UL << PT_L4_SHIFT)
@@ -24,6 +27,37 @@  int host_mmu_init(void);
 
 struct vcpu_pvm {
 	struct kvm_vcpu vcpu;
+
+	unsigned long switch_flags;
+
+	u32 hw_cs, hw_ss;
+
+	int int_shadow;
+	bool nmi_mask;
+
+	struct gfn_to_pfn_cache pvcs_gpc;
+
+	/*
+	 * Only bits masked by msr_ia32_feature_control_valid_bits can be set in
+	 * msr_ia32_feature_control. FEAT_CTL_LOCKED is always included
+	 * in msr_ia32_feature_control_valid_bits.
+	 */
+	u64 msr_ia32_feature_control;
+	u64 msr_ia32_feature_control_valid_bits;
+
+	// PVM paravirt MSRs
+	unsigned long msr_vcpu_struct;
+	unsigned long msr_supervisor_rsp;
+	unsigned long msr_supervisor_redzone;
+	unsigned long msr_event_entry;
+	unsigned long msr_retu_rip_plus2;
+	unsigned long msr_rets_rip_plus2;
+	unsigned long msr_switch_cr3;
+	unsigned long msr_linear_address_range;
+
+	struct kvm_segment segments[NR_VCPU_SREG];
+	struct desc_ptr idt_ptr;
+	struct desc_ptr gdt_ptr;
 };
 
 struct kvm_pvm {