diff mbox series

[v4,14/38] KVM: x86/pmu: Introduce enable_mediated_pmu global parameter

Message ID 20250324173121.1275209-15-mizhang@google.com (mailing list archive)
State New
Headers show
Series Mediated vPMU 4.0 for x86 | expand

Commit Message

Mingwei Zhang March 24, 2025, 5:30 p.m. UTC
From: Dapeng Mi <dapeng1.mi@linux.intel.com>

Introduce the enable_mediated_pmu global parameter to control whether the
mediated vPMU can be enabled at the KVM level. Even if enable_mediated_pmu
is set to true in KVM, the userspace hypervisor still needs to enable the
mediated vPMU explicitly by calling the KVM_CAP_PMU_CAPABILITY ioctl. This
gives the hypervisor the flexibility to enable or disable the mediated vPMU
for each VM.

The mediated vPMU depends on PMU features that are only available in
higher PMU versions, e.g. the PERF_GLOBAL_STATUS_SET MSR in v4+ for Intel
PMUs. Thus introduce a pmu_ops field, MIN_MEDIATED_PMU_VERSION, to indicate
the minimum host PMU version that the mediated vPMU requires.

Currently enable_mediated_pmu is not exposed to user space as a module
parameter; it will be exposed once all mediated vPMU code is in place.

Suggested-by: Sean Christopherson <seanjc@google.com>
Co-developed-by: Mingwei Zhang <mizhang@google.com>
Signed-off-by: Mingwei Zhang <mizhang@google.com>
Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
---
 arch/x86/kvm/pmu.c              |  3 ++-
 arch/x86/kvm/pmu.h              | 11 +++++++++
 arch/x86/kvm/svm/pmu.c          |  1 +
 arch/x86/kvm/vmx/capabilities.h |  3 ++-
 arch/x86/kvm/vmx/pmu_intel.c    |  5 ++++
 arch/x86/kvm/vmx/vmx.c          |  3 ++-
 arch/x86/kvm/x86.c              | 44 ++++++++++++++++++++++++++++++---
 arch/x86/kvm/x86.h              |  1 +
 8 files changed, 64 insertions(+), 7 deletions(-)
diff mbox series

Patch

diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 75e9cfc689f8..4f455afe4009 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -775,7 +775,8 @@  void kvm_pmu_refresh(struct kvm_vcpu *vcpu)
 	pmu->pebs_data_cfg_rsvd = ~0ull;
 	bitmap_zero(pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX);
 
-	if (!vcpu->kvm->arch.enable_pmu)
+	if (!vcpu->kvm->arch.enable_pmu ||
+	    (!lapic_in_kernel(vcpu) && enable_mediated_pmu))
 		return;
 
 	kvm_pmu_call(refresh)(vcpu);
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index ad89d0bd6005..dd45a0c6be74 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -45,6 +45,7 @@  struct kvm_pmu_ops {
 	const u64 EVENTSEL_EVENT;
 	const int MAX_NR_GP_COUNTERS;
 	const int MIN_NR_GP_COUNTERS;
+	const int MIN_MEDIATED_PMU_VERSION;
 };
 
 void kvm_pmu_ops_update(const struct kvm_pmu_ops *pmu_ops);
@@ -63,6 +64,12 @@  static inline bool kvm_pmu_has_perf_global_ctrl(struct kvm_pmu *pmu)
 	return pmu->version > 1;
 }
 
+static inline bool kvm_mediated_pmu_enabled(struct kvm_vcpu *vcpu)
+{
+	return vcpu->kvm->arch.enable_pmu &&
+	       enable_mediated_pmu && vcpu_to_pmu(vcpu)->version;
+}
+
 /*
  * KVM tracks all counters in 64-bit bitmaps, with general purpose counters
  * mapped to bits 31:0 and fixed counters mapped to 63:32, e.g. fixed counter 0
@@ -210,6 +217,10 @@  static inline void kvm_init_pmu_capability(const struct kvm_pmu_ops *pmu_ops)
 			enable_pmu = false;
 	}
 
+	if (!enable_pmu || !kvm_pmu_cap.mediated ||
+	    pmu_ops->MIN_MEDIATED_PMU_VERSION > kvm_pmu_cap.version)
+		enable_mediated_pmu = false;
+
 	if (!enable_pmu) {
 		memset(&kvm_pmu_cap, 0, sizeof(kvm_pmu_cap));
 		return;
diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
index 288f7f2a46f2..c8b9fd9b5350 100644
--- a/arch/x86/kvm/svm/pmu.c
+++ b/arch/x86/kvm/svm/pmu.c
@@ -239,4 +239,5 @@  struct kvm_pmu_ops amd_pmu_ops __initdata = {
 	.EVENTSEL_EVENT = AMD64_EVENTSEL_EVENT,
 	.MAX_NR_GP_COUNTERS = KVM_MAX_NR_AMD_GP_COUNTERS,
 	.MIN_NR_GP_COUNTERS = AMD64_NUM_COUNTERS,
+	.MIN_MEDIATED_PMU_VERSION = 2,
 };
diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
index cb6588238f46..fac2c80ddbab 100644
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@ -390,7 +390,8 @@  static inline bool vmx_pt_mode_is_host_guest(void)
 
 static inline bool vmx_pebs_supported(void)
 {
-	return boot_cpu_has(X86_FEATURE_PEBS) && kvm_pmu_cap.pebs_ept;
+	return boot_cpu_has(X86_FEATURE_PEBS) &&
+	       !enable_mediated_pmu && kvm_pmu_cap.pebs_ept;
 }
 
 static inline bool cpu_has_notify_vmexit(void)
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 77012b2eca0e..425e93d4b1c6 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -739,4 +739,9 @@  struct kvm_pmu_ops intel_pmu_ops __initdata = {
 	.EVENTSEL_EVENT = ARCH_PERFMON_EVENTSEL_EVENT,
 	.MAX_NR_GP_COUNTERS = KVM_MAX_NR_INTEL_GP_COUNTERS,
 	.MIN_NR_GP_COUNTERS = 1,
+	/*
+	 * Intel mediated vPMU support depends on
+	 * MSR_CORE_PERF_GLOBAL_STATUS_SET which is supported from 4+.
+	 */
+	.MIN_MEDIATED_PMU_VERSION = 4,
 };
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 00ac94535c21..a4b5b6455c7b 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7916,7 +7916,8 @@  static __init u64 vmx_get_perf_capabilities(void)
 	if (boot_cpu_has(X86_FEATURE_PDCM))
 		rdmsrl(MSR_IA32_PERF_CAPABILITIES, host_perf_cap);
 
-	if (!cpu_feature_enabled(X86_FEATURE_ARCH_LBR)) {
+	if (!cpu_feature_enabled(X86_FEATURE_ARCH_LBR) &&
+	    !enable_mediated_pmu) {
 		x86_perf_get_lbr(&vmx_lbr_caps);
 
 		/*
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 72995952978a..1ebe169b88b6 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -188,6 +188,14 @@  bool __read_mostly enable_pmu = true;
 EXPORT_SYMBOL_GPL(enable_pmu);
 module_param(enable_pmu, bool, 0444);
 
+/*
+ * Enable/disable mediated passthrough PMU virtualization.
+ * Don't expose it to userspace as a module parameter until
+ * all mediated vPMU code is in place.
+ */
+bool __read_mostly enable_mediated_pmu;
+EXPORT_SYMBOL_GPL(enable_mediated_pmu);
+
 bool __read_mostly eager_page_split = true;
 module_param(eager_page_split, bool, 0644);
 
@@ -6643,9 +6651,28 @@  int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
 			break;
 
 		mutex_lock(&kvm->lock);
-		if (!kvm->created_vcpus) {
-			kvm->arch.enable_pmu = !(cap->args[0] & KVM_PMU_CAP_DISABLE);
-			r = 0;
+		/*
+		 * To keep PMU configuration "simple", setting vPMU support is
+		 * disallowed if vCPUs are created, or if mediated PMU support
+		 * was already enabled for the VM.
+		 */
+		if (!kvm->created_vcpus &&
+		    (!enable_mediated_pmu || !kvm->arch.enable_pmu)) {
+			bool pmu_enable = !(cap->args[0] & KVM_PMU_CAP_DISABLE);
+
+			if (enable_mediated_pmu && pmu_enable) {
+				char *err_msg = "Fail to enable mediated vPMU, " \
+					"please disable system wide perf events or nmi_watchdog " \
+					"(echo 0 > /proc/sys/kernel/nmi_watchdog).\n";
+
+				r = perf_get_mediated_pmu();
+				if (r)
+					kvm_err("%s", err_msg);
+			} else
+				r = 0;
+
+			if (!r)
+				kvm->arch.enable_pmu = pmu_enable;
 		}
 		mutex_unlock(&kvm->lock);
 		break;
@@ -12723,7 +12750,14 @@  int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	kvm->arch.default_tsc_khz = max_tsc_khz ? : tsc_khz;
 	kvm->arch.apic_bus_cycle_ns = APIC_BUS_CYCLE_NS_DEFAULT;
 	kvm->arch.guest_can_read_msr_platform_info = true;
-	kvm->arch.enable_pmu = enable_pmu;
+
+	/*
+	 * PMU virtualization is opt-in when mediated PMU support is enabled.
+	 * KVM_CAP_PMU_CAPABILITY ioctl must be called explicitly to enable
+	 * mediated vPMU. For legacy perf-based vPMU, its behavior isn't changed,
+	 * KVM_CAP_PMU_CAPABILITY ioctl is optional.
+	 */
+	kvm->arch.enable_pmu = enable_pmu && !enable_mediated_pmu;
 
 #if IS_ENABLED(CONFIG_HYPERV)
 	spin_lock_init(&kvm->arch.hv_root_tdp_lock);
@@ -12876,6 +12910,8 @@  void kvm_arch_destroy_vm(struct kvm *kvm)
 		__x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, 0, 0);
 		mutex_unlock(&kvm->slots_lock);
 	}
+	if (kvm->arch.enable_pmu && enable_mediated_pmu)
+		perf_put_mediated_pmu();
 	kvm_unload_vcpu_mmus(kvm);
 	kvm_x86_call(vm_destroy)(kvm);
 	kvm_free_msr_filter(srcu_dereference_check(kvm->arch.msr_filter, &kvm->srcu, 1));
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 91e50a513100..dbf9973b3d09 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -391,6 +391,7 @@  extern struct kvm_caps kvm_caps;
 extern struct kvm_host_values kvm_host;
 
 extern bool enable_pmu;
+extern bool enable_mediated_pmu;
 
 /*
  * Get a filtered version of KVM's supported XCR0 that strips out dynamic