@@ -775,7 +775,8 @@ void kvm_pmu_refresh(struct kvm_vcpu *vcpu)
pmu->pebs_data_cfg_rsvd = ~0ull;
bitmap_zero(pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX);
- if (!vcpu->kvm->arch.enable_pmu)
+ if (!vcpu->kvm->arch.enable_pmu ||
+ (!lapic_in_kernel(vcpu) && enable_mediated_pmu))
return;
kvm_pmu_call(refresh)(vcpu);
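A note on the lapic_in_kernel() check: with a mediated vPMU, guest counter overflows are delivered as real PMIs through the vCPU's in-kernel local APIC, so the early return above leaves vCPUs whose APIC is emulated in userspace without a vPMU. A minimal userspace sketch of the resulting ordering requirement (hypothetical bring-up code, not part of this patch; error handling omitted):

#include <fcntl.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
	int kvm_fd = open("/dev/kvm", O_RDWR);
	int vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, 0);

	/*
	 * The in-kernel irqchip (and thus the in-kernel local APIC) must
	 * exist before vCPUs are created; otherwise, with
	 * enable_mediated_pmu set, kvm_pmu_refresh() above leaves the
	 * vCPU without a vPMU.
	 */
	ioctl(vm_fd, KVM_CREATE_IRQCHIP, 0);

	return ioctl(vm_fd, KVM_CREATE_VCPU, 0) < 0;
}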
@@ -45,6 +45,7 @@ struct kvm_pmu_ops {
const u64 EVENTSEL_EVENT;
const int MAX_NR_GP_COUNTERS;
const int MIN_NR_GP_COUNTERS;
+ const int MIN_MEDIATED_PMU_VERSION;
};
void kvm_pmu_ops_update(const struct kvm_pmu_ops *pmu_ops);
@@ -63,6 +64,12 @@ static inline bool kvm_pmu_has_perf_global_ctrl(struct kvm_pmu *pmu)
return pmu->version > 1;
}
+static inline bool kvm_mediated_pmu_enabled(struct kvm_vcpu *vcpu)
+{
+ return vcpu->kvm->arch.enable_pmu &&
+ enable_mediated_pmu && vcpu_to_pmu(vcpu)->version;
+}
+
/*
* KVM tracks all counters in 64-bit bitmaps, with general purpose counters
* mapped to bits 31:0 and fixed counters mapped to 63:32, e.g. fixed counter 0
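The new helper is meant to gate mediated-PMU-only paths on a per-vCPU basis. A hypothetical call site, purely illustrative (the function below is not part of this patch; vmx_set_intercept_for_msr() and MSR_TYPE_RW are the existing VMX helpers):

/*
 * Illustrative sketch: pass the global control MSR through to the guest
 * only while the mediated vPMU is actually active for this vCPU.
 */
static void vmx_update_pmu_msr_intercept(struct kvm_vcpu *vcpu)
{
	bool intercept = !kvm_mediated_pmu_enabled(vcpu);

	vmx_set_intercept_for_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL,
				  MSR_TYPE_RW, intercept);
}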
@@ -210,6 +217,10 @@ static inline void kvm_init_pmu_capability(const struct kvm_pmu_ops *pmu_ops)
enable_pmu = false;
}
+ if (!enable_pmu || !kvm_pmu_cap.mediated ||
+ pmu_ops->MIN_MEDIATED_PMU_VERSION > kvm_pmu_cap.version)
+ enable_mediated_pmu = false;
+
if (!enable_pmu) {
memset(&kvm_pmu_cap, 0, sizeof(kvm_pmu_cap));
return;
@@ -239,4 +239,5 @@ struct kvm_pmu_ops amd_pmu_ops __initdata = {
.EVENTSEL_EVENT = AMD64_EVENTSEL_EVENT,
.MAX_NR_GP_COUNTERS = KVM_MAX_NR_AMD_GP_COUNTERS,
.MIN_NR_GP_COUNTERS = AMD64_NUM_COUNTERS,
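+ /*
+ * AMD mediated vPMU support requires PerfMonV2, i.e. PMU version 2,
+ * which provides the PERF_CNTR_GLOBAL_{CTL,STATUS} MSRs.
+ */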
+ .MIN_MEDIATED_PMU_VERSION = 2,
};
@@ -390,7 +390,8 @@ static inline bool vmx_pt_mode_is_host_guest(void)
static inline bool vmx_pebs_supported(void)
{
- return boot_cpu_has(X86_FEATURE_PEBS) && kvm_pmu_cap.pebs_ept;
+ return boot_cpu_has(X86_FEATURE_PEBS) &&
+ !enable_mediated_pmu && kvm_pmu_cap.pebs_ept;
}
static inline bool cpu_has_notify_vmexit(void)
@@ -739,4 +739,9 @@ struct kvm_pmu_ops intel_pmu_ops __initdata = {
.EVENTSEL_EVENT = ARCH_PERFMON_EVENTSEL_EVENT,
.MAX_NR_GP_COUNTERS = KVM_MAX_NR_INTEL_GP_COUNTERS,
.MIN_NR_GP_COUNTERS = 1,
+ /*
+ * Intel mediated vPMU support depends on MSR_CORE_PERF_GLOBAL_STATUS_SET,
+ * which is only available on PMU version 4 and later.
+ */
+ .MIN_MEDIATED_PMU_VERSION = 4,
};
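For context on the version-4 floor above: MSR_CORE_PERF_GLOBAL_STATUS is read-only, so when the mediated PMU switches guest state back in, pending overflow bits can only be re-asserted through the write-1-to-set companion MSR introduced with PMU version 4. A minimal sketch of such a restore path, assuming the guest's status was snapshotted into pmu->global_status on switch-out (illustrative, not this patch's code):

static void intel_restore_guest_global_status(struct kvm_pmu *pmu)
{
	u64 status;

	/* Clear whichever overflow bits are currently pending... */
	rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
	wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, status);

	/*
	 * ...then re-assert the guest's bits. GLOBAL_STATUS itself cannot
	 * be written directly, hence the hard dependency on the SET MSR.
	 */
	wrmsrl(MSR_CORE_PERF_GLOBAL_STATUS_SET, pmu->global_status);
}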
@@ -7916,7 +7916,8 @@ static __init u64 vmx_get_perf_capabilities(void)
if (boot_cpu_has(X86_FEATURE_PDCM))
rdmsrl(MSR_IA32_PERF_CAPABILITIES, host_perf_cap);
- if (!cpu_feature_enabled(X86_FEATURE_ARCH_LBR)) {
+ if (!cpu_feature_enabled(X86_FEATURE_ARCH_LBR) &&
+ !enable_mediated_pmu) {
x86_perf_get_lbr(&vmx_lbr_caps);
/*
@@ -188,6 +188,14 @@ bool __read_mostly enable_pmu = true;
EXPORT_SYMBOL_GPL(enable_pmu);
module_param(enable_pmu, bool, 0444);
+/*
+ * Enable/disable mediated passthrough PMU virtualization.
+ * Don't expose it to userspace as a module parameter until
+ * all mediated vPMU code is in place.
+ */
+bool __read_mostly enable_mediated_pmu;
+EXPORT_SYMBOL_GPL(enable_mediated_pmu);
+
bool __read_mostly eager_page_split = true;
module_param(eager_page_split, bool, 0644);
@@ -6643,9 +6651,28 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
break;
mutex_lock(&kvm->lock);
- if (!kvm->created_vcpus) {
- kvm->arch.enable_pmu = !(cap->args[0] & KVM_PMU_CAP_DISABLE);
- r = 0;
+ /*
+ * To keep PMU configuration "simple", toggling vPMU support is
+ * disallowed after vCPUs have been created, or after mediated PMU
+ * support has been enabled for the VM.
+ */
+ if (!kvm->created_vcpus &&
+ (!enable_mediated_pmu || !kvm->arch.enable_pmu)) {
+ bool pmu_enable = !(cap->args[0] & KVM_PMU_CAP_DISABLE);
+
+ if (enable_mediated_pmu && pmu_enable) {
+ r = perf_get_mediated_pmu();
+ if (r)
+ kvm_err("Failed to enable mediated vPMU, please disable system-wide perf events or nmi_watchdog (echo 0 > /proc/sys/kernel/nmi_watchdog).\n");
+ } else {
+ r = 0;
+ }
+
+ if (!r)
+ kvm->arch.enable_pmu = pmu_enable;
}
mutex_unlock(&kvm->lock);
break;
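From userspace, opting a VM in to PMU virtualization looks like the sketch below (standard KVM uAPI; the helper name is hypothetical). It must run before any KVM_CREATE_VCPU call, and with enable_mediated_pmu set it is the point where perf_get_mediated_pmu() can fail, e.g. while system-wide perf events or the NMI watchdog hold the PMU:

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Hypothetical helper: opt a VM in to PMU virtualization. */
static int vm_enable_vpmu(int vm_fd)
{
	struct kvm_enable_cap cap = {
		.cap = KVM_CAP_PMU_CAPABILITY,
		.args[0] = 0,	/* 0 = enable; KVM_PMU_CAP_DISABLE = disable */
	};

	return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
}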
@@ -12723,7 +12750,14 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
kvm->arch.default_tsc_khz = max_tsc_khz ? : tsc_khz;
kvm->arch.apic_bus_cycle_ns = APIC_BUS_CYCLE_NS_DEFAULT;
kvm->arch.guest_can_read_msr_platform_info = true;
- kvm->arch.enable_pmu = enable_pmu;
+
+ /*
+ * PMU virtualization is opt-in when mediated PMU support is enabled, i.e.
+ * the KVM_CAP_PMU_CAPABILITY ioctl must be called explicitly to enable the
+ * mediated vPMU. The behavior of the legacy perf-based vPMU is unchanged:
+ * for it, the KVM_CAP_PMU_CAPABILITY ioctl remains optional.
+ */
+ kvm->arch.enable_pmu = enable_pmu && !enable_mediated_pmu;
#if IS_ENABLED(CONFIG_HYPERV)
spin_lock_init(&kvm->arch.hv_root_tdp_lock);
@@ -12876,6 +12910,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
__x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, 0, 0);
mutex_unlock(&kvm->slots_lock);
}
+ if (kvm->arch.enable_pmu && enable_mediated_pmu)
+ perf_put_mediated_pmu();
kvm_unload_vcpu_mmus(kvm);
kvm_x86_call(vm_destroy)(kvm);
kvm_free_msr_filter(srcu_dereference_check(kvm->arch.msr_filter, &kvm->srcu, 1));
@@ -391,6 +391,7 @@ extern struct kvm_caps kvm_caps;
extern struct kvm_host_values kvm_host;
extern bool enable_pmu;
+extern bool enable_mediated_pmu;
/*
* Get a filtered version of KVM's supported XCR0 that strips out dynamic