
[v12,06/11] KVM: vmx/pmu: Expose LBR to guest via MSR_IA32_PERF_CAPABILITIES

Message ID 20200613080958.132489-7-like.xu@linux.intel.com
State New, archived
Series Guest Last Branch Recording Enabling

Commit Message

Like Xu June 13, 2020, 8:09 a.m. UTC
Bits [5:0] of the read-only MSR_IA32_PERF_CAPABILITIES report the format
of the records stored in the LBR stack. Userspace can expose LBR to the
guest when the host supports LBR, the exact host-supported LBR format
value is set in the guest's MSR_IA32_PERF_CAPABILITIES, and the vcpu
model is compatible (an illustrative userspace sketch follows the
diffstat).

Signed-off-by: Like Xu <like.xu@linux.intel.com>
---
 arch/x86/kvm/vmx/capabilities.h | 11 ++++++-
 arch/x86/kvm/vmx/pmu_intel.c    | 52 +++++++++++++++++++++++++++++++--
 arch/x86/kvm/vmx/vmx.h          |  6 ++++
 3 files changed, 65 insertions(+), 4 deletions(-)
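
As an aside, here is a minimal userspace sketch (not part of this patch)
of how a VMM might mirror the KVM-reported LBR format into the guest's
MSR_IA32_PERF_CAPABILITIES. It assumes KVM_CAP_GET_MSR_FEATURES is
available and that guest CPUID already sets X86_FEATURE_PDCM; the
expose_guest_lbr() helper name is hypothetical:

#include <linux/kvm.h>
#include <string.h>
#include <sys/ioctl.h>

#define MSR_IA32_PERF_CAPABILITIES	0x345
#define PMU_CAP_LBR_FMT			0x3f

static int expose_guest_lbr(int kvm_fd, int vcpu_fd)
{
	struct {
		struct kvm_msrs hdr;
		struct kvm_msr_entry entry;
	} m;

	memset(&m, 0, sizeof(m));
	m.hdr.nmsrs = 1;
	m.entry.index = MSR_IA32_PERF_CAPABILITIES;

	/* System ioctl: ask KVM which PERF_CAPABILITIES bits it supports. */
	if (ioctl(kvm_fd, KVM_GET_MSRS, &m) != 1)
		return -1;

	/* Bail out if the host/KVM reports no usable LBR format. */
	if (!(m.entry.data & PMU_CAP_LBR_FMT))
		return -1;

	/*
	 * Write the supported value back unmodified; per this patch, KVM
	 * rejects any LBR format that does not exactly match the host's.
	 */
	return ioctl(vcpu_fd, KVM_SET_MSRS, &m) == 1 ? 0 : -1;
}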

Comments

Andi Kleen July 8, 2020, 1:36 p.m. UTC | #1
> +	/*
> +	 * As a first step, a guest can only enable the LBR feature if its
> +	 * CPU model is the same as the host's, because the LBR registers
> +	 * are passed through to the guest and are model specific.
> +	 */
> +	if (boot_cpu_data.x86_model != guest_cpuid_model(vcpu))
> +		return false;

Could we relax this in a follow-on patch? (after this series is merged)

It's enough if the perf cap LBR version matches; a full model number
match isn't needed. This would require a way to configure the LBR
version from qemu.

This would allow more flexibility, for example migration from
Icelake to Skylake and vice versa.

-Andi
Xu, Like July 8, 2020, 2:38 p.m. UTC | #2
On 2020/7/8 21:36, Andi Kleen wrote:
>> +	/*
>> +	 * As a first step, a guest can only enable the LBR feature if its
>> +	 * CPU model is the same as the host's, because the LBR registers
>> +	 * are passed through to the guest and are model specific.
>> +	 */
>> +	if (boot_cpu_data.x86_model != guest_cpuid_model(vcpu))
>> +		return false;
> Could we relax this in a follow-on patch? (after this series is merged)
Sure, there will be a follow-on patch to relax this check after this series is merged.
>
> It's enough if the perf cap LBR version matches; a full model number
> match isn't needed.
I assume you are referring to the LBR_FMT value in the perf_capabilities.
> This would require a way to configure the LBR version
> from qemu.
Sure, I may propose this configuration in the QEMU community.
>
> This would allow more flexibility, for example migration from
> Icelake to Skylake and vice versa.
Yes, we need this flexibility to cover as many platforms as possible (a rough sketch of such a check follows this thread).

Thanks,
Like Xu
>
> -Andi
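
To make the suggested relaxation concrete, a rough sketch of such a
check might look as follows (an assumption based on this thread, not
code from the series): compare only the LBR format fields and drop the
model-number comparison.

static inline bool lbr_fmt_is_compatible(struct kvm_vcpu *vcpu)
{
	u64 host_fmt = vmx_get_perf_capabilities() & PMU_CAP_LBR_FMT;
	u64 guest_fmt = vcpu->arch.perf_capabilities & PMU_CAP_LBR_FMT;

	/* Same record format, e.g. Icelake <-> Skylake, can migrate. */
	return guest_fmt && guest_fmt == host_fmt;
}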

Patch

diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
index 4bbd8b448d22..b633a90320ee 100644
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@ -19,6 +19,7 @@  extern int __read_mostly pt_mode;
 #define PT_MODE_HOST_GUEST	1
 
 #define PMU_CAP_FW_WRITES	(1ULL << 13)
+#define PMU_CAP_LBR_FMT		0x3f
 
 struct nested_vmx_msrs {
 	/*
@@ -375,7 +376,15 @@  static inline u64 vmx_get_perf_capabilities(void)
 	 * Since counters are virtualized, KVM would support full
 	 * width counting unconditionally, even if the host lacks it.
 	 */
-	return PMU_CAP_FW_WRITES;
+	u64 perf_cap = PMU_CAP_FW_WRITES;
+	u64 host_perf_cap = 0;
+
+	if (boot_cpu_has(X86_FEATURE_PDCM))
+		rdmsrl(MSR_IA32_PERF_CAPABILITIES, host_perf_cap);
+
+	/* Expose only the LBR format bits from the host's value. */
+	perf_cap |= host_perf_cap & PMU_CAP_LBR_FMT;
+	return perf_cap;
 }
 
 #endif /* __KVM_X86_VMX_CAPS_H */
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index bdcce65c7a1d..a953c7d633f6 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -168,6 +168,13 @@  static inline struct kvm_pmc *get_fw_gp_pmc(struct kvm_pmu *pmu, u32 msr)
 	return get_gp_pmc(pmu, msr, MSR_IA32_PMC0);
 }
 
+static inline bool lbr_is_enabled(struct kvm_vcpu *vcpu)
+{
+	struct x86_pmu_lbr *lbr = &to_vmx(vcpu)->lbr_desc.lbr;
+
+	return lbr->nr && (vcpu->arch.perf_capabilities & PMU_CAP_LBR_FMT);
+}
+
 static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
 {
 	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
@@ -251,6 +258,30 @@  static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	return 1;
 }
 
+static inline bool lbr_fmt_is_matched(u64 data)
+{
+	return (data & PMU_CAP_LBR_FMT) ==
+		(vmx_get_perf_capabilities() & PMU_CAP_LBR_FMT);
+}
+
+static inline bool lbr_is_compatible(struct kvm_vcpu *vcpu)
+{
+	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+
+	if (pmu->version < 2)
+		return false;
+
+	/*
+	 * As a first step, a guest can only enable the LBR feature if its
+	 * CPU model is the same as the host's, because the LBR registers
+	 * are passed through to the guest and are model specific.
+	 */
+	if (boot_cpu_data.x86_model != guest_cpuid_model(vcpu))
+		return false;
+
+	return true;
+}
+
 static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 {
 	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
@@ -295,6 +326,14 @@  static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		if (guest_cpuid_has(vcpu, X86_FEATURE_PDCM) ?
 			(data & ~vmx_get_perf_capabilities()) : data)
 			return 1;
+		if (data & PMU_CAP_LBR_FMT) {
+			if (!lbr_fmt_is_matched(data))
+				return 1;
+			if (!lbr_is_compatible(vcpu))
+				return 1;
+			if (x86_perf_get_lbr(&to_vmx(vcpu)->lbr_desc.lbr))
+				return 1;
+		}
 		vcpu->arch.perf_capabilities = data;
 		return 0;
 	default:
@@ -337,6 +376,7 @@  static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
 	struct kvm_cpuid_entry2 *entry;
 	union cpuid10_eax eax;
 	union cpuid10_edx edx;
+	struct lbr_desc *lbr_desc = &to_vmx(vcpu)->lbr_desc;
 
 	pmu->nr_arch_gp_counters = 0;
 	pmu->nr_arch_fixed_counters = 0;
@@ -344,7 +384,6 @@  static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
 	pmu->counter_bitmask[KVM_PMC_FIXED] = 0;
 	pmu->version = 0;
 	pmu->reserved_bits = 0xffffffff00200000ull;
-	vcpu->arch.perf_capabilities = 0;
 
 	entry = kvm_find_cpuid_entry(vcpu, 0xa, 0);
 	if (!entry)
@@ -357,8 +396,6 @@  static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
 		return;
 
 	perf_get_x86_pmu_capability(&x86_pmu);
-	if (guest_cpuid_has(vcpu, X86_FEATURE_PDCM))
-		vcpu->arch.perf_capabilities = vmx_get_perf_capabilities();
 
 	pmu->nr_arch_gp_counters = min_t(int, eax.split.num_counters,
 					 x86_pmu.num_counters_gp);
@@ -397,6 +434,10 @@  static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
 	bitmap_set(pmu->all_valid_pmc_idx,
 		INTEL_PMC_MAX_GENERIC, pmu->nr_arch_fixed_counters);
 
+	if ((vcpu->arch.perf_capabilities & PMU_CAP_LBR_FMT) &&
+	    x86_perf_get_lbr(&lbr_desc->lbr))
+		vcpu->arch.perf_capabilities &= ~PMU_CAP_LBR_FMT;
+
 	nested_vmx_pmu_entry_exit_ctls_update(vcpu);
 }
 
@@ -404,6 +445,7 @@  static void intel_pmu_init(struct kvm_vcpu *vcpu)
 {
 	int i;
 	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+	struct lbr_desc *lbr_desc = &to_vmx(vcpu)->lbr_desc;
 
 	for (i = 0; i < INTEL_PMC_MAX_GENERIC; i++) {
 		pmu->gp_counters[i].type = KVM_PMC_GP;
@@ -418,6 +460,10 @@  static void intel_pmu_init(struct kvm_vcpu *vcpu)
 		pmu->fixed_counters[i].idx = i + INTEL_PMC_IDX_FIXED;
 		pmu->fixed_counters[i].current_config = 0;
 	}
+
+	vcpu->arch.perf_capabilities = guest_cpuid_has(vcpu, X86_FEATURE_PDCM) ?
+		vmx_get_perf_capabilities() : 0;
+	lbr_desc->lbr.nr = 0;
 }
 
 static void intel_pmu_reset(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 8a83b5edc820..ef24338b194d 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -91,6 +91,11 @@  struct pt_desc {
 	struct pt_ctx guest;
 };
 
+struct lbr_desc {
+	/* Basic information about LBR records. */
+	struct x86_pmu_lbr lbr;
+};
+
 /*
  * The nested_vmx structure is part of vcpu_vmx, and holds information we need
  * for correct emulation of VMX (i.e., nested VMX) on this vcpu.
@@ -302,6 +307,7 @@  struct vcpu_vmx {
 	u64 ept_pointer;
 
 	struct pt_desc pt_desc;
+	struct lbr_desc lbr_desc;
 };
 
 enum ept_pointers_status {