Message ID | 20240124003858.3954822-3-mizhang@google.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | minor fix on perf_capabilities in KVM/x86 | expand |
On Wed, Jan 24, 2024, Mingwei Zhang wrote: > Remove vcpu_get_perf_capabilities() helper and directly use the > vcpu->arch.perf_capabilities which now contains the true value of > IA32_PERF_CAPABILITIES if exposed to guest (and 0 otherwise). This should > slightly improve performance by avoiding the runtime check of > X86_FEATURE_PDCM. I have a generic in-progress series[*] to more or less solve the performance woes with guest_cpuid_has(). I would rather keep the current code, even though it's somewhat odd, as it's possible there are setups that rely on KVM checking PDCM. E.g. if userspace sets MSRs *after* CPUID and plugs in a non-zero PERF_CAPABILITIES. [*] https://lore.kernel.org/all/20231110235528.1561679-1-seanjc@google.com > Signed-off-by: Mingwei Zhang <mizhang@google.com> > --- > arch/x86/kvm/vmx/pmu_intel.c | 16 ++++------------ > 1 file changed, 4 insertions(+), 12 deletions(-) > > diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c > index a6216c874729..7cbee2d16ed9 100644 > --- a/arch/x86/kvm/vmx/pmu_intel.c > +++ b/arch/x86/kvm/vmx/pmu_intel.c > @@ -158,17 +158,9 @@ static struct kvm_pmc *intel_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu, > return &counters[array_index_nospec(idx, num_counters)]; > } > > -static inline u64 vcpu_get_perf_capabilities(struct kvm_vcpu *vcpu) > -{ > - if (!guest_cpuid_has(vcpu, X86_FEATURE_PDCM)) > - return 0; > - > - return vcpu->arch.perf_capabilities; > -} > - > static inline bool fw_writes_is_enabled(struct kvm_vcpu *vcpu) > { > - return (vcpu_get_perf_capabilities(vcpu) & PMU_CAP_FW_WRITES) != 0; > + return (vcpu->arch.perf_capabilities & PMU_CAP_FW_WRITES) != 0; > } > > static inline struct kvm_pmc *get_fw_gp_pmc(struct kvm_pmu *pmu, u32 msr) > @@ -207,13 +199,13 @@ static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr) > case MSR_CORE_PERF_FIXED_CTR_CTRL: > return kvm_pmu_has_perf_global_ctrl(pmu); > case MSR_IA32_PEBS_ENABLE: > - ret = vcpu_get_perf_capabilities(vcpu) & PERF_CAP_PEBS_FORMAT; > 
+ ret = vcpu->arch.perf_capabilities & PERF_CAP_PEBS_FORMAT; > break; > case MSR_IA32_DS_AREA: > ret = guest_cpuid_has(vcpu, X86_FEATURE_DS); > break; > case MSR_PEBS_DATA_CFG: > - perf_capabilities = vcpu_get_perf_capabilities(vcpu); > + perf_capabilities = vcpu->arch.perf_capabilities; > ret = (perf_capabilities & PERF_CAP_PEBS_BASELINE) && > ((perf_capabilities & PERF_CAP_PEBS_FORMAT) > 3); > break; > @@ -577,7 +569,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu) > bitmap_set(pmu->all_valid_pmc_idx, > INTEL_PMC_MAX_GENERIC, pmu->nr_arch_fixed_counters); > > - perf_capabilities = vcpu_get_perf_capabilities(vcpu); > + perf_capabilities = vcpu->arch.perf_capabilities; > if (cpuid_model_is_consistent(vcpu) && > (perf_capabilities & PMU_CAP_LBR_FMT)) > x86_perf_get_lbr(&lbr_desc->records); > -- > 2.43.0.429.g432eaa2c6b-goog >
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index a6216c874729..7cbee2d16ed9 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -158,17 +158,9 @@ static struct kvm_pmc *intel_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu, return &counters[array_index_nospec(idx, num_counters)]; } -static inline u64 vcpu_get_perf_capabilities(struct kvm_vcpu *vcpu) -{ - if (!guest_cpuid_has(vcpu, X86_FEATURE_PDCM)) - return 0; - - return vcpu->arch.perf_capabilities; -} - static inline bool fw_writes_is_enabled(struct kvm_vcpu *vcpu) { - return (vcpu_get_perf_capabilities(vcpu) & PMU_CAP_FW_WRITES) != 0; + return (vcpu->arch.perf_capabilities & PMU_CAP_FW_WRITES) != 0; } static inline struct kvm_pmc *get_fw_gp_pmc(struct kvm_pmu *pmu, u32 msr) @@ -207,13 +199,13 @@ static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr) case MSR_CORE_PERF_FIXED_CTR_CTRL: return kvm_pmu_has_perf_global_ctrl(pmu); case MSR_IA32_PEBS_ENABLE: - ret = vcpu_get_perf_capabilities(vcpu) & PERF_CAP_PEBS_FORMAT; + ret = vcpu->arch.perf_capabilities & PERF_CAP_PEBS_FORMAT; break; case MSR_IA32_DS_AREA: ret = guest_cpuid_has(vcpu, X86_FEATURE_DS); break; case MSR_PEBS_DATA_CFG: - perf_capabilities = vcpu_get_perf_capabilities(vcpu); + perf_capabilities = vcpu->arch.perf_capabilities; ret = (perf_capabilities & PERF_CAP_PEBS_BASELINE) && ((perf_capabilities & PERF_CAP_PEBS_FORMAT) > 3); break; @@ -577,7 +569,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu) bitmap_set(pmu->all_valid_pmc_idx, INTEL_PMC_MAX_GENERIC, pmu->nr_arch_fixed_counters); - perf_capabilities = vcpu_get_perf_capabilities(vcpu); + perf_capabilities = vcpu->arch.perf_capabilities; if (cpuid_model_is_consistent(vcpu) && (perf_capabilities & PMU_CAP_LBR_FMT)) x86_perf_get_lbr(&lbr_desc->records);
Remove the vcpu_get_perf_capabilities() helper and directly use vcpu->arch.perf_capabilities, which now contains the true value of IA32_PERF_CAPABILITIES if it is exposed to the guest (and 0 otherwise). This should slightly improve performance by avoiding the runtime check of X86_FEATURE_PDCM. Signed-off-by: Mingwei Zhang <mizhang@google.com> --- arch/x86/kvm/vmx/pmu_intel.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-)