[RFC,v3,11/13] KVM: x86/pmu: Support topdown perf metrics feature

Message ID	20230822051140.512879-12-dapeng1.mi@linux.intel.com (mailing list archive)
State	New, archived
Headers	show Return-Path: <kvm-owner@vger.kernel.org> From: Dapeng Mi <dapeng1.mi@linux.intel.com> To: Sean Christopherson <seanjc@google.com>, Paolo Bonzini <pbonzini@redhat.com>, Peter Zijlstra <peterz@infradead.org>, Arnaldo Carvalho de Melo <acme@kernel.org>, Kan Liang <kan.liang@linux.intel.com>, Like Xu <likexu@tencent.com>, Mark Rutland <mark.rutland@arm.com>, Alexander Shishkin <alexander.shishkin@linux.intel.com>, Jiri Olsa <jolsa@kernel.org>, Namhyung Kim <namhyung@kernel.org>, Ian Rogers <irogers@google.com>, Adrian Hunter <adrian.hunter@intel.com> Cc: kvm@vger.kernel.org, linux-perf-users@vger.kernel.org, linux-kernel@vger.kernel.org, Zhenyu Wang <zhenyuw@linux.intel.com>, Zhang Xiong <xiong.y.zhang@intel.com>, Lv Zhiyuan <zhiyuan.lv@intel.com>, Yang Weijiang <weijiang.yang@intel.com>, Dapeng Mi <dapeng1.mi@intel.com>, Dapeng Mi <dapeng1.mi@linux.intel.com> Subject: [PATCH RFC v3 11/13] KVM: x86/pmu: Support topdown perf metrics feature Date: Tue, 22 Aug 2023 13:11:38 +0800 Message-Id: <20230822051140.512879-12-dapeng1.mi@linux.intel.com> In-Reply-To: <20230822051140.512879-1-dapeng1.mi@linux.intel.com> References: <20230822051140.512879-1-dapeng1.mi@linux.intel.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Precedence: bulk
Series	Enable fixed counter 3 and topdown perf metrics for vPMU \| expand [RFC,v3,00/13] Enable fixed counter 3 and topdown perf metrics for vPMU [RFC,v3,01/13] KVM: x86/pmu: Add Intel CPUID-hinted TopDown slots event [RFC,v3,02/13] KVM: x86/pmu: Support PMU fixed counter 3 [RFC,v3,03/13] perf/core: Add function perf_event_group_leader_check() [RFC,v3,04/13] perf/core: Add function perf_event_move_group() [RFC,v3,05/13] perf/core: Add *group_leader for perf_event_create_group_kernel_counters() [RFC,v3,06/13] perf/x86: Fix typos and inconsistent indents in perf_event header [RFC,v3,07/13] perf/x86: Add constraint for guest perf metrics event [RFC,v3,08/13] perf/core: Add new function perf_event_topdown_metrics() [RFC,v3,09/13] perf/x86/intel: Handle KVM virtual metrics event in perf system [RFC,v3,10/13] KVM: x86/pmu: Extend pmc_reprogram_counter() to create group events [RFC,v3,11/13] KVM: x86/pmu: Support topdown perf metrics feature [RFC,v3,12/13] KVM: x86/pmu: Handle PERF_METRICS overflow [RFC,v3,13/13] KVM: x86/pmu: Expose Topdown in MSR_IA32_PERF_CAPABILITIES

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 235e24fe66a4..d037259c6887 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -487,6 +487,12 @@ enum pmc_type { KVM_PMC_FIXED, }; +enum topdown_events { + KVM_TD_SLOTS = 0, + KVM_TD_METRICS = 1, + KVM_TD_EVENTS_MAX = 2, +}; + struct kvm_pmc { enum pmc_type type; u8 idx; diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index b02a56c77647..fad7b2c10bb8 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -182,6 +182,30 @@ static u64 pmc_get_pebs_precise_level(struct kvm_pmc *pmc) return 1; } +static void pmc_setup_td_metrics_events_attr(struct kvm_pmc *pmc, + struct perf_event_attr *attr, + unsigned int event_idx) +{ + if (!pmc_is_topdown_metrics_used(pmc)) + return; + + /* + * Set up the slots event attribute: when the slots event is + * created for guest topdown metrics profiling, the + * sample period must be 0. + */ + if (event_idx == KVM_TD_SLOTS) + attr->sample_period = 0; + + /* Set up the vmetrics event attribute. */ + if (event_idx == KVM_TD_METRICS) { + attr->config = INTEL_FIXED_VMETRICS_EVENT; + attr->sample_period = 0; + /* Only group leader event can be pinned. */ + attr->pinned = false; + } +} + static int pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type, u64 config, bool exclude_user, bool exclude_kernel, bool intr) @@ -233,6 +257,8 @@ static int pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type, u64 config, for (i = 0; i < pmc->max_nr_events; i++) { group_leader = i ? pmc->perf_event : NULL; + pmc_setup_td_metrics_events_attr(pmc, &attr, i); + event = perf_event_create_kernel_counter(&attr, -1, current, group_leader, kvm_perf_overflow, pmc); @@ -256,6 +282,12 @@ static int pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type, u64 config, pmc->is_paused = false; pmc->intr = intr || pebs; + if (pmc_is_topdown_metrics_active(pmc)) { + pmc_update_topdown_metrics(pmc); + /* KVM needs to inject a PMI for PERF_METRICS overflow. 
*/ + pmc->intr = true; + } + if (!attr.disabled) return 0; @@ -269,6 +301,7 @@ static void pmc_pause_counter(struct kvm_pmc *pmc) { u64 counter = pmc->counter; unsigned int i; + u64 data; if (!pmc->perf_event || pmc->is_paused) return; @@ -279,8 +312,15 @@ static void pmc_pause_counter(struct kvm_pmc *pmc) * then disable non-group leader events. */ counter += perf_event_pause(pmc->perf_event, true); - for (i = 1; pmc->perf_events[i] && i < pmc->max_nr_events; i++) - perf_event_pause(pmc->perf_events[i], true); + for (i = 1; i < pmc->max_nr_events && pmc->perf_events[i]; i++) { + data = perf_event_pause(pmc->perf_events[i], true); + /* + * The count of the vmetrics event actually holds the raw data of + * PERF_METRICS; save it to extra_config. + */ + if (pmc->idx == INTEL_PMC_IDX_FIXED_SLOTS && i == KVM_TD_METRICS) + pmc->extra_config = data; + } pmc->counter = counter & pmc_bitmask(pmc); pmc->is_paused = true; } @@ -557,6 +597,21 @@ static int kvm_pmu_rdpmc_vmware(struct kvm_vcpu *vcpu, unsigned idx, u64 *data) return 0; } +static inline int kvm_pmu_read_perf_metrics(struct kvm_vcpu *vcpu, + unsigned int idx, u64 *data) +{ + struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); + struct kvm_pmc *pmc = get_fixed_pmc(pmu, MSR_CORE_PERF_FIXED_CTR3); + + if (!pmc) { + *data = 0; + return 1; + } + + *data = pmc->extra_config; + return 0; +} + int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data) { bool fast_mode = idx & (1u << 31); @@ -570,6 +625,9 @@ int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data) if (is_vmware_backdoor_pmc(idx)) return kvm_pmu_rdpmc_vmware(vcpu, idx, data); + if (idx & INTEL_PMC_FIXED_RDPMC_METRICS) + return kvm_pmu_read_perf_metrics(vcpu, idx, data); + pmc = static_call(kvm_x86_pmu_rdpmc_ecx_to_pmc)(vcpu, idx, &mask); if (!pmc) return 1; diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h index 3dc0deb83096..43abe793c11c 100644 --- a/arch/x86/kvm/pmu.h +++ b/arch/x86/kvm/pmu.h @@ -257,6 +257,34 @@ static inline bool 
pmc_is_globally_enabled(struct kvm_pmc *pmc) return test_bit(pmc->idx, (unsigned long *)&pmu->global_ctrl); } +static inline bool pmc_is_topdown_metrics_used(struct kvm_pmc *pmc) +{ + return (pmc->idx == INTEL_PMC_IDX_FIXED_SLOTS) && + (pmc->max_nr_events == KVM_TD_EVENTS_MAX); +} + +static inline bool pmc_is_topdown_metrics_active(struct kvm_pmc *pmc) +{ + return pmc_is_topdown_metrics_used(pmc) && + pmc->perf_events[KVM_TD_METRICS]; +} + +static inline void pmc_update_topdown_metrics(struct kvm_pmc *pmc) +{ + struct perf_event *event; + int i; + + struct td_metrics td_metrics = { + .slots = pmc->counter, + .metric = pmc->extra_config, + }; + + for (i = 0; i < pmc->max_nr_events; i++) { + event = pmc->perf_events[i]; + perf_event_topdown_metrics(event, &td_metrics); + } +} + void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu); void kvm_pmu_handle_event(struct kvm_vcpu *vcpu); int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data); diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h index 41a4533f9989..d8317552b634 100644 --- a/arch/x86/kvm/vmx/capabilities.h +++ b/arch/x86/kvm/vmx/capabilities.h @@ -22,6 +22,7 @@ extern int __read_mostly pt_mode; #define PT_MODE_HOST_GUEST 1 #define PMU_CAP_FW_WRITES (1ULL << 13) +#define PMU_CAP_PERF_METRICS BIT_ULL(15) #define PMU_CAP_LBR_FMT 0x3f struct nested_vmx_msrs { diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index b45396e0a46c..04ccb8c6f7e4 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -229,6 +229,9 @@ static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr) ret = (perf_capabilities & PERF_CAP_PEBS_BASELINE) && ((perf_capabilities & PERF_CAP_PEBS_FORMAT) > 3); break; + case MSR_PERF_METRICS: + ret = intel_pmu_metrics_is_enabled(vcpu) && (pmu->version > 1); + break; default: ret = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0) || get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0) || @@ -357,6 +360,43 @@ static bool 
intel_pmu_handle_lbr_msrs_access(struct kvm_vcpu *vcpu, return true; } +static int intel_pmu_handle_perf_metrics_access(struct kvm_vcpu *vcpu, + struct msr_data *msr_info, bool read) +{ + u32 index = msr_info->index; + struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); + struct kvm_pmc *pmc = get_fixed_pmc(pmu, MSR_CORE_PERF_FIXED_CTR3); + + if (!pmc || index != MSR_PERF_METRICS) + return 1; + + if (read) { + msr_info->data = pmc->extra_config; + } else { + /* + * Save guest PERF_METRICS data into extra_config; + * extra_config is later read and written to the PERF_METRICS + * MSR when the events group is created. + */ + pmc->extra_config = msr_info->data; + if (pmc_is_topdown_metrics_active(pmc)) { + pmc_update_topdown_metrics(pmc); + } else { + /* + * If the slots/vmetrics events group is not + * created yet, set max_nr_events to 2 + * (slots event + vmetrics event), so KVM knows + * topdown metrics profiling is running in guest + * and slots/vmetrics events group would be created + * later. 
+ */ + pmc->max_nr_events = KVM_TD_EVENTS_MAX; + } + } + + return 0; +} + static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) { struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); @@ -376,6 +416,10 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_PEBS_DATA_CFG: msr_info->data = pmu->pebs_data_cfg; break; + case MSR_PERF_METRICS: + if (intel_pmu_handle_perf_metrics_access(vcpu, msr_info, true)) + return 1; + break; default: if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) || (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) { @@ -438,6 +482,10 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) pmu->pebs_data_cfg = data; break; + case MSR_PERF_METRICS: + if (intel_pmu_handle_perf_metrics_access(vcpu, msr_info, false)) + return 1; + break; default: if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) || (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) { diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index c2130d2c8e24..63b6dcc360c2 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -670,6 +670,11 @@ static inline bool intel_pmu_lbr_is_enabled(struct kvm_vcpu *vcpu) return !!vcpu_to_lbr_records(vcpu)->nr; } +static inline bool intel_pmu_metrics_is_enabled(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.perf_capabilities & PMU_CAP_PERF_METRICS; +} + void intel_pmu_cross_mapped_check(struct kvm_pmu *pmu); int intel_pmu_create_guest_lbr_event(struct kvm_vcpu *vcpu); void vmx_passthrough_lbr_msrs(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 95b1ac3bc0b6..5d9fde90370a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1463,6 +1463,7 @@ static const u32 msrs_to_save_pmu[] = { MSR_CORE_PERF_FIXED_CTR_CTRL, MSR_CORE_PERF_GLOBAL_STATUS, MSR_CORE_PERF_GLOBAL_CTRL, MSR_CORE_PERF_GLOBAL_OVF_CTRL, MSR_IA32_PEBS_ENABLE, MSR_IA32_DS_AREA, MSR_PEBS_DATA_CFG, + MSR_PERF_METRICS, /* This part of MSRs should match 
KVM_INTEL_PMC_MAX_GENERIC. */ MSR_ARCH_PERFMON_PERFCTR0, MSR_ARCH_PERFMON_PERFCTR1,

[RFC,v3,11/13] KVM: x86/pmu: Support topdown perf metrics feature

Commit Message

Patch