| Message ID | 20220210102603.42764-1-likexu@tencent.com (mailing list archive) |
|---|---|
| State | New, archived |
| Series | KVM: x86/pmu: Distinguish EVENTSEL bitmasks for uniform event creation and filtering |
On Thu, Feb 10, 2022 at 2:26 AM Like Xu <like.xu.linux@gmail.com> wrote:
>
> From: Like Xu <likexu@tencent.com>
>
> The current usage of EVENTSEL_* macro is a mess in the KVM context. Partly
> because we have a conceptual ambiguity when choosing to create a RAW or
> HARDWARE event: when bits other than HARDWARE_EVENT_MASK are set,
> the pmc_reprogram_counter() will use the RAW type.
>
> By introducing the new macro AMD64_EXTRA_EVENTSEL_EVENT to simplify,
> the following three issues can be addressed in one go:
>
> - the 12 selection bits are used as comparison keys for allow or deny;
> - NON_HARDWARE_EVENT_MASK is only used to determine if a HARDWARE
>   event is programmed or not, a 12-bit selected event will be a RAW event;
>   (jmattson helped report this issue)
> - by reusing AMD64_RAW_EVENT_MASK, the extra 4 selection bits (if set) are
>   passed to the perf correctly and not filtered out by X86_RAW_EVENT_MASK;.
>
> Signed-off-by: Like Xu <likexu@tencent.com>
> ---
>  arch/x86/include/asm/perf_event.h |  3 ++-
>  arch/x86/kvm/pmu.c                | 11 ++++-------
>  arch/x86/kvm/pmu.h                |  6 ++++++
>  3 files changed, 12 insertions(+), 8 deletions(-)
>
> diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
> index 8fc1b5003713..bd068fd19043 100644
> --- a/arch/x86/include/asm/perf_event.h
> +++ b/arch/x86/include/asm/perf_event.h
> @@ -43,8 +43,9 @@
>  #define AMD64_EVENTSEL_INT_CORE_SEL_MASK \
>         (0xFULL << AMD64_EVENTSEL_INT_CORE_SEL_SHIFT)
>
> +#define AMD64_EXTRA_EVENTSEL_EVENT     (0x0FULL << 32)
>  #define AMD64_EVENTSEL_EVENT   \
> -       (ARCH_PERFMON_EVENTSEL_EVENT | (0x0FULL << 32))
> +       (ARCH_PERFMON_EVENTSEL_EVENT | AMD64_EXTRA_EVENTSEL_EVENT)
>  #define INTEL_ARCH_EVENT_MASK  \
>         (ARCH_PERFMON_EVENTSEL_UMASK | ARCH_PERFMON_EVENTSEL_EVENT)
>
> diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
> index 2c98f3ee8df4..99426a8d7f18 100644
> --- a/arch/x86/kvm/pmu.c
> +++ b/arch/x86/kvm/pmu.c
> @@ -198,7 +198,8 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
>
>         filter = srcu_dereference(kvm->arch.pmu_event_filter, &kvm->srcu);
>         if (filter) {
> -               __u64 key = eventsel & AMD64_RAW_EVENT_MASK_NB;
> +               __u64 key = eventsel & (INTEL_ARCH_EVENT_MASK |
> +                                       AMD64_EXTRA_EVENTSEL_EVENT);
>
>                 if (bsearch(&key, filter->events, filter->nevents,
>                             sizeof(__u64), cmp_u64))
> @@ -209,18 +210,14 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
>         if (!allow_event)
>                 return;
>
> -       if (!(eventsel & (ARCH_PERFMON_EVENTSEL_EDGE |
> -                         ARCH_PERFMON_EVENTSEL_INV |
> -                         ARCH_PERFMON_EVENTSEL_CMASK |
> -                         HSW_IN_TX |
> -                         HSW_IN_TX_CHECKPOINTED))) {
> +       if (!(eventsel & NON_HARDWARE_EVENT_MASK)) {

I still don't understand why we even bother doing this lookup in the
first place. What's wrong with simply requesting PERF_TYPE_RAW every
time?

>                 config = kvm_x86_ops.pmu_ops->pmc_perf_hw_id(pmc);
>                 if (config != PERF_COUNT_HW_MAX)
>                         type = PERF_TYPE_HARDWARE;
>         }
>
>         if (type == PERF_TYPE_RAW)
> -               config = eventsel & X86_RAW_EVENT_MASK;
> +               config = eventsel & AMD64_RAW_EVENT_MASK;

This chunk looks a lot like
https://lore.kernel.org/kvm/20220203014813.2130559-2-jmattson@google.com/.
Note that if you don't increase the width of config (as in the first
change of that series), this mask change is ineffective.

>         if (pmc->current_config == eventsel && pmc_resume_counter(pmc))
>                 return;
>
> diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
> index 7a7b8d5b775e..48d867e250bc 100644
> --- a/arch/x86/kvm/pmu.h
> +++ b/arch/x86/kvm/pmu.h
> @@ -17,6 +17,12 @@
>
>  #define MAX_FIXED_COUNTERS 3
>
> +#define KVM_ARCH_PERFMON_EVENTSEL_IGNORE \
> +       (ARCH_PERFMON_EVENTSEL_ANY | ARCH_PERFMON_EVENTSEL_PIN_CONTROL)
> +
> +#define NON_HARDWARE_EVENT_MASK (AMD64_EXTRA_EVENTSEL_EVENT | \
> +       (X86_ALL_EVENT_FLAGS & ~KVM_ARCH_PERFMON_EVENTSEL_IGNORE))
> +
>  struct kvm_event_hw_type_mapping {
>         u8 eventsel;
>         u8 unit_mask;
> --
> 2.35.0
>
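As an aside, the width issue Jim raises can be shown with a stand-alone user-space sketch (this is not KVM code and is not part of the thread; the mask values are copied from arch/x86/include/asm/perf_event.h and the event number is made up): if the variable receiving the masked eventsel is only 32 bits wide, masking with a value that covers bits 32-35 changes nothing, because those bits are truncated on assignment anyway.

```c
#include <stdint.h>
#include <stdio.h>

/* Mask layout copied from arch/x86/include/asm/perf_event.h. */
#define ARCH_PERFMON_EVENTSEL_EVENT	0x000000FFULL
#define AMD64_EXTRA_EVENTSEL_EVENT	(0x0FULL << 32)
#define AMD64_EVENTSEL_EVENT \
	(ARCH_PERFMON_EVENTSEL_EVENT | AMD64_EXTRA_EVENTSEL_EVENT)

int main(void)
{
	/* Hypothetical AMD event with a 12-bit event select, 0x1C0. */
	uint64_t eventsel = (0x1ULL << 32) | 0xC0;

	/* What a 32-bit 'config' ends up holding: bits 32-35 are lost. */
	uint32_t config32 = (uint32_t)(eventsel & AMD64_EVENTSEL_EVENT);
	/* What a 64-bit 'config' holds: the full 12-bit event select. */
	uint64_t config64 = eventsel & AMD64_EVENTSEL_EVENT;

	printf("32-bit config: %#x\n", (unsigned)config32);              /* 0xc0        */
	printf("64-bit config: %#llx\n", (unsigned long long)config64);  /* 0x1000000c0 */
	return 0;
}
```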
On 10/2/2022 10:09 pm, Jim Mattson wrote:
> On Thu, Feb 10, 2022 at 2:26 AM Like Xu <like.xu.linux@gmail.com> wrote:
>>
>> From: Like Xu <likexu@tencent.com>
>>
>> The current usage of EVENTSEL_* macro is a mess in the KVM context. Partly
>> because we have a conceptual ambiguity when choosing to create a RAW or
>> HARDWARE event: when bits other than HARDWARE_EVENT_MASK are set,
>> the pmc_reprogram_counter() will use the RAW type.
>>
>> By introducing the new macro AMD64_EXTRA_EVENTSEL_EVENT to simplify,
>> the following three issues can be addressed in one go:
>>
>> - the 12 selection bits are used as comparison keys for allow or deny;
>> - NON_HARDWARE_EVENT_MASK is only used to determine if a HARDWARE
>>   event is programmed or not, a 12-bit selected event will be a RAW event;
>>   (jmattson helped report this issue)
>> - by reusing AMD64_RAW_EVENT_MASK, the extra 4 selection bits (if set) are
>>   passed to the perf correctly and not filtered out by X86_RAW_EVENT_MASK;.
>>
>> Signed-off-by: Like Xu <likexu@tencent.com>
>> ---
>>  arch/x86/include/asm/perf_event.h |  3 ++-
>>  arch/x86/kvm/pmu.c                | 11 ++++-------
>>  arch/x86/kvm/pmu.h                |  6 ++++++
>>  3 files changed, 12 insertions(+), 8 deletions(-)
>>
>> diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
>> index 8fc1b5003713..bd068fd19043 100644
>> --- a/arch/x86/include/asm/perf_event.h
>> +++ b/arch/x86/include/asm/perf_event.h
>> @@ -43,8 +43,9 @@
>>  #define AMD64_EVENTSEL_INT_CORE_SEL_MASK \
>>         (0xFULL << AMD64_EVENTSEL_INT_CORE_SEL_SHIFT)
>>
>> +#define AMD64_EXTRA_EVENTSEL_EVENT     (0x0FULL << 32)
>>  #define AMD64_EVENTSEL_EVENT   \
>> -       (ARCH_PERFMON_EVENTSEL_EVENT | (0x0FULL << 32))
>> +       (ARCH_PERFMON_EVENTSEL_EVENT | AMD64_EXTRA_EVENTSEL_EVENT)
>>  #define INTEL_ARCH_EVENT_MASK  \
>>         (ARCH_PERFMON_EVENTSEL_UMASK | ARCH_PERFMON_EVENTSEL_EVENT)
>>
>> diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
>> index 2c98f3ee8df4..99426a8d7f18 100644
>> --- a/arch/x86/kvm/pmu.c
>> +++ b/arch/x86/kvm/pmu.c
>> @@ -198,7 +198,8 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
>>
>>         filter = srcu_dereference(kvm->arch.pmu_event_filter, &kvm->srcu);
>>         if (filter) {
>> -               __u64 key = eventsel & AMD64_RAW_EVENT_MASK_NB;
>> +               __u64 key = eventsel & (INTEL_ARCH_EVENT_MASK |
>> +                                       AMD64_EXTRA_EVENTSEL_EVENT);
>>
>>                 if (bsearch(&key, filter->events, filter->nevents,
>>                             sizeof(__u64), cmp_u64))
>> @@ -209,18 +210,14 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
>>         if (!allow_event)
>>                 return;
>>
>> -       if (!(eventsel & (ARCH_PERFMON_EVENTSEL_EDGE |
>> -                         ARCH_PERFMON_EVENTSEL_INV |
>> -                         ARCH_PERFMON_EVENTSEL_CMASK |
>> -                         HSW_IN_TX |
>> -                         HSW_IN_TX_CHECKPOINTED))) {
>> +       if (!(eventsel & NON_HARDWARE_EVENT_MASK)) {
>
> I still don't understand why we even bother doing this lookup in the
> first place. What's wrong with simply requesting PERF_TYPE_RAW every
> time?

Thanks for the constant chasing, I finally got a reply from Peterz:
"think so; the HARDWARE is just a convenience wrapper over RAW IIRC".

Let me take this step and clean it up a bit.
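For context on the RAW-versus-HARDWARE question, the minimal sketch below illustrates what the "convenience wrapper" remark means in ordinary perf_event_open() usage (this is plain user-space code, not KVM code; treating raw event 0xc0 as equivalent to PERF_COUNT_HW_INSTRUCTIONS is an assumption that holds on common x86 parts, not something stated in the thread): PERF_TYPE_HARDWARE takes a generic event index that the perf core maps to a vendor-specific event select, while PERF_TYPE_RAW passes the encoded event select bits straight through.

```c
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

/* Open a disabled, user-only counter for the calling thread on any CPU. */
static int open_counter(uint32_t type, uint64_t config)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = type;	/* PERF_TYPE_HARDWARE or PERF_TYPE_RAW    */
	attr.config = config;	/* generic event id, or raw eventsel bits */
	attr.disabled = 1;
	attr.exclude_kernel = 1;

	return (int)syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
}

int main(void)
{
	/* The generic wrapper ... */
	int hw_fd  = open_counter(PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS);
	/* ... and the raw encoding it typically resolves to on x86 (0xc0). */
	int raw_fd = open_counter(PERF_TYPE_RAW, 0xc0);

	return (hw_fd < 0 || raw_fd < 0) ? 1 : 0;
}
```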
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 8fc1b5003713..bd068fd19043 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -43,8 +43,9 @@
 #define AMD64_EVENTSEL_INT_CORE_SEL_MASK \
        (0xFULL << AMD64_EVENTSEL_INT_CORE_SEL_SHIFT)
 
+#define AMD64_EXTRA_EVENTSEL_EVENT     (0x0FULL << 32)
 #define AMD64_EVENTSEL_EVENT   \
-       (ARCH_PERFMON_EVENTSEL_EVENT | (0x0FULL << 32))
+       (ARCH_PERFMON_EVENTSEL_EVENT | AMD64_EXTRA_EVENTSEL_EVENT)
 #define INTEL_ARCH_EVENT_MASK  \
        (ARCH_PERFMON_EVENTSEL_UMASK | ARCH_PERFMON_EVENTSEL_EVENT)
 
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 2c98f3ee8df4..99426a8d7f18 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -198,7 +198,8 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
 
        filter = srcu_dereference(kvm->arch.pmu_event_filter, &kvm->srcu);
        if (filter) {
-               __u64 key = eventsel & AMD64_RAW_EVENT_MASK_NB;
+               __u64 key = eventsel & (INTEL_ARCH_EVENT_MASK |
+                                       AMD64_EXTRA_EVENTSEL_EVENT);
 
                if (bsearch(&key, filter->events, filter->nevents,
                            sizeof(__u64), cmp_u64))
@@ -209,18 +210,14 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
        if (!allow_event)
                return;
 
-       if (!(eventsel & (ARCH_PERFMON_EVENTSEL_EDGE |
-                         ARCH_PERFMON_EVENTSEL_INV |
-                         ARCH_PERFMON_EVENTSEL_CMASK |
-                         HSW_IN_TX |
-                         HSW_IN_TX_CHECKPOINTED))) {
+       if (!(eventsel & NON_HARDWARE_EVENT_MASK)) {
                config = kvm_x86_ops.pmu_ops->pmc_perf_hw_id(pmc);
                if (config != PERF_COUNT_HW_MAX)
                        type = PERF_TYPE_HARDWARE;
        }
 
        if (type == PERF_TYPE_RAW)
-               config = eventsel & X86_RAW_EVENT_MASK;
+               config = eventsel & AMD64_RAW_EVENT_MASK;
 
        if (pmc->current_config == eventsel && pmc_resume_counter(pmc))
                return;
 
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index 7a7b8d5b775e..48d867e250bc 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -17,6 +17,12 @@
 
 #define MAX_FIXED_COUNTERS 3
 
+#define KVM_ARCH_PERFMON_EVENTSEL_IGNORE \
+       (ARCH_PERFMON_EVENTSEL_ANY | ARCH_PERFMON_EVENTSEL_PIN_CONTROL)
+
+#define NON_HARDWARE_EVENT_MASK (AMD64_EXTRA_EVENTSEL_EVENT | \
+       (X86_ALL_EVENT_FLAGS & ~KVM_ARCH_PERFMON_EVENTSEL_IGNORE))
+
 struct kvm_event_hw_type_mapping {
        u8 eventsel;
        u8 unit_mask;
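To make the pmu.c filtering hunk easier to follow, the stand-alone sketch below (the masks are copied from perf_event.h; the example eventsel value is invented) shows what the new bsearch() key contains: only the 12-bit event select plus the unit mask survive, while control bits such as USR/OS/INT/EN are stripped before the comparison against the userspace-supplied filter entries.

```c
#include <stdint.h>
#include <stdio.h>

/* Mask layout as in arch/x86/include/asm/perf_event.h (simplified). */
#define ARCH_PERFMON_EVENTSEL_EVENT	0x000000FFULL
#define ARCH_PERFMON_EVENTSEL_UMASK	0x0000FF00ULL
#define INTEL_ARCH_EVENT_MASK \
	(ARCH_PERFMON_EVENTSEL_UMASK | ARCH_PERFMON_EVENTSEL_EVENT)
#define AMD64_EXTRA_EVENTSEL_EVENT	(0x0FULL << 32)

int main(void)
{
	/*
	 * Hypothetical guest eventsel: 12-bit event select 0x2C0, unit mask
	 * 0x01, plus USR/OS/INT/EN control bits (0x53 in bits 23:16) that
	 * must not affect whether the event is allowed or denied.
	 */
	uint64_t eventsel = (0x2ULL << 32) | 0x5301C0;

	/* The comparison key used against filter->events. */
	uint64_t key = eventsel & (INTEL_ARCH_EVENT_MASK |
				   AMD64_EXTRA_EVENTSEL_EVENT);

	/* key == 0x2000001c0: event select and unit mask only. */
	printf("filter key: %#llx\n", (unsigned long long)key);
	return 0;
}
```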