diff mbox series

KVM: x86/pmu: Distinguish EVENTSEL bitmasks for uniform event creation and filtering

Message ID 20220210102603.42764-1-likexu@tencent.com (mailing list archive)
State New, archived
Headers show
Series KVM: x86/pmu: Distinguish EVENTSEL bitmasks for uniform event creation and filtering | expand

Commit Message

Like Xu Feb. 10, 2022, 10:26 a.m. UTC
From: Like Xu <likexu@tencent.com>

The current usage of EVENTSEL_* macro is a mess in the KVM context. Partly
because we have a conceptual ambiguity when choosing to create a RAW or
HARDWARE event: when bits other than HARDWARE_EVENT_MASK are set,
the pmc_reprogram_counter() will use the RAW type.

By introducing the new macro AMD64_EXTRA_EVENTSEL_EVENT to simplify,
the following three issues can be addressed in one go:

- the 12 selection bits are used as comparison keys for allow or deny;
- NON_HARDWARE_EVENT_MASK is only used to determine if a HARDWARE
  event is programmed or not, a 12-bit selected event will be a RAW event;
  (jmattson helped report this issue)
- by reusing AMD64_RAW_EVENT_MASK, the extra 4 selection bits (if set) are
  passed to the perf correctly and not filtered out by X86_RAW_EVENT_MASK;.

Signed-off-by: Like Xu <likexu@tencent.com>
---
 arch/x86/include/asm/perf_event.h |  3 ++-
 arch/x86/kvm/pmu.c                | 11 ++++-------
 arch/x86/kvm/pmu.h                |  6 ++++++
 3 files changed, 12 insertions(+), 8 deletions(-)

Comments

Jim Mattson Feb. 10, 2022, 2:09 p.m. UTC | #1
On Thu, Feb 10, 2022 at 2:26 AM Like Xu <like.xu.linux@gmail.com> wrote:
>
> From: Like Xu <likexu@tencent.com>
>
> The current usage of EVENTSEL_* macro is a mess in the KVM context. Partly
> because we have a conceptual ambiguity when choosing to create a RAW or
> HARDWARE event: when bits other than HARDWARE_EVENT_MASK are set,
> the pmc_reprogram_counter() will use the RAW type.
>
> By introducing the new macro AMD64_EXTRA_EVENTSEL_EVENT to simplify,
> the following three issues can be addressed in one go:
>
> - the 12 selection bits are used as comparison keys for allow or deny;
> - NON_HARDWARE_EVENT_MASK is only used to determine if a HARDWARE
>   event is programmed or not, a 12-bit selected event will be a RAW event;
>   (jmattson helped report this issue)
> - by reusing AMD64_RAW_EVENT_MASK, the extra 4 selection bits (if set) are
>   passed to the perf correctly and not filtered out by X86_RAW_EVENT_MASK;.
>
> Signed-off-by: Like Xu <likexu@tencent.com>
> ---
>  arch/x86/include/asm/perf_event.h |  3 ++-
>  arch/x86/kvm/pmu.c                | 11 ++++-------
>  arch/x86/kvm/pmu.h                |  6 ++++++
>  3 files changed, 12 insertions(+), 8 deletions(-)
>
> diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
> index 8fc1b5003713..bd068fd19043 100644
> --- a/arch/x86/include/asm/perf_event.h
> +++ b/arch/x86/include/asm/perf_event.h
> @@ -43,8 +43,9 @@
>  #define AMD64_EVENTSEL_INT_CORE_SEL_MASK               \
>         (0xFULL << AMD64_EVENTSEL_INT_CORE_SEL_SHIFT)
>
> +#define AMD64_EXTRA_EVENTSEL_EVENT                             (0x0FULL << 32)
>  #define AMD64_EVENTSEL_EVENT   \
> -       (ARCH_PERFMON_EVENTSEL_EVENT | (0x0FULL << 32))
> +       (ARCH_PERFMON_EVENTSEL_EVENT | AMD64_EXTRA_EVENTSEL_EVENT)
>  #define INTEL_ARCH_EVENT_MASK  \
>         (ARCH_PERFMON_EVENTSEL_UMASK | ARCH_PERFMON_EVENTSEL_EVENT)
>
> diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
> index 2c98f3ee8df4..99426a8d7f18 100644
> --- a/arch/x86/kvm/pmu.c
> +++ b/arch/x86/kvm/pmu.c
> @@ -198,7 +198,8 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
>
>         filter = srcu_dereference(kvm->arch.pmu_event_filter, &kvm->srcu);
>         if (filter) {
> -               __u64 key = eventsel & AMD64_RAW_EVENT_MASK_NB;
> +               __u64 key = eventsel & (INTEL_ARCH_EVENT_MASK |
> +                                       AMD64_EXTRA_EVENTSEL_EVENT);
>
>                 if (bsearch(&key, filter->events, filter->nevents,
>                             sizeof(__u64), cmp_u64))
> @@ -209,18 +210,14 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
>         if (!allow_event)
>                 return;
>
> -       if (!(eventsel & (ARCH_PERFMON_EVENTSEL_EDGE |
> -                         ARCH_PERFMON_EVENTSEL_INV |
> -                         ARCH_PERFMON_EVENTSEL_CMASK |
> -                         HSW_IN_TX |
> -                         HSW_IN_TX_CHECKPOINTED))) {
> +       if (!(eventsel & NON_HARDWARE_EVENT_MASK)) {

I still don't understand why we even bother doing this lookup in the
first place. What's wrong with simply requesting PERF_TYPE_RAW every
time?

>                 config = kvm_x86_ops.pmu_ops->pmc_perf_hw_id(pmc);
>                 if (config != PERF_COUNT_HW_MAX)
>                         type = PERF_TYPE_HARDWARE;
>         }
>
>         if (type == PERF_TYPE_RAW)
> -               config = eventsel & X86_RAW_EVENT_MASK;
> +               config = eventsel & AMD64_RAW_EVENT_MASK;

This chunk looks a lot like
https://lore.kernel.org/kvm/20220203014813.2130559-2-jmattson@google.com/.
Note that if you don't increase the width of config (as in the first
change of that series), this mask change is ineffective.

>         if (pmc->current_config == eventsel && pmc_resume_counter(pmc))
>                 return;
> diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
> index 7a7b8d5b775e..48d867e250bc 100644
> --- a/arch/x86/kvm/pmu.h
> +++ b/arch/x86/kvm/pmu.h
> @@ -17,6 +17,12 @@
>
>  #define MAX_FIXED_COUNTERS     3
>
> +#define KVM_ARCH_PERFMON_EVENTSEL_IGNORE \
> +       (ARCH_PERFMON_EVENTSEL_ANY | ARCH_PERFMON_EVENTSEL_PIN_CONTROL)
> +
> +#define NON_HARDWARE_EVENT_MASK        (AMD64_EXTRA_EVENTSEL_EVENT | \
> +        (X86_ALL_EVENT_FLAGS & ~KVM_ARCH_PERFMON_EVENTSEL_IGNORE))
> +
>  struct kvm_event_hw_type_mapping {
>         u8 eventsel;
>         u8 unit_mask;
> --
> 2.35.0
>
Like Xu Feb. 17, 2022, 9:05 a.m. UTC | #2
On 10/2/2022 10:09 pm, Jim Mattson wrote:
> On Thu, Feb 10, 2022 at 2:26 AM Like Xu <like.xu.linux@gmail.com> wrote:
>>
>> From: Like Xu <likexu@tencent.com>
>>
>> The current usage of EVENTSEL_* macro is a mess in the KVM context. Partly
>> because we have a conceptual ambiguity when choosing to create a RAW or
>> HARDWARE event: when bits other than HARDWARE_EVENT_MASK are set,
>> the pmc_reprogram_counter() will use the RAW type.
>>
>> By introducing the new macro AMD64_EXTRA_EVENTSEL_EVENT to simplify,
>> the following three issues can be addressed in one go:
>>
>> - the 12 selection bits are used as comparison keys for allow or deny;
>> - NON_HARDWARE_EVENT_MASK is only used to determine if a HARDWARE
>>    event is programmed or not, a 12-bit selected event will be a RAW event;
>>    (jmattson helped report this issue)
>> - by reusing AMD64_RAW_EVENT_MASK, the extra 4 selection bits (if set) are
>>    passed to the perf correctly and not filtered out by X86_RAW_EVENT_MASK;.
>>
>> Signed-off-by: Like Xu <likexu@tencent.com>
>> ---
>>   arch/x86/include/asm/perf_event.h |  3 ++-
>>   arch/x86/kvm/pmu.c                | 11 ++++-------
>>   arch/x86/kvm/pmu.h                |  6 ++++++
>>   3 files changed, 12 insertions(+), 8 deletions(-)
>>
>> diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
>> index 8fc1b5003713..bd068fd19043 100644
>> --- a/arch/x86/include/asm/perf_event.h
>> +++ b/arch/x86/include/asm/perf_event.h
>> @@ -43,8 +43,9 @@
>>   #define AMD64_EVENTSEL_INT_CORE_SEL_MASK               \
>>          (0xFULL << AMD64_EVENTSEL_INT_CORE_SEL_SHIFT)
>>
>> +#define AMD64_EXTRA_EVENTSEL_EVENT                             (0x0FULL << 32)
>>   #define AMD64_EVENTSEL_EVENT   \
>> -       (ARCH_PERFMON_EVENTSEL_EVENT | (0x0FULL << 32))
>> +       (ARCH_PERFMON_EVENTSEL_EVENT | AMD64_EXTRA_EVENTSEL_EVENT)
>>   #define INTEL_ARCH_EVENT_MASK  \
>>          (ARCH_PERFMON_EVENTSEL_UMASK | ARCH_PERFMON_EVENTSEL_EVENT)
>>
>> diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
>> index 2c98f3ee8df4..99426a8d7f18 100644
>> --- a/arch/x86/kvm/pmu.c
>> +++ b/arch/x86/kvm/pmu.c
>> @@ -198,7 +198,8 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
>>
>>          filter = srcu_dereference(kvm->arch.pmu_event_filter, &kvm->srcu);
>>          if (filter) {
>> -               __u64 key = eventsel & AMD64_RAW_EVENT_MASK_NB;
>> +               __u64 key = eventsel & (INTEL_ARCH_EVENT_MASK |
>> +                                       AMD64_EXTRA_EVENTSEL_EVENT);
>>
>>                  if (bsearch(&key, filter->events, filter->nevents,
>>                              sizeof(__u64), cmp_u64))
>> @@ -209,18 +210,14 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
>>          if (!allow_event)
>>                  return;
>>
>> -       if (!(eventsel & (ARCH_PERFMON_EVENTSEL_EDGE |
>> -                         ARCH_PERFMON_EVENTSEL_INV |
>> -                         ARCH_PERFMON_EVENTSEL_CMASK |
>> -                         HSW_IN_TX |
>> -                         HSW_IN_TX_CHECKPOINTED))) {
>> +       if (!(eventsel & NON_HARDWARE_EVENT_MASK)) {
> 
> I still don't understand why we even bother doing this lookup in the
> first place. What's wrong with simply requesting PERF_TYPE_RAW every
> time?

Thanks for the constant chasing, I finally got a reply from Peterz:
"think so; the HARDWARE is just a convenience wrapper over RAW IIRC".

Let me take this step and clean it up a bit.
diff mbox series

Patch

diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 8fc1b5003713..bd068fd19043 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -43,8 +43,9 @@ 
 #define AMD64_EVENTSEL_INT_CORE_SEL_MASK		\
 	(0xFULL << AMD64_EVENTSEL_INT_CORE_SEL_SHIFT)
 
+#define AMD64_EXTRA_EVENTSEL_EVENT				(0x0FULL << 32)
 #define AMD64_EVENTSEL_EVENT	\
-	(ARCH_PERFMON_EVENTSEL_EVENT | (0x0FULL << 32))
+	(ARCH_PERFMON_EVENTSEL_EVENT | AMD64_EXTRA_EVENTSEL_EVENT)
 #define INTEL_ARCH_EVENT_MASK	\
 	(ARCH_PERFMON_EVENTSEL_UMASK | ARCH_PERFMON_EVENTSEL_EVENT)
 
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 2c98f3ee8df4..99426a8d7f18 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -198,7 +198,8 @@  void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
 
 	filter = srcu_dereference(kvm->arch.pmu_event_filter, &kvm->srcu);
 	if (filter) {
-		__u64 key = eventsel & AMD64_RAW_EVENT_MASK_NB;
+		__u64 key = eventsel & (INTEL_ARCH_EVENT_MASK |
+					AMD64_EXTRA_EVENTSEL_EVENT);
 
 		if (bsearch(&key, filter->events, filter->nevents,
 			    sizeof(__u64), cmp_u64))
@@ -209,18 +210,14 @@  void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
 	if (!allow_event)
 		return;
 
-	if (!(eventsel & (ARCH_PERFMON_EVENTSEL_EDGE |
-			  ARCH_PERFMON_EVENTSEL_INV |
-			  ARCH_PERFMON_EVENTSEL_CMASK |
-			  HSW_IN_TX |
-			  HSW_IN_TX_CHECKPOINTED))) {
+	if (!(eventsel & NON_HARDWARE_EVENT_MASK)) {
 		config = kvm_x86_ops.pmu_ops->pmc_perf_hw_id(pmc);
 		if (config != PERF_COUNT_HW_MAX)
 			type = PERF_TYPE_HARDWARE;
 	}
 
 	if (type == PERF_TYPE_RAW)
-		config = eventsel & X86_RAW_EVENT_MASK;
+		config = eventsel & AMD64_RAW_EVENT_MASK;
 
 	if (pmc->current_config == eventsel && pmc_resume_counter(pmc))
 		return;
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index 7a7b8d5b775e..48d867e250bc 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -17,6 +17,12 @@ 
 
 #define MAX_FIXED_COUNTERS	3
 
+#define KVM_ARCH_PERFMON_EVENTSEL_IGNORE \
+	(ARCH_PERFMON_EVENTSEL_ANY | ARCH_PERFMON_EVENTSEL_PIN_CONTROL)
+
+#define NON_HARDWARE_EVENT_MASK	(AMD64_EXTRA_EVENTSEL_EVENT | \
+	 (X86_ALL_EVENT_FLAGS & ~KVM_ARCH_PERFMON_EVENTSEL_IGNORE))
+
 struct kvm_event_hw_type_mapping {
 	u8 eventsel;
 	u8 unit_mask;