[v2,2/6] KVM: x86/pmu: Refactoring find_arch_event() to pmc_perf_hw_id()

Message ID 20211130074221.93635-3-likexu@tencent.com (mailing list archive)
State New, archived
Series KVM: x86/pmu: Count two basic events for emulated instructions

Commit Message

Like Xu Nov. 30, 2021, 7:42 a.m. UTC
From: Like Xu <likexu@tencent.com>

The find_arch_event() returns an "unsigned int" value,
which is used by the pmc_reprogram_counter() to
program a PERF_TYPE_HARDWARE type perf_event.

The returned value is actually the kernel-defined generic
perf_hw_id; let's rename it to pmc_perf_hw_id() with simpler
incoming parameters for better self-explanation.

Signed-off-by: Like Xu <likexu@tencent.com>
---
 arch/x86/kvm/pmu.c           | 8 +-------
 arch/x86/kvm/pmu.h           | 3 +--
 arch/x86/kvm/svm/pmu.c       | 8 ++++----
 arch/x86/kvm/vmx/pmu_intel.c | 9 +++++----
 4 files changed, 11 insertions(+), 17 deletions(-)

Comments

Jim Mattson Dec. 9, 2021, 3:52 a.m. UTC | #1
On Mon, Nov 29, 2021 at 11:42 PM Like Xu <like.xu.linux@gmail.com> wrote:
>
> From: Like Xu <likexu@tencent.com>
>
> The find_arch_event() returns a "unsigned int" value,
> which is used by the pmc_reprogram_counter() to
> program a PERF_TYPE_HARDWARE type perf_event.
>
> The returned value is actually the kernel defined generic

Typo: generic.

> perf_hw_id, let's rename it to pmc_perf_hw_id() with simpler
> incoming parameters for better self-explanation.
>
> Signed-off-by: Like Xu <likexu@tencent.com>
> ---
>  arch/x86/kvm/pmu.c           | 8 +-------
>  arch/x86/kvm/pmu.h           | 3 +--
>  arch/x86/kvm/svm/pmu.c       | 8 ++++----
>  arch/x86/kvm/vmx/pmu_intel.c | 9 +++++----
>  4 files changed, 11 insertions(+), 17 deletions(-)
>
> diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
> index 09873f6488f7..3b3ccf5b1106 100644
> --- a/arch/x86/kvm/pmu.c
> +++ b/arch/x86/kvm/pmu.c
> @@ -174,7 +174,6 @@ static bool pmc_resume_counter(struct kvm_pmc *pmc)
>  void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
>  {
>         unsigned config, type = PERF_TYPE_RAW;
> -       u8 event_select, unit_mask;
>         struct kvm *kvm = pmc->vcpu->kvm;
>         struct kvm_pmu_event_filter *filter;
>         int i;
> @@ -206,17 +205,12 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
>         if (!allow_event)
>                 return;
>
> -       event_select = eventsel & ARCH_PERFMON_EVENTSEL_EVENT;
> -       unit_mask = (eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
> -
>         if (!(eventsel & (ARCH_PERFMON_EVENTSEL_EDGE |
>                           ARCH_PERFMON_EVENTSEL_INV |
>                           ARCH_PERFMON_EVENTSEL_CMASK |
>                           HSW_IN_TX |
>                           HSW_IN_TX_CHECKPOINTED))) {

The mechanics of the change look fine, but I do have some questions,
for my own understanding.

Why don't we just use PERF_TYPE_RAW for guest counters all of the
time? What is the advantage of matching entries in a table so that we
can use PERF_TYPE_HARDWARE?

Why do the HSW_IN_TX* bits result in bypassing this clause, when these
bits are extracted as arguments to pmc_reprogram_counter below?

Reviewed-by: Jim Mattson <jmattson@google.com>
Like Xu Dec. 9, 2021, 8:11 a.m. UTC | #2
On 9/12/2021 11:52 am, Jim Mattson wrote:
> On Mon, Nov 29, 2021 at 11:42 PM Like Xu <like.xu.linux@gmail.com> wrote:
>>
>> From: Like Xu <likexu@tencent.com>
>>
>> The find_arch_event() returns a "unsigned int" value,
>> which is used by the pmc_reprogram_counter() to
>> program a PERF_TYPE_HARDWARE type perf_event.
>>
>> The returned value is actually the kernel defined generic
> 
> Typo: generic.
> 
>> perf_hw_id, let's rename it to pmc_perf_hw_id() with simpler
>> incoming parameters for better self-explanation.
>>
>> Signed-off-by: Like Xu <likexu@tencent.com>
>> ---
>>   arch/x86/kvm/pmu.c           | 8 +-------
>>   arch/x86/kvm/pmu.h           | 3 +--
>>   arch/x86/kvm/svm/pmu.c       | 8 ++++----
>>   arch/x86/kvm/vmx/pmu_intel.c | 9 +++++----
>>   4 files changed, 11 insertions(+), 17 deletions(-)
>>
>> diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
>> index 09873f6488f7..3b3ccf5b1106 100644
>> --- a/arch/x86/kvm/pmu.c
>> +++ b/arch/x86/kvm/pmu.c
>> @@ -174,7 +174,6 @@ static bool pmc_resume_counter(struct kvm_pmc *pmc)
>>   void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
>>   {
>>          unsigned config, type = PERF_TYPE_RAW;
>> -       u8 event_select, unit_mask;
>>          struct kvm *kvm = pmc->vcpu->kvm;
>>          struct kvm_pmu_event_filter *filter;
>>          int i;
>> @@ -206,17 +205,12 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
>>          if (!allow_event)
>>                  return;
>>
>> -       event_select = eventsel & ARCH_PERFMON_EVENTSEL_EVENT;
>> -       unit_mask = (eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
>> -
>>          if (!(eventsel & (ARCH_PERFMON_EVENTSEL_EDGE |
>>                            ARCH_PERFMON_EVENTSEL_INV |
>>                            ARCH_PERFMON_EVENTSEL_CMASK |
>>                            HSW_IN_TX |
>>                            HSW_IN_TX_CHECKPOINTED))) {
> 
> The mechanics of the change look fine, but I do have some questions,
> for my own understanding.
> 
> Why don't we just use PERF_TYPE_RAW for guest counters all of the
> time? What is the advantage of matching entries in a table so that we
> can use PERF_TYPE_HARDWARE?

The first reason is that we need PERF_TYPE_HARDWARE for fixed counters.

And then we might wonder whether we can create a perf_event faster
using PERF_TYPE_HARDWARE compared to PERF_TYPE_RAW.

But the (current) answer is no, and probably the opposite:

# Cost (in nanoseconds) of one perf_event_create_kernel_counter() call:

                        Max          Min        Avg
PERF_TYPE_RAW           1072211      11122      41681.7
PERF_TYPE_HARDWARE      46184215     16194      250650
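
For reference, a minimal sketch of how such a measurement could be
taken from kernel context (my own illustration; the thread does not
show the actual harness that produced these numbers):

	/* Hedged sketch: time one counter creation, in nanoseconds. */
	static u64 measure_create_ns(u32 type, u64 config)
	{
		struct perf_event_attr attr = {
			.type   = type,	/* PERF_TYPE_RAW or PERF_TYPE_HARDWARE */
			.size   = sizeof(attr),
			.config = config,
			.pinned = true,
		};
		struct perf_event *event;
		u64 t0, t1;

		t0 = ktime_get_ns();
		event = perf_event_create_kernel_counter(&attr, -1, current,
							 NULL, NULL);
		t1 = ktime_get_ns();

		if (!IS_ERR(event))
			perf_event_release_kernel(event);
		return t1 - t0;
	}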

So why don't we just use PERF_TYPE_RAW for all gp counters?

Hi Peter, do you have any objections to this proposal?

> 
> Why do the HSW_IN_TX* bits result in bypassing this clause, when these
> bits are extracted as arguments to pmc_reprogram_counter below?

Once upon a time, PERF_TYPE_RAW was introduced in perf with the
comment "available TYPE space, raw is the max value". Per my
understanding, that makes it the final type choice for creating a
valid perf_event when the HSW_IN_TX* bits are set, and KVM then has
to adjust other perf_event_attr fields for the HSW_IN_TX feature
with the help of the extracted arguments.
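
For context, this is roughly how pmc_reprogram_counter() consumes the
extracted in_tx/in_tx_cp arguments at the time of this series
(condensed from arch/x86/kvm/pmu.c; a paraphrase, not the verbatim
source):

	struct perf_event_attr attr = {
		.type   = type,	/* PERF_TYPE_RAW when HSW_IN_TX* bits are set */
		.size   = sizeof(attr),
		.config = config,
	};

	if (in_tx)
		attr.config |= HSW_IN_TX;
	if (in_tx_cp) {
		/* IN_TX_CHECKPOINTED is not supported with a nonzero period */
		attr.sample_period = 0;
		attr.config |= HSW_IN_TX_CHECKPOINTED;
	}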

> 
> Reviewed-by: Jim Mattson <jmattson@google.com>
>
Paolo Bonzini Dec. 9, 2021, 7:24 p.m. UTC | #3
On 12/9/21 04:52, Jim Mattson wrote:
> Why don't we just use PERF_TYPE_RAW for guest counters all of the
> time? What is the advantage of matching entries in a table so that we
> can use PERF_TYPE_HARDWARE?

In theory it's so that hosts without support for architectural PMU 
events can map the architectural PMU events to their own.  In practice, 
we can probably return early from intel_pmu_refresh if the host does not 
support X86_FEATURE_ARCH_PERFMON, because only the oldest Pentium 4 
toasters^Wprocessors probably have VMX but lack architectural PMU.
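
A minimal sketch of that early return (intel_pmu_refresh() is real,
but this guard is only the suggestion above, not code in the tree):

	static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
	{
		/*
		 * Hypothetical guard: without an architectural PMU on
		 * the host, leave the guest PMU unconfigured.
		 */
		if (!boot_cpu_has(X86_FEATURE_ARCH_PERFMON))
			return;

		/* ... existing CPUID.0xA-based setup continues here ... */
	}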

Paolo
Jim Mattson Feb. 5, 2022, 1:55 a.m. UTC | #4
On Mon, Nov 29, 2021 at 11:42 PM Like Xu <like.xu.linux@gmail.com> wrote:
>
> From: Like Xu <likexu@tencent.com>
>
> The find_arch_event() returns a "unsigned int" value,
> which is used by the pmc_reprogram_counter() to
> program a PERF_TYPE_HARDWARE type perf_event.
>
> The returned value is actually the kernel defined generic
> perf_hw_id, let's rename it to pmc_perf_hw_id() with simpler
> incoming parameters for better self-explanation.
>
> Signed-off-by: Like Xu <likexu@tencent.com>
> ---
>  arch/x86/kvm/pmu.c           | 8 +-------
>  arch/x86/kvm/pmu.h           | 3 +--
>  arch/x86/kvm/svm/pmu.c       | 8 ++++----
>  arch/x86/kvm/vmx/pmu_intel.c | 9 +++++----
>  4 files changed, 11 insertions(+), 17 deletions(-)
>
> diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
> index 09873f6488f7..3b3ccf5b1106 100644
> --- a/arch/x86/kvm/pmu.c
> +++ b/arch/x86/kvm/pmu.c
> @@ -174,7 +174,6 @@ static bool pmc_resume_counter(struct kvm_pmc *pmc)
>  void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
>  {
>         unsigned config, type = PERF_TYPE_RAW;
> -       u8 event_select, unit_mask;
>         struct kvm *kvm = pmc->vcpu->kvm;
>         struct kvm_pmu_event_filter *filter;
>         int i;
> @@ -206,17 +205,12 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
>         if (!allow_event)
>                 return;
>
> -       event_select = eventsel & ARCH_PERFMON_EVENTSEL_EVENT;
> -       unit_mask = (eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
> -
>         if (!(eventsel & (ARCH_PERFMON_EVENTSEL_EDGE |
>                           ARCH_PERFMON_EVENTSEL_INV |
>                           ARCH_PERFMON_EVENTSEL_CMASK |
>                           HSW_IN_TX |
>                           HSW_IN_TX_CHECKPOINTED))) {
> -               config = kvm_x86_ops.pmu_ops->find_arch_event(pmc_to_pmu(pmc),
> -                                                     event_select,
> -                                                     unit_mask);
> +               config = kvm_x86_ops.pmu_ops->pmc_perf_hw_id(pmc);
>                 if (config != PERF_COUNT_HW_MAX)
>                         type = PERF_TYPE_HARDWARE;
>         }
> diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
> index 59d6b76203d5..dd7dbb1c5048 100644
> --- a/arch/x86/kvm/pmu.h
> +++ b/arch/x86/kvm/pmu.h
> @@ -24,8 +24,7 @@ struct kvm_event_hw_type_mapping {
>  };
>
>  struct kvm_pmu_ops {
> -       unsigned (*find_arch_event)(struct kvm_pmu *pmu, u8 event_select,
> -                                   u8 unit_mask);
> +       unsigned int (*pmc_perf_hw_id)(struct kvm_pmc *pmc);
>         unsigned (*find_fixed_event)(int idx);
>         bool (*pmc_is_enabled)(struct kvm_pmc *pmc);
>         struct kvm_pmc *(*pmc_idx_to_pmc)(struct kvm_pmu *pmu, int pmc_idx);
> diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
> index 0cf05e4caa4c..fb0ce8cda8a7 100644
> --- a/arch/x86/kvm/svm/pmu.c
> +++ b/arch/x86/kvm/svm/pmu.c
> @@ -138,10 +138,10 @@ static inline struct kvm_pmc *get_gp_pmc_amd(struct kvm_pmu *pmu, u32 msr,
>         return &pmu->gp_counters[msr_to_index(msr)];
>  }
>
> -static unsigned amd_find_arch_event(struct kvm_pmu *pmu,
> -                                   u8 event_select,
> -                                   u8 unit_mask)
> +static unsigned int amd_pmc_perf_hw_id(struct kvm_pmc *pmc)
>  {
> +       u8 event_select = pmc->eventsel & ARCH_PERFMON_EVENTSEL_EVENT;
On AMD, the event select is 12 bits.
> +       u8 unit_mask = (pmc->eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
>         int i;
>
>         for (i = 0; i < ARRAY_SIZE(amd_event_mapping); i++)
> @@ -323,7 +323,7 @@ static void amd_pmu_reset(struct kvm_vcpu *vcpu)
>  }
>
>  struct kvm_pmu_ops amd_pmu_ops = {
> -       .find_arch_event = amd_find_arch_event,
> +       .pmc_perf_hw_id = amd_pmc_perf_hw_id,
>         .find_fixed_event = amd_find_fixed_event,
>         .pmc_is_enabled = amd_pmc_is_enabled,
>         .pmc_idx_to_pmc = amd_pmc_idx_to_pmc,
> diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
> index b7ab5fd03681..67a0188ecdc5 100644
> --- a/arch/x86/kvm/vmx/pmu_intel.c
> +++ b/arch/x86/kvm/vmx/pmu_intel.c
> @@ -68,10 +68,11 @@ static void global_ctrl_changed(struct kvm_pmu *pmu, u64 data)
>                 reprogram_counter(pmu, bit);
>  }
>
> -static unsigned intel_find_arch_event(struct kvm_pmu *pmu,
> -                                     u8 event_select,
> -                                     u8 unit_mask)
> +static unsigned int intel_pmc_perf_hw_id(struct kvm_pmc *pmc)
>  {
> +       struct kvm_pmu *pmu = pmc_to_pmu(pmc);
> +       u8 event_select = pmc->eventsel & ARCH_PERFMON_EVENTSEL_EVENT;
> +       u8 unit_mask = (pmc->eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
>         int i;
>
>         for (i = 0; i < ARRAY_SIZE(intel_arch_events); i++)
> @@ -719,7 +720,7 @@ static void intel_pmu_cleanup(struct kvm_vcpu *vcpu)
>  }
>
>  struct kvm_pmu_ops intel_pmu_ops = {
> -       .find_arch_event = intel_find_arch_event,
> +       .pmc_perf_hw_id = intel_pmc_perf_hw_id,
>         .find_fixed_event = intel_find_fixed_event,
>         .pmc_is_enabled = intel_pmc_is_enabled,
>         .pmc_idx_to_pmc = intel_pmc_idx_to_pmc,
> --
> 2.33.1
>
Like Xu Feb. 9, 2022, 9 a.m. UTC | #5
On 5/2/2022 9:55 am, Jim Mattson wrote:
>> +static unsigned int amd_pmc_perf_hw_id(struct kvm_pmc *pmc)
>>   {
>> +       u8 event_select = pmc->eventsel & ARCH_PERFMON_EVENTSEL_EVENT;
> On AMD, the event select is 12 bits.

Thanks for your careful review; we are aware of this fact.

This function still works when it looks up the perf_hw_id by only the
low 16 bits, because we currently do not have any event with a 12-bit
event select defined in amd_event_mapping[]. Such events (if any)
will be programmed as PERF_TYPE_RAW.

IMO, a minor patch that renames some AMD variables and updates the relevant comments
to avoid misunderstandings is quite acceptable.
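
Something along these lines, as an illustrative sketch (the comment is
mine, written against the lookup in this patch):

	static unsigned int amd_pmc_perf_hw_id(struct kvm_pmc *pmc)
	{
		/*
		 * The full AMD event select is 12 bits (bits 7:0 plus 35:32),
		 * but only the low 8 bits are compared here. That is safe only
		 * while amd_event_mapping[] contains no event with a non-zero
		 * high nibble; such events fall through to PERF_TYPE_RAW.
		 */
		u8 event_select = pmc->eventsel & ARCH_PERFMON_EVENTSEL_EVENT;
		u8 unit_mask = (pmc->eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
		int i;

		for (i = 0; i < ARRAY_SIZE(amd_event_mapping); i++)
			if (amd_event_mapping[i].eventsel == event_select
			    && amd_event_mapping[i].unit_mask == unit_mask)
				break;

		if (i == ARRAY_SIZE(amd_event_mapping))
			return PERF_COUNT_HW_MAX;

		return amd_event_mapping[i].event_type;
	}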

Thanks,
Like Xu
Jim Mattson Feb. 9, 2022, 7:30 p.m. UTC | #6
On Wed, Feb 9, 2022 at 1:00 AM Like Xu <like.xu.linux@gmail.com> wrote:
>
> On 5/2/2022 9:55 am, Jim Mattson wrote:
> >> +static unsigned int amd_pmc_perf_hw_id(struct kvm_pmc *pmc)
> >>   {
> >> +       u8 event_select = pmc->eventsel & ARCH_PERFMON_EVENTSEL_EVENT;
> > On AMD, the event select is 12 bits.
>
> Thanks for your careful review; we are aware of this fact.
>
> This function still works when it looks up the perf_hw_id by only the
> low 16 bits, because we currently do not have any event with a 12-bit
> event select defined in amd_event_mapping[]. Such events (if any)
> will be programmed as PERF_TYPE_RAW.

I beg to differ. It doesn't matter whether there are 12-bit event
selects in amd_event_mapping[] or not. The fundamental problem is that
the equality operation on event selects is broken, because it ignores
the high 4 bits. Hence, we may actually find an entry in that table
that we *think* is for the requested event, but instead it's for some
other event with 0 in the high 4 bits. For example, if the guest
requests event 0x1d0 (retired fused instructions), they will get event
0xd0 instead. According to amd_event_mapping[], event 0xd0 is
PERF_COUNT_HW_STALLED_CYCLES_FRONTEND. However, according to the
Milan PPR, event 0xd0 doesn't exist. So, I don't actually know what
we're counting.

At the very least, we need a patch like the following (which I fully
expect gmail to mangle):

--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -210,7 +210,8 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
        if (!allow_event)
                return;

-       if (!(eventsel & (ARCH_PERFMON_EVENTSEL_EDGE |
+       if (!(eventsel & ((0xFULL << 32) |
+                         ARCH_PERFMON_EVENTSEL_EDGE |
                          ARCH_PERFMON_EVENTSEL_INV |
                          ARCH_PERFMON_EVENTSEL_CMASK |
                          HSW_IN_TX |
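
(For reference, arch/x86/include/asm/perf_event.h already defines a
mask covering the full 12-bit AMD event select, so the open-coded
0xFULL << 32 above could also be derived from it:)

	#define AMD64_EVENTSEL_EVENT	\
		(ARCH_PERFMON_EVENTSEL_EVENT | (0x0FULL << 32))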

By the way, the following events from amd_event_mapping[] are not
listed in the Milan PPR:
{ 0x7d, 0x07, PERF_COUNT_HW_CACHE_REFERENCES }
{ 0x7e, 0x07, PERF_COUNT_HW_CACHE_MISSES }
{ 0xd0, 0x00, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND }
{ 0xd1, 0x00, PERF_COUNT_HW_STALLED_CYCLES_BACKEND }

Perhaps we should build a table based on amd_f17h_perfmon_event_map[]
for newer AMD processors?
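
A sketch of what such a KVM-side table could look like; only the two
common core events are shown, and the remaining encodings would have
to be copied from amd_f17h_perfmon_event_map[] in
arch/x86/events/amd/core.c:

	/* Hypothetical Family 17h+ table; entries are illustrative. */
	static struct kvm_event_hw_type_mapping amd_f17h_event_mapping[] = {
		{ 0x76, 0x00, PERF_COUNT_HW_CPU_CYCLES },    /* cycles not in halt */
		{ 0xc0, 0x00, PERF_COUNT_HW_INSTRUCTIONS },  /* retired instructions */
		/* ... remaining entries from amd_f17h_perfmon_event_map[] ... */
	};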
Like Xu Feb. 10, 2022, 11:28 a.m. UTC | #7
cc Kim and Ravi to help confirm more details about this change.

On 10/2/2022 3:30 am, Jim Mattson wrote:
> By the way, the following events from amd_event_mapping[] are not
> listed in the Milan PPR:
> { 0x7d, 0x07, PERF_COUNT_HW_CACHE_REFERENCES }
> { 0x7e, 0x07, PERF_COUNT_HW_CACHE_MISSES }
> { 0xd0, 0x00, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND }
> { 0xd1, 0x00, PERF_COUNT_HW_STALLED_CYCLES_BACKEND }
> 
> Perhaps we should build a table based on amd_f17h_perfmon_event_map[]
> for newer AMD processors?
Ravi Bangoria Feb. 11, 2022, 9:56 a.m. UTC | #8
On 10-Feb-22 4:58 PM, Like Xu wrote:
> cc Kim and Ravi to help confirm more details about this change.
> 
> On 10/2/2022 3:30 am, Jim Mattson wrote:
>> By the way, the following events from amd_event_mapping[] are not
>> listed in the Milan PPR:
>> { 0x7d, 0x07, PERF_COUNT_HW_CACHE_REFERENCES }
>> { 0x7e, 0x07, PERF_COUNT_HW_CACHE_MISSES }
>> { 0xd0, 0x00, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND }
>> { 0xd1, 0x00, PERF_COUNT_HW_STALLED_CYCLES_BACKEND }
>>
>> Perhaps we should build a table based on amd_f17h_perfmon_event_map[]
>> for newer AMD processors?

I think Like's other patch series to unify event mapping across kvm
and host will fix it. No?
https://lore.kernel.org/lkml/20220117085307.93030-4-likexu@tencent.com

- Ravi
Jim Mattson Feb. 11, 2022, 6:16 p.m. UTC | #9
On Fri, Feb 11, 2022 at 1:56 AM Ravi Bangoria <ravi.bangoria@amd.com> wrote:
>
>
>
> On 10-Feb-22 4:58 PM, Like Xu wrote:
> > cc Kim and Ravi to help confirm more details about this change.
> >
> > On 10/2/2022 3:30 am, Jim Mattson wrote:
> >> By the way, the following events from amd_event_mapping[] are not
> >> listed in the Milan PPR:
> >> { 0x7d, 0x07, PERF_COUNT_HW_CACHE_REFERENCES }
> >> { 0x7e, 0x07, PERF_COUNT_HW_CACHE_MISSES }
> >> { 0xd0, 0x00, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND }
> >> { 0xd1, 0x00, PERF_COUNT_HW_STALLED_CYCLES_BACKEND }
> >>
> >> Perhaps we should build a table based on amd_f17h_perfmon_event_map[]
> >> for newer AMD processors?
>
> I think Like's other patch series to unify event mapping across kvm
> and host will fix it. No?
> https://lore.kernel.org/lkml/20220117085307.93030-4-likexu@tencent.com

Yes, that should fix it. But why do we even bother? What is the
downside of using PERF_TYPE_RAW all of the time?
Ravi Bangoria Feb. 14, 2022, 10:14 a.m. UTC | #10
On 11-Feb-22 11:46 PM, Jim Mattson wrote:
> On Fri, Feb 11, 2022 at 1:56 AM Ravi Bangoria <ravi.bangoria@amd.com> wrote:
>>
>>
>>
>> On 10-Feb-22 4:58 PM, Like Xu wrote:
>>> cc Kim and Ravi to help confirm more details about this change.
>>>
>>> On 10/2/2022 3:30 am, Jim Mattson wrote:
>>>> By the way, the following events from amd_event_mapping[] are not
>>>> listed in the Milan PPR:
>>>> { 0x7d, 0x07, PERF_COUNT_HW_CACHE_REFERENCES }
>>>> { 0x7e, 0x07, PERF_COUNT_HW_CACHE_MISSES }
>>>> { 0xd0, 0x00, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND }
>>>> { 0xd1, 0x00, PERF_COUNT_HW_STALLED_CYCLES_BACKEND }
>>>>
>>>> Perhaps we should build a table based on amd_f17h_perfmon_event_map[]
>>>> for newer AMD processors?
>>
>> I think Like's other patch series to unify event mapping across kvm
>> and host will fix it. No?
>> https://lore.kernel.org/lkml/20220117085307.93030-4-likexu@tencent.com
> 
> Yes, that should fix it. But why do we even bother? What is the
> downside of using PERF_TYPE_RAW all of the time?

There are a few places where PERF_TYPE_HARDWARE and PERF_TYPE_RAW are
treated differently, e.g. x86_pmu_event_init() and perf_init_event().
So I think it makes sense to keep using PERF_TYPE_HARDWARE for
generalized events?
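
Roughly, that distinction lives in x86_setup_perfctr(); a condensed
paraphrase from arch/x86/events/core.c (not verbatim):

	/* Condensed from x86_setup_perfctr(), arch/x86/events/core.c */
	if (attr->type == PERF_TYPE_RAW)
		return x86_pmu_extra_regs(attr->config, event);	/* used as-is */

	if (attr->config >= x86_pmu.max_events)
		return -EINVAL;

	/* PERF_TYPE_HARDWARE: translate the generic id via the CPU's map */
	config = x86_pmu.event_map(attr->config);
	if (config == 0)
		return -ENOENT;
	hwc->config |= config;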

Ravi
Like Xu Feb. 16, 2022, 7:44 a.m. UTC | #11
On 14/2/2022 6:14 pm, Ravi Bangoria wrote:
> 
> 
> On 11-Feb-22 11:46 PM, Jim Mattson wrote:
>> On Fri, Feb 11, 2022 at 1:56 AM Ravi Bangoria <ravi.bangoria@amd.com> wrote:
>>>
>>>
>>>
>>> On 10-Feb-22 4:58 PM, Like Xu wrote:
>>>> cc Kim and Ravi to help confirm more details about this change.
>>>>
>>>> On 10/2/2022 3:30 am, Jim Mattson wrote:
>>>>> By the way, the following events from amd_event_mapping[] are not
>>>>> listed in the Milan PPR:
>>>>> { 0x7d, 0x07, PERF_COUNT_HW_CACHE_REFERENCES }
>>>>> { 0x7e, 0x07, PERF_COUNT_HW_CACHE_MISSES }
>>>>> { 0xd0, 0x00, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND }
>>>>> { 0xd1, 0x00, PERF_COUNT_HW_STALLED_CYCLES_BACKEND }
>>>>>
>>>>> Perhaps we should build a table based on amd_f17h_perfmon_event_map[]
>>>>> for newer AMD processors?

So do we need another amd_f19h_perfmon_event_map[] in the host perf code?

>>>
>>> I think Like's other patch series to unify event mapping across kvm
>>> and host will fix it. No?
>>> https://lore.kernel.org/lkml/20220117085307.93030-4-likexu@tencent.com
>>
>> Yes, that should fix it. But why do we even bother? What is the
>> downside of using PERF_TYPE_RAW all of the time?
> 
> There are few places where PERF_TYPE_HARDWARE and PERF_TYPE_RAW are treated
> differently. Ex, x86_pmu_event_init(), perf_init_event(). So I think it makes
> sense to keep using PERF_TYPE_HARDWARE for generalized events?
> 
> Ravi
Ravi Bangoria Feb. 16, 2022, 11:24 a.m. UTC | #12
On 16-Feb-22 1:14 PM, Like Xu wrote:
> On 14/2/2022 6:14 pm, Ravi Bangoria wrote:
>>
>>
>> On 11-Feb-22 11:46 PM, Jim Mattson wrote:
>>> On Fri, Feb 11, 2022 at 1:56 AM Ravi Bangoria <ravi.bangoria@amd.com> wrote:
>>>>
>>>>
>>>>
>>>> On 10-Feb-22 4:58 PM, Like Xu wrote:
>>>>> cc Kim and Ravi to help confirm more details about this change.
>>>>>
>>>>> On 10/2/2022 3:30 am, Jim Mattson wrote:
>>>>>> By the way, the following events from amd_event_mapping[] are not
>>>>>> listed in the Milan PPR:
>>>>>> { 0x7d, 0x07, PERF_COUNT_HW_CACHE_REFERENCES }
>>>>>> { 0x7e, 0x07, PERF_COUNT_HW_CACHE_MISSES }
>>>>>> { 0xd0, 0x00, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND }
>>>>>> { 0xd1, 0x00, PERF_COUNT_HW_STALLED_CYCLES_BACKEND }
>>>>>>
>>>>>> Perhaps we should build a table based on amd_f17h_perfmon_event_map[]
>>>>>> for newer AMD processors?
> 
> So do we need another amd_f19h_perfmon_event_map[] in the host perf code?

I don't think so.

The CACHE_REFERENCES/MISSES event code and umask for Milan are the
same as on f17h. Although STALLED_CYCLES_FRONTEND/BACKEND have been
removed from the PPR event list, they will continue to work on Milan.

Ravi

Patch

diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 09873f6488f7..3b3ccf5b1106 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -174,7 +174,6 @@  static bool pmc_resume_counter(struct kvm_pmc *pmc)
 void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
 {
 	unsigned config, type = PERF_TYPE_RAW;
-	u8 event_select, unit_mask;
 	struct kvm *kvm = pmc->vcpu->kvm;
 	struct kvm_pmu_event_filter *filter;
 	int i;
@@ -206,17 +205,12 @@  void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
 	if (!allow_event)
 		return;
 
-	event_select = eventsel & ARCH_PERFMON_EVENTSEL_EVENT;
-	unit_mask = (eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
-
 	if (!(eventsel & (ARCH_PERFMON_EVENTSEL_EDGE |
 			  ARCH_PERFMON_EVENTSEL_INV |
 			  ARCH_PERFMON_EVENTSEL_CMASK |
 			  HSW_IN_TX |
 			  HSW_IN_TX_CHECKPOINTED))) {
-		config = kvm_x86_ops.pmu_ops->find_arch_event(pmc_to_pmu(pmc),
-						      event_select,
-						      unit_mask);
+		config = kvm_x86_ops.pmu_ops->pmc_perf_hw_id(pmc);
 		if (config != PERF_COUNT_HW_MAX)
 			type = PERF_TYPE_HARDWARE;
 	}
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index 59d6b76203d5..dd7dbb1c5048 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -24,8 +24,7 @@  struct kvm_event_hw_type_mapping {
 };
 
 struct kvm_pmu_ops {
-	unsigned (*find_arch_event)(struct kvm_pmu *pmu, u8 event_select,
-				    u8 unit_mask);
+	unsigned int (*pmc_perf_hw_id)(struct kvm_pmc *pmc);
 	unsigned (*find_fixed_event)(int idx);
 	bool (*pmc_is_enabled)(struct kvm_pmc *pmc);
 	struct kvm_pmc *(*pmc_idx_to_pmc)(struct kvm_pmu *pmu, int pmc_idx);
diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
index 0cf05e4caa4c..fb0ce8cda8a7 100644
--- a/arch/x86/kvm/svm/pmu.c
+++ b/arch/x86/kvm/svm/pmu.c
@@ -138,10 +138,10 @@  static inline struct kvm_pmc *get_gp_pmc_amd(struct kvm_pmu *pmu, u32 msr,
 	return &pmu->gp_counters[msr_to_index(msr)];
 }
 
-static unsigned amd_find_arch_event(struct kvm_pmu *pmu,
-				    u8 event_select,
-				    u8 unit_mask)
+static unsigned int amd_pmc_perf_hw_id(struct kvm_pmc *pmc)
 {
+	u8 event_select = pmc->eventsel & ARCH_PERFMON_EVENTSEL_EVENT;
+	u8 unit_mask = (pmc->eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(amd_event_mapping); i++)
@@ -323,7 +323,7 @@  static void amd_pmu_reset(struct kvm_vcpu *vcpu)
 }
 
 struct kvm_pmu_ops amd_pmu_ops = {
-	.find_arch_event = amd_find_arch_event,
+	.pmc_perf_hw_id = amd_pmc_perf_hw_id,
 	.find_fixed_event = amd_find_fixed_event,
 	.pmc_is_enabled = amd_pmc_is_enabled,
 	.pmc_idx_to_pmc = amd_pmc_idx_to_pmc,
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index b7ab5fd03681..67a0188ecdc5 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -68,10 +68,11 @@  static void global_ctrl_changed(struct kvm_pmu *pmu, u64 data)
 		reprogram_counter(pmu, bit);
 }
 
-static unsigned intel_find_arch_event(struct kvm_pmu *pmu,
-				      u8 event_select,
-				      u8 unit_mask)
+static unsigned int intel_pmc_perf_hw_id(struct kvm_pmc *pmc)
 {
+	struct kvm_pmu *pmu = pmc_to_pmu(pmc);
+	u8 event_select = pmc->eventsel & ARCH_PERFMON_EVENTSEL_EVENT;
+	u8 unit_mask = (pmc->eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(intel_arch_events); i++)
@@ -719,7 +720,7 @@  static void intel_pmu_cleanup(struct kvm_vcpu *vcpu)
 }
 
 struct kvm_pmu_ops intel_pmu_ops = {
-	.find_arch_event = intel_find_arch_event,
+	.pmc_perf_hw_id = intel_pmc_perf_hw_id,
 	.find_fixed_event = intel_find_fixed_event,
 	.pmc_is_enabled = intel_pmc_is_enabled,
 	.pmc_idx_to_pmc = intel_pmc_idx_to_pmc,