diff mbox series

[4/4] KVM: x86/cpuid: Add AMD CPUID ExtPerfMonAndDbg leaf 0x80000022

Message ID 20220905123946.95223-5-likexu@tencent.com (mailing list archive)
State New, archived
Headers show
Series KVM: x86/svm/pmu: Add AMD Guest PerfMonV2 support | expand

Commit Message

Like Xu Sept. 5, 2022, 12:39 p.m. UTC
From: Sandipan Das <sandipan.das@amd.com>

CPUID leaf 0x80000022 i.e. ExtPerfMonAndDbg advertises some
new performance monitoring features for AMD processors.

Bit 0 of EAX indicates support for Performance Monitoring
Version 2 (PerfMonV2) features. If found to be set during
PMU initialization, the EBX bits of the same CPUID function
can be used to determine the number of available PMCs for
different PMU types.

Expose the relevant bits via KVM_GET_SUPPORTED_CPUID so
that guests can make use of the PerfMonV2 features.

Co-developed-by: Like Xu <likexu@tencent.com>
Signed-off-by: Like Xu <likexu@tencent.com>
Signed-off-by: Sandipan Das <sandipan.das@amd.com>
---
 arch/x86/include/asm/perf_event.h |  8 ++++++++
 arch/x86/kvm/cpuid.c              | 21 ++++++++++++++++++++-
 2 files changed, 28 insertions(+), 1 deletion(-)

Comments

Jim Mattson Sept. 5, 2022, 5:36 p.m. UTC | #1
On Mon, Sep 5, 2022 at 5:45 AM Like Xu <like.xu.linux@gmail.com> wrote:
>
> From: Sandipan Das <sandipan.das@amd.com>
>
> CPUID leaf 0x80000022 i.e. ExtPerfMonAndDbg advertises some
> new performance monitoring features for AMD processors.
>
> Bit 0 of EAX indicates support for Performance Monitoring
> Version 2 (PerfMonV2) features. If found to be set during
> PMU initialization, the EBX bits of the same CPUID function
> can be used to determine the number of available PMCs for
> different PMU types.
>
> Expose the relevant bits via KVM_GET_SUPPORTED_CPUID so
> that guests can make use of the PerfMonV2 features.
>
> Co-developed-by: Like Xu <likexu@tencent.com>
> Signed-off-by: Like Xu <likexu@tencent.com>
> Signed-off-by: Sandipan Das <sandipan.das@amd.com>
> ---
>  arch/x86/include/asm/perf_event.h |  8 ++++++++
>  arch/x86/kvm/cpuid.c              | 21 ++++++++++++++++++++-
>  2 files changed, 28 insertions(+), 1 deletion(-)
>
> diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
> index f6fc8dd51ef4..c848f504e467 100644
> --- a/arch/x86/include/asm/perf_event.h
> +++ b/arch/x86/include/asm/perf_event.h
> @@ -214,6 +214,14 @@ union cpuid_0x80000022_ebx {
>         unsigned int            full;
>  };
>
> +union cpuid_0x80000022_eax {
> +       struct {
> +               /* Performance Monitoring Version 2 Supported */
> +               unsigned int    perfmon_v2:1;
> +       } split;
> +       unsigned int            full;
> +};
> +
>  struct x86_pmu_capability {
>         int             version;
>         int             num_counters_gp;
> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> index 75dcf7a72605..08a29ab096d2 100644
> --- a/arch/x86/kvm/cpuid.c
> +++ b/arch/x86/kvm/cpuid.c
> @@ -1094,7 +1094,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
>                 entry->edx = 0;
>                 break;
>         case 0x80000000:
> -               entry->eax = min(entry->eax, 0x80000021);
> +               entry->eax = min(entry->eax, 0x80000022);
>                 /*
>                  * Serializing LFENCE is reported in a multitude of ways, and
>                  * NullSegClearsBase is not reported in CPUID on Zen2; help
> @@ -1203,6 +1203,25 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
>                 if (!static_cpu_has_bug(X86_BUG_NULL_SEG))
>                         entry->eax |= BIT(6);
>                 break;
> +       /* AMD Extended Performance Monitoring and Debug */
> +       case 0x80000022: {
> +               union cpuid_0x80000022_eax eax;
> +               union cpuid_0x80000022_ebx ebx;
> +
> +               entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
> +               if (!enable_pmu)
> +                       break;
> +
> +               if (kvm_pmu_cap.version > 1) {
> +                       /* AMD PerfMon is only supported up to V2 in the KVM. */
> +                       eax.split.perfmon_v2 = 1;
> +                       ebx.split.num_core_pmc = min(kvm_pmu_cap.num_counters_gp,
> +                                                    KVM_AMD_PMC_MAX_GENERIC);

Note that the number of core PMCs has to be at least 6 if
guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE). I suppose this leaf
could claim fewer, but the first 6 PMCs must work, per the v1 PMU
spec. That is, software that knows about PERFCTR_CORE, but not about
PMU v2, can rightfully expect 6 PMCs.


> +               }
> +               entry->eax = eax.full;
> +               entry->ebx = ebx.full;
> +               break;
> +       }
>         /*Add support for Centaur's CPUID instruction*/
>         case 0xC0000000:
>                 /*Just support up to 0xC0000004 now*/
> --
> 2.37.3
>
Like Xu Sept. 6, 2022, 12:53 p.m. UTC | #2
On 6/9/2022 1:36 am, Jim Mattson wrote:
> On Mon, Sep 5, 2022 at 5:45 AM Like Xu <like.xu.linux@gmail.com> wrote:
>>
>> From: Sandipan Das <sandipan.das@amd.com>
>>
>> CPUID leaf 0x80000022 i.e. ExtPerfMonAndDbg advertises some
>> new performance monitoring features for AMD processors.
>>
>> Bit 0 of EAX indicates support for Performance Monitoring
>> Version 2 (PerfMonV2) features. If found to be set during
>> PMU initialization, the EBX bits of the same CPUID function
>> can be used to determine the number of available PMCs for
>> different PMU types.
>>
>> Expose the relevant bits via KVM_GET_SUPPORTED_CPUID so
>> that guests can make use of the PerfMonV2 features.
>>
>> Co-developed-by: Like Xu <likexu@tencent.com>
>> Signed-off-by: Like Xu <likexu@tencent.com>
>> Signed-off-by: Sandipan Das <sandipan.das@amd.com>
>> ---
>>   arch/x86/include/asm/perf_event.h |  8 ++++++++
>>   arch/x86/kvm/cpuid.c              | 21 ++++++++++++++++++++-
>>   2 files changed, 28 insertions(+), 1 deletion(-)
>>
>> diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
>> index f6fc8dd51ef4..c848f504e467 100644
>> --- a/arch/x86/include/asm/perf_event.h
>> +++ b/arch/x86/include/asm/perf_event.h
>> @@ -214,6 +214,14 @@ union cpuid_0x80000022_ebx {
>>          unsigned int            full;
>>   };
>>
>> +union cpuid_0x80000022_eax {
>> +       struct {
>> +               /* Performance Monitoring Version 2 Supported */
>> +               unsigned int    perfmon_v2:1;
>> +       } split;
>> +       unsigned int            full;
>> +};
>> +
>>   struct x86_pmu_capability {
>>          int             version;
>>          int             num_counters_gp;
>> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
>> index 75dcf7a72605..08a29ab096d2 100644
>> --- a/arch/x86/kvm/cpuid.c
>> +++ b/arch/x86/kvm/cpuid.c
>> @@ -1094,7 +1094,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
>>                  entry->edx = 0;
>>                  break;
>>          case 0x80000000:
>> -               entry->eax = min(entry->eax, 0x80000021);
>> +               entry->eax = min(entry->eax, 0x80000022);
>>                  /*
>>                   * Serializing LFENCE is reported in a multitude of ways, and
>>                   * NullSegClearsBase is not reported in CPUID on Zen2; help
>> @@ -1203,6 +1203,25 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
>>                  if (!static_cpu_has_bug(X86_BUG_NULL_SEG))
>>                          entry->eax |= BIT(6);
>>                  break;
>> +       /* AMD Extended Performance Monitoring and Debug */
>> +       case 0x80000022: {
>> +               union cpuid_0x80000022_eax eax;
>> +               union cpuid_0x80000022_ebx ebx;
>> +
>> +               entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
>> +               if (!enable_pmu)
>> +                       break;
>> +
>> +               if (kvm_pmu_cap.version > 1) {
>> +                       /* AMD PerfMon is only supported up to V2 in the KVM. */
>> +                       eax.split.perfmon_v2 = 1;
>> +                       ebx.split.num_core_pmc = min(kvm_pmu_cap.num_counters_gp,
>> +                                                    KVM_AMD_PMC_MAX_GENERIC);
> 
> Note that the number of core PMCs has to be at least 6 if
> guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE). I suppose this leaf
> could claim fewer, but the first 6 PMCs must work, per the v1 PMU
> spec. That is, software that knows about PERFCTR_CORE, but not about
> PMU v2, can rightfully expect 6 PMCs.

I thought the NumCorePmc number would only make sense if the
CPUID.80000022.eax.perfmon_v2 bit was present, but considering that user space
is perfectly fine with just configuring the NumCorePmc number without setting
the perfmon_v2 bit at all, how about:

	/* AMD Extended Performance Monitoring and Debug */
	case 0x80000022: {
		union cpuid_0x80000022_eax eax;
		union cpuid_0x80000022_ebx ebx;
		bool perfctr_core;

		entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
		if (!enable_pmu)
			break;

		perfctr_core = kvm_cpu_cap_has(X86_FEATURE_PERFCTR_CORE);
		if (!perfctr_core)
			ebx.split.num_core_pmc = AMD64_NUM_COUNTERS;
		if (kvm_pmu_cap.version > 1) {
			/* AMD PerfMon is only supported up to V2 in the KVM. */
			eax.split.perfmon_v2 = 1;
			ebx.split.num_core_pmc = min(kvm_pmu_cap.num_counters_gp,
						     KVM_AMD_PMC_MAX_GENERIC);
		}
		if (perfctr_core) {
			ebx.split.num_core_pmc = max(ebx.split.num_core_pmc,
						     AMD64_NUM_COUNTERS_CORE);
		}

		entry->eax = eax.full;
		entry->ebx = ebx.full;
		break;
	}

?

Once 0x80000022 appears, ebx.split.num_core_pmc will report only
the real "Number of Core Performance Counters" regardless of perfmon_v2.

> 
> 
>> +               }
>> +               entry->eax = eax.full;
>> +               entry->ebx = ebx.full;
>> +               break;
>> +       }
>>          /*Add support for Centaur's CPUID instruction*/
>>          case 0xC0000000:
>>                  /*Just support up to 0xC0000004 now*/
>> --
>> 2.37.3
>>
Jim Mattson Sept. 6, 2022, 8:08 p.m. UTC | #3
On Tue, Sep 6, 2022 at 5:53 AM Like Xu <like.xu.linux@gmail.com> wrote:
>
> On 6/9/2022 1:36 am, Jim Mattson wrote:
> > On Mon, Sep 5, 2022 at 5:45 AM Like Xu <like.xu.linux@gmail.com> wrote:
> >>
> >> From: Sandipan Das <sandipan.das@amd.com>
> >>
> >> CPUID leaf 0x80000022 i.e. ExtPerfMonAndDbg advertises some
> >> new performance monitoring features for AMD processors.
> >>
> >> Bit 0 of EAX indicates support for Performance Monitoring
> >> Version 2 (PerfMonV2) features. If found to be set during
> >> PMU initialization, the EBX bits of the same CPUID function
> >> can be used to determine the number of available PMCs for
> >> different PMU types.
> >>
> >> Expose the relevant bits via KVM_GET_SUPPORTED_CPUID so
> >> that guests can make use of the PerfMonV2 features.
> >>
> >> Co-developed-by: Like Xu <likexu@tencent.com>
> >> Signed-off-by: Like Xu <likexu@tencent.com>
> >> Signed-off-by: Sandipan Das <sandipan.das@amd.com>
> >> ---
> >>   arch/x86/include/asm/perf_event.h |  8 ++++++++
> >>   arch/x86/kvm/cpuid.c              | 21 ++++++++++++++++++++-
> >>   2 files changed, 28 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
> >> index f6fc8dd51ef4..c848f504e467 100644
> >> --- a/arch/x86/include/asm/perf_event.h
> >> +++ b/arch/x86/include/asm/perf_event.h
> >> @@ -214,6 +214,14 @@ union cpuid_0x80000022_ebx {
> >>          unsigned int            full;
> >>   };
> >>
> >> +union cpuid_0x80000022_eax {
> >> +       struct {
> >> +               /* Performance Monitoring Version 2 Supported */
> >> +               unsigned int    perfmon_v2:1;
> >> +       } split;
> >> +       unsigned int            full;
> >> +};
> >> +
> >>   struct x86_pmu_capability {
> >>          int             version;
> >>          int             num_counters_gp;
> >> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> >> index 75dcf7a72605..08a29ab096d2 100644
> >> --- a/arch/x86/kvm/cpuid.c
> >> +++ b/arch/x86/kvm/cpuid.c
> >> @@ -1094,7 +1094,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
> >>                  entry->edx = 0;
> >>                  break;
> >>          case 0x80000000:
> >> -               entry->eax = min(entry->eax, 0x80000021);
> >> +               entry->eax = min(entry->eax, 0x80000022);
> >>                  /*
> >>                   * Serializing LFENCE is reported in a multitude of ways, and
> >>                   * NullSegClearsBase is not reported in CPUID on Zen2; help
> >> @@ -1203,6 +1203,25 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
> >>                  if (!static_cpu_has_bug(X86_BUG_NULL_SEG))
> >>                          entry->eax |= BIT(6);
> >>                  break;
> >> +       /* AMD Extended Performance Monitoring and Debug */
> >> +       case 0x80000022: {
> >> +               union cpuid_0x80000022_eax eax;
> >> +               union cpuid_0x80000022_ebx ebx;
> >> +
> >> +               entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
> >> +               if (!enable_pmu)
> >> +                       break;
> >> +
> >> +               if (kvm_pmu_cap.version > 1) {
> >> +                       /* AMD PerfMon is only supported up to V2 in the KVM. */
> >> +                       eax.split.perfmon_v2 = 1;
> >> +                       ebx.split.num_core_pmc = min(kvm_pmu_cap.num_counters_gp,
> >> +                                                    KVM_AMD_PMC_MAX_GENERIC);
> >
> > Note that the number of core PMCs has to be at least 6 if
> > guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE). I suppose this leaf
> > could claim fewer, but the first 6 PMCs must work, per the v1 PMU
> > spec. That is, software that knows about PERFCTR_CORE, but not about
> > PMU v2, can rightfully expect 6 PMCs.
>
> I thought the NumCorePmc number would only make sense if
> CPUID.80000022.eax.perfmon_v2
> bit was present, but considering that the user space is perfectly fine with just
> configuring the
> NumCorePmc number without setting perfmon_v2 bit at all, so how about:

CPUID.80000022H might only make sense if X86_FEATURE_PERFCTR_CORE is
present. It's hard to know in the absence of documentation.

>         /* AMD Extended Performance Monitoring and Debug */
>         case 0x80000022: {
>                 union cpuid_0x80000022_eax eax;
>                 union cpuid_0x80000022_ebx ebx;
>                 bool perfctr_core;
>
>                 entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
>                 if (!enable_pmu)
>                         break;
>
>                 perfctr_core = kvm_cpu_cap_has(X86_FEATURE_PERFCTR_CORE);
>                 if (!perfctr_core)
>                         ebx.split.num_core_pmc = AMD64_NUM_COUNTERS;
>                 if (kvm_pmu_cap.version > 1) {
>                         /* AMD PerfMon is only supported up to V2 in the KVM. */
>                         eax.split.perfmon_v2 = 1;
>                         ebx.split.num_core_pmc = min(kvm_pmu_cap.num_counters_gp,
>                                                      KVM_AMD_PMC_MAX_GENERIC);
>                 }
>                 if (perfctr_core) {
>                         ebx.split.num_core_pmc = max(ebx.split.num_core_pmc,
>                                                      AMD64_NUM_COUNTERS_CORE);
>                 }

This still isn't quite right. All AMD CPUs must support a minimum of 4 PMCs.

>
>                 entry->eax = eax.full;
>                 entry->ebx = ebx.full;
>                 break;
>         }
>
> ?
>
> Once 0x80000022 appears, ebx.split.num_core_pmc will report only
> the real "Number of Core Performance Counters" regardless of perfmon_v2.
>
> >
> >
> >> +               }
> >> +               entry->eax = eax.full;
> >> +               entry->ebx = ebx.full;
> >> +               break;
> >> +       }
> >>          /*Add support for Centaur's CPUID instruction*/
> >>          case 0xC0000000:
> >>                  /*Just support up to 0xC0000004 now*/
> >> --
> >> 2.37.3
> >>
Like Xu Sept. 7, 2022, 3:59 a.m. UTC | #4
On 7/9/2022 4:08 am, Jim Mattson wrote:
> On Tue, Sep 6, 2022 at 5:53 AM Like Xu <like.xu.linux@gmail.com> wrote:
>>
>> On 6/9/2022 1:36 am, Jim Mattson wrote:
>>> On Mon, Sep 5, 2022 at 5:45 AM Like Xu <like.xu.linux@gmail.com> wrote:
>>>>
>>>> From: Sandipan Das <sandipan.das@amd.com>
>>>>
>>>> CPUID leaf 0x80000022 i.e. ExtPerfMonAndDbg advertises some
>>>> new performance monitoring features for AMD processors.
>>>>
>>>> Bit 0 of EAX indicates support for Performance Monitoring
>>>> Version 2 (PerfMonV2) features. If found to be set during
>>>> PMU initialization, the EBX bits of the same CPUID function
>>>> can be used to determine the number of available PMCs for
>>>> different PMU types.
>>>>
>>>> Expose the relevant bits via KVM_GET_SUPPORTED_CPUID so
>>>> that guests can make use of the PerfMonV2 features.
>>>>
>>>> Co-developed-by: Like Xu <likexu@tencent.com>
>>>> Signed-off-by: Like Xu <likexu@tencent.com>
>>>> Signed-off-by: Sandipan Das <sandipan.das@amd.com>
>>>> ---
>>>>    arch/x86/include/asm/perf_event.h |  8 ++++++++
>>>>    arch/x86/kvm/cpuid.c              | 21 ++++++++++++++++++++-
>>>>    2 files changed, 28 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
>>>> index f6fc8dd51ef4..c848f504e467 100644
>>>> --- a/arch/x86/include/asm/perf_event.h
>>>> +++ b/arch/x86/include/asm/perf_event.h
>>>> @@ -214,6 +214,14 @@ union cpuid_0x80000022_ebx {
>>>>           unsigned int            full;
>>>>    };
>>>>
>>>> +union cpuid_0x80000022_eax {
>>>> +       struct {
>>>> +               /* Performance Monitoring Version 2 Supported */
>>>> +               unsigned int    perfmon_v2:1;
>>>> +       } split;
>>>> +       unsigned int            full;
>>>> +};
>>>> +
>>>>    struct x86_pmu_capability {
>>>>           int             version;
>>>>           int             num_counters_gp;
>>>> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
>>>> index 75dcf7a72605..08a29ab096d2 100644
>>>> --- a/arch/x86/kvm/cpuid.c
>>>> +++ b/arch/x86/kvm/cpuid.c
>>>> @@ -1094,7 +1094,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
>>>>                   entry->edx = 0;
>>>>                   break;
>>>>           case 0x80000000:
>>>> -               entry->eax = min(entry->eax, 0x80000021);
>>>> +               entry->eax = min(entry->eax, 0x80000022);
>>>>                   /*
>>>>                    * Serializing LFENCE is reported in a multitude of ways, and
>>>>                    * NullSegClearsBase is not reported in CPUID on Zen2; help
>>>> @@ -1203,6 +1203,25 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
>>>>                   if (!static_cpu_has_bug(X86_BUG_NULL_SEG))
>>>>                           entry->eax |= BIT(6);
>>>>                   break;
>>>> +       /* AMD Extended Performance Monitoring and Debug */
>>>> +       case 0x80000022: {
>>>> +               union cpuid_0x80000022_eax eax;
>>>> +               union cpuid_0x80000022_ebx ebx;
>>>> +
>>>> +               entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
>>>> +               if (!enable_pmu)
>>>> +                       break;
>>>> +
>>>> +               if (kvm_pmu_cap.version > 1) {
>>>> +                       /* AMD PerfMon is only supported up to V2 in the KVM. */
>>>> +                       eax.split.perfmon_v2 = 1;
>>>> +                       ebx.split.num_core_pmc = min(kvm_pmu_cap.num_counters_gp,
>>>> +                                                    KVM_AMD_PMC_MAX_GENERIC);
>>>
>>> Note that the number of core PMCs has to be at least 6 if
>>> guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE). I suppose this leaf
>>> could claim fewer, but the first 6 PMCs must work, per the v1 PMU
>>> spec. That is, software that knows about PERFCTR_CORE, but not about
>>> PMU v2, can rightfully expect 6 PMCs.
>>
>> I thought the NumCorePmc number would only make sense if
>> CPUID.80000022.eax.perfmon_v2
>> bit was present, but considering that the user space is perfectly fine with just
>> configuring the
>> NumCorePmc number without setting perfmon_v2 bit at all, so how about:
> 
> CPUID.80000022H might only make sense if X86_FEATURE_PERFCTR_CORE is
> present. It's hard to know in the absence of documentation.

Whenever documentation is absent like this, we may always leave the definition
of the behavior to the hypervisor.

> 
>>          /* AMD Extended Performance Monitoring and Debug */
>>          case 0x80000022: {
>>                  union cpuid_0x80000022_eax eax;
>>                  union cpuid_0x80000022_ebx ebx;
>>                  bool perfctr_core;
>>
>>                  entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
>>                  if (!enable_pmu)
>>                          break;
>>
>>                  perfctr_core = kvm_cpu_cap_has(X86_FEATURE_PERFCTR_CORE);
>>                  if (!perfctr_core)
>>                          ebx.split.num_core_pmc = AMD64_NUM_COUNTERS;
>>                  if (kvm_pmu_cap.version > 1) {
>>                          /* AMD PerfMon is only supported up to V2 in the KVM. */
>>                          eax.split.perfmon_v2 = 1;
>>                          ebx.split.num_core_pmc = min(kvm_pmu_cap.num_counters_gp,
>>                                                       KVM_AMD_PMC_MAX_GENERIC);
>>                  }
>>                  if (perfctr_core) {
>>                          ebx.split.num_core_pmc = max(ebx.split.num_core_pmc,
>>                                                       AMD64_NUM_COUNTERS_CORE);
>>                  }
> 
> This still isn't quite right. All AMD CPUs must support a minimum of 4 PMCs.

That holds for K7 at least. I could not confirm that all antique AMD CPUs have
4 counters without perfctr_core.

> 
>>
>>                  entry->eax = eax.full;
>>                  entry->ebx = ebx.full;
>>                  break;
>>          }
>>
>> ?
>>
>> Once 0x80000022 appears, ebx.split.num_core_pmc will report only
>> the real "Number of Core Performance Counters" regardless of perfmon_v2.
>>
>>>
>>>
>>>> +               }
>>>> +               entry->eax = eax.full;
>>>> +               entry->ebx = ebx.full;
>>>> +               break;
>>>> +       }
>>>>           /*Add support for Centaur's CPUID instruction*/
>>>>           case 0xC0000000:
>>>>                   /*Just support up to 0xC0000004 now*/
>>>> --
>>>> 2.37.3
>>>>
Jim Mattson Sept. 7, 2022, 4:11 a.m. UTC | #5
On Tue, Sep 6, 2022 at 8:59 PM Like Xu <like.xu.linux@gmail.com> wrote:
>
> On 7/9/2022 4:08 am, Jim Mattson wrote:
> > On Tue, Sep 6, 2022 at 5:53 AM Like Xu <like.xu.linux@gmail.com> wrote:
> >>
> >> On 6/9/2022 1:36 am, Jim Mattson wrote:
> >>> On Mon, Sep 5, 2022 at 5:45 AM Like Xu <like.xu.linux@gmail.com> wrote:
> >>>>
> >>>> From: Sandipan Das <sandipan.das@amd.com>
> >>>>
> >>>> CPUID leaf 0x80000022 i.e. ExtPerfMonAndDbg advertises some
> >>>> new performance monitoring features for AMD processors.
> >>>>
> >>>> Bit 0 of EAX indicates support for Performance Monitoring
> >>>> Version 2 (PerfMonV2) features. If found to be set during
> >>>> PMU initialization, the EBX bits of the same CPUID function
> >>>> can be used to determine the number of available PMCs for
> >>>> different PMU types.
> >>>>
> >>>> Expose the relevant bits via KVM_GET_SUPPORTED_CPUID so
> >>>> that guests can make use of the PerfMonV2 features.
> >>>>
> >>>> Co-developed-by: Like Xu <likexu@tencent.com>
> >>>> Signed-off-by: Like Xu <likexu@tencent.com>
> >>>> Signed-off-by: Sandipan Das <sandipan.das@amd.com>
> >>>> ---
> >>>>    arch/x86/include/asm/perf_event.h |  8 ++++++++
> >>>>    arch/x86/kvm/cpuid.c              | 21 ++++++++++++++++++++-
> >>>>    2 files changed, 28 insertions(+), 1 deletion(-)
> >>>>
> >>>> diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
> >>>> index f6fc8dd51ef4..c848f504e467 100644
> >>>> --- a/arch/x86/include/asm/perf_event.h
> >>>> +++ b/arch/x86/include/asm/perf_event.h
> >>>> @@ -214,6 +214,14 @@ union cpuid_0x80000022_ebx {
> >>>>           unsigned int            full;
> >>>>    };
> >>>>
> >>>> +union cpuid_0x80000022_eax {
> >>>> +       struct {
> >>>> +               /* Performance Monitoring Version 2 Supported */
> >>>> +               unsigned int    perfmon_v2:1;
> >>>> +       } split;
> >>>> +       unsigned int            full;
> >>>> +};
> >>>> +
> >>>>    struct x86_pmu_capability {
> >>>>           int             version;
> >>>>           int             num_counters_gp;
> >>>> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> >>>> index 75dcf7a72605..08a29ab096d2 100644
> >>>> --- a/arch/x86/kvm/cpuid.c
> >>>> +++ b/arch/x86/kvm/cpuid.c
> >>>> @@ -1094,7 +1094,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
> >>>>                   entry->edx = 0;
> >>>>                   break;
> >>>>           case 0x80000000:
> >>>> -               entry->eax = min(entry->eax, 0x80000021);
> >>>> +               entry->eax = min(entry->eax, 0x80000022);
> >>>>                   /*
> >>>>                    * Serializing LFENCE is reported in a multitude of ways, and
> >>>>                    * NullSegClearsBase is not reported in CPUID on Zen2; help
> >>>> @@ -1203,6 +1203,25 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
> >>>>                   if (!static_cpu_has_bug(X86_BUG_NULL_SEG))
> >>>>                           entry->eax |= BIT(6);
> >>>>                   break;
> >>>> +       /* AMD Extended Performance Monitoring and Debug */
> >>>> +       case 0x80000022: {
> >>>> +               union cpuid_0x80000022_eax eax;
> >>>> +               union cpuid_0x80000022_ebx ebx;
> >>>> +
> >>>> +               entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
> >>>> +               if (!enable_pmu)
> >>>> +                       break;
> >>>> +
> >>>> +               if (kvm_pmu_cap.version > 1) {
> >>>> +                       /* AMD PerfMon is only supported up to V2 in the KVM. */
> >>>> +                       eax.split.perfmon_v2 = 1;
> >>>> +                       ebx.split.num_core_pmc = min(kvm_pmu_cap.num_counters_gp,
> >>>> +                                                    KVM_AMD_PMC_MAX_GENERIC);
> >>>
> >>> Note that the number of core PMCs has to be at least 6 if
> >>> guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE). I suppose this leaf
> >>> could claim fewer, but the first 6 PMCs must work, per the v1 PMU
> >>> spec. That is, software that knows about PERFCTR_CORE, but not about
> >>> PMU v2, can rightfully expect 6 PMCs.
> >>
> >> I thought the NumCorePmc number would only make sense if
> >> CPUID.80000022.eax.perfmon_v2
> >> bit was present, but considering that the user space is perfectly fine with just
> >> configuring the
> >> NumCorePmc number without setting perfmon_v2 bit at all, so how about:
> >
> > CPUID.80000022H might only make sense if X86_FEATURE_PERFCTR_CORE is
> > present. It's hard to know in the absence of documentation.
>
> Whenever this happens, we may always leave the definition of behavior to the
> hypervisor.

I disagree. If CPUID.0H reports "AuthenticAMD," then AMD is the sole
authority on behavior.

> >
> >>          /* AMD Extended Performance Monitoring and Debug */
> >>          case 0x80000022: {
> >>                  union cpuid_0x80000022_eax eax;
> >>                  union cpuid_0x80000022_ebx ebx;
> >>                  bool perfctr_core;
> >>
> >>                  entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
> >>                  if (!enable_pmu)
> >>                          break;
> >>
> >>                  perfctr_core = kvm_cpu_cap_has(X86_FEATURE_PERFCTR_CORE);
> >>                  if (!perfctr_core)
> >>                          ebx.split.num_core_pmc = AMD64_NUM_COUNTERS;
> >>                  if (kvm_pmu_cap.version > 1) {
> >>                          /* AMD PerfMon is only supported up to V2 in the KVM. */
> >>                          eax.split.perfmon_v2 = 1;
> >>                          ebx.split.num_core_pmc = min(kvm_pmu_cap.num_counters_gp,
> >>                                                       KVM_AMD_PMC_MAX_GENERIC);
> >>                  }
> >>                  if (perfctr_core) {
> >>                          ebx.split.num_core_pmc = max(ebx.split.num_core_pmc,
> >>                                                       AMD64_NUM_COUNTERS_CORE);
> >>                  }
> >
> > This still isn't quite right. All AMD CPUs must support a minimum of 4 PMCs.
>
> K7 at least. I could not confirm that all antique AMD CPUs have 4 counters w/o
> perfctr_core.

The APM says, "All implementations support the base set of four
performance counter / event-select pairs." That is unequivocal.

> >
> >>
> >>                  entry->eax = eax.full;
> >>                  entry->ebx = ebx.full;
> >>                  break;
> >>          }
> >>
> >> ?
> >>
> >> Once 0x80000022 appears, ebx.split.num_core_pmc will report only
> >> the real "Number of Core Performance Counters" regardless of perfmon_v2.
> >>
> >>>
> >>>
> >>>> +               }
> >>>> +               entry->eax = eax.full;
> >>>> +               entry->ebx = ebx.full;
> >>>> +               break;
> >>>> +       }
> >>>>           /*Add support for Centaur's CPUID instruction*/
> >>>>           case 0xC0000000:
> >>>>                   /*Just support up to 0xC0000004 now*/
> >>>> --
> >>>> 2.37.3
> >>>>
Sandipan Das Sept. 7, 2022, 5:52 a.m. UTC | #6
On 9/7/2022 9:41 AM, Jim Mattson wrote:
> On Tue, Sep 6, 2022 at 8:59 PM Like Xu <like.xu.linux@gmail.com> wrote:
> [...]
>>>>>> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
>>>>>> index 75dcf7a72605..08a29ab096d2 100644
>>>>>> --- a/arch/x86/kvm/cpuid.c
>>>>>> +++ b/arch/x86/kvm/cpuid.c
>>>>>> @@ -1094,7 +1094,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
>>>>>>                   entry->edx = 0;
>>>>>>                   break;
>>>>>>           case 0x80000000:
>>>>>> -               entry->eax = min(entry->eax, 0x80000021);
>>>>>> +               entry->eax = min(entry->eax, 0x80000022);
>>>>>>                   /*
>>>>>>                    * Serializing LFENCE is reported in a multitude of ways, and
>>>>>>                    * NullSegClearsBase is not reported in CPUID on Zen2; help
>>>>>> @@ -1203,6 +1203,25 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
>>>>>>                   if (!static_cpu_has_bug(X86_BUG_NULL_SEG))
>>>>>>                           entry->eax |= BIT(6);
>>>>>>                   break;
>>>>>> +       /* AMD Extended Performance Monitoring and Debug */
>>>>>> +       case 0x80000022: {
>>>>>> +               union cpuid_0x80000022_eax eax;
>>>>>> +               union cpuid_0x80000022_ebx ebx;
>>>>>> +
>>>>>> +               entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
>>>>>> +               if (!enable_pmu)
>>>>>> +                       break;
>>>>>> +
>>>>>> +               if (kvm_pmu_cap.version > 1) {
>>>>>> +                       /* AMD PerfMon is only supported up to V2 in the KVM. */
>>>>>> +                       eax.split.perfmon_v2 = 1;
>>>>>> +                       ebx.split.num_core_pmc = min(kvm_pmu_cap.num_counters_gp,
>>>>>> +                                                    KVM_AMD_PMC_MAX_GENERIC);
>>>>>
>>>>> Note that the number of core PMCs has to be at least 6 if
>>>>> guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE). I suppose this leaf
>>>>> could claim fewer, but the first 6 PMCs must work, per the v1 PMU
>>>>> spec. That is, software that knows about PERFCTR_CORE, but not about
>>>>> PMU v2, can rightfully expect 6 PMCs.
>>>>
>>>> I thought the NumCorePmc number would only make sense if
>>>> CPUID.80000022.eax.perfmon_v2
>>>> bit was present, but considering that the user space is perfectly fine with just
>>>> configuring the
>>>> NumCorePmc number without setting perfmon_v2 bit at all, so how about:
>>>
>>> CPUID.80000022H might only make sense if X86_FEATURE_PERFCTR_CORE is
>>> present. It's hard to know in the absence of documentation.
>>
>> Whenever this happens, we may always leave the definition of behavior to the
>> hypervisor.
> 
> I disagree. If CPUID.0H reports "AuthenticAMD," then AMD is the sole
> authority on behavior.
> 

I understand that official documentation is not out yet. However, for Zen 4
models, it is expected that both the PerfMonV2 bit of CPUID.80000022H EAX and
the PerfCtrExtCore bit of CPUID.80000001H ECX will be set.

>>>
>>>>          /* AMD Extended Performance Monitoring and Debug */
>>>>          case 0x80000022: {
>>>>                  union cpuid_0x80000022_eax eax;
>>>>                  union cpuid_0x80000022_ebx ebx;
>>>>                  bool perfctr_core;
>>>>
>>>>                  entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
>>>>                  if (!enable_pmu)
>>>>                          break;
>>>>
>>>>                  perfctr_core = kvm_cpu_cap_has(X86_FEATURE_PERFCTR_CORE);
>>>>                  if (!perfctr_core)
>>>>                          ebx.split.num_core_pmc = AMD64_NUM_COUNTERS;
>>>>                  if (kvm_pmu_cap.version > 1) {
>>>>                          /* AMD PerfMon is only supported up to V2 in the KVM. */
>>>>                          eax.split.perfmon_v2 = 1;
>>>>                          ebx.split.num_core_pmc = min(kvm_pmu_cap.num_counters_gp,
>>>>                                                       KVM_AMD_PMC_MAX_GENERIC);
>>>>                  }
>>>>                  if (perfctr_core) {
>>>>                          ebx.split.num_core_pmc = max(ebx.split.num_core_pmc,
>>>>                                                       AMD64_NUM_COUNTERS_CORE);
>>>>                  }
>>>
>>> This still isn't quite right. All AMD CPUs must support a minimum of 4 PMCs.
>>
>> K7 at least. I could not confirm that all antique AMD CPUs have 4 counters w/o
>> perfctr_core.
> 
> The APM says, "All implementations support the base set of four
> performance counter / event-select pairs." That is unequivocal.
> 

That is true. The same can be inferred from amd_core_pmu_init() in
arch/x86/events/amd/core.c. If PERFCTR_CORE is not detected, it assumes
that the four legacy counters are always available.

- Sandipan
Like Xu Sept. 7, 2022, 6:39 a.m. UTC | #7
On 7/9/2022 1:52 pm, Sandipan Das wrote:
> On 9/7/2022 9:41 AM, Jim Mattson wrote:
>> On Tue, Sep 6, 2022 at 8:59 PM Like Xu <like.xu.linux@gmail.com> wrote:
>> [...]
>>>>>>> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
>>>>>>> index 75dcf7a72605..08a29ab096d2 100644
>>>>>>> --- a/arch/x86/kvm/cpuid.c
>>>>>>> +++ b/arch/x86/kvm/cpuid.c
>>>>>>> @@ -1094,7 +1094,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
>>>>>>>                    entry->edx = 0;
>>>>>>>                    break;
>>>>>>>            case 0x80000000:
>>>>>>> -               entry->eax = min(entry->eax, 0x80000021);
>>>>>>> +               entry->eax = min(entry->eax, 0x80000022);
>>>>>>>                    /*
>>>>>>>                     * Serializing LFENCE is reported in a multitude of ways, and
>>>>>>>                     * NullSegClearsBase is not reported in CPUID on Zen2; help
>>>>>>> @@ -1203,6 +1203,25 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
>>>>>>>                    if (!static_cpu_has_bug(X86_BUG_NULL_SEG))
>>>>>>>                            entry->eax |= BIT(6);
>>>>>>>                    break;
>>>>>>> +       /* AMD Extended Performance Monitoring and Debug */
>>>>>>> +       case 0x80000022: {
>>>>>>> +               union cpuid_0x80000022_eax eax;
>>>>>>> +               union cpuid_0x80000022_ebx ebx;
>>>>>>> +
>>>>>>> +               entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
>>>>>>> +               if (!enable_pmu)
>>>>>>> +                       break;
>>>>>>> +
>>>>>>> +               if (kvm_pmu_cap.version > 1) {
>>>>>>> +                       /* AMD PerfMon is only supported up to V2 in the KVM. */
>>>>>>> +                       eax.split.perfmon_v2 = 1;
>>>>>>> +                       ebx.split.num_core_pmc = min(kvm_pmu_cap.num_counters_gp,
>>>>>>> +                                                    KVM_AMD_PMC_MAX_GENERIC);
>>>>>>
>>>>>> Note that the number of core PMCs has to be at least 6 if
>>>>>> guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE). I suppose this leaf
>>>>>> could claim fewer, but the first 6 PMCs must work, per the v1 PMU
>>>>>> spec. That is, software that knows about PERFCTR_CORE, but not about
>>>>>> PMU v2, can rightfully expect 6 PMCs.
>>>>>
>>>>> I thought the NumCorePmc number would only make sense if
>>>>> CPUID.80000022.eax.perfmon_v2
>>>>> bit was present, but considering that the user space is perfectly fine with just
>>>>> configuring the
>>>>> NumCorePmc number without setting perfmon_v2 bit at all, so how about:
>>>>
>>>> CPUID.80000022H might only make sense if X86_FEATURE_PERFCTR_CORE is
>>>> present. It's hard to know in the absence of documentation.
>>>
>>> Whenever this happens, we may always leave the definition of behavior to the
>>> hypervisor.
>>
>> I disagree. If CPUID.0H reports "AuthenticAMD," then AMD is the sole
>> authority on behavior.

The real world isn't like that, because even AMD has multiple implementations in
cases where the hardware specs aren't explicitly stated, and sometimes they're
intentionally vague.
And the hypervisor can't simply do nothing; it has to prefer one behavior over
the other and maintain maximum compatibility with legacy user space.

>>
> 
> I understand that official documentation is not out yet. However, for Zen 4
> models, it is expected that both the PerfMonV2 bit of CPUID.80000022H EAX and
> the PerfCtrExtCore bit of CPUID.80000001H ECX will be set.

Is PerfCtrExtCore a precondition for PerfMonV2 or PerfMonV3+?
Is PerfCtrExtCore a precondition for CPUID.80000022?

Should we always expect CPUID_Fn80000022_EBX.NumCorePmc to reflect the real
number of core performance counters, regardless of whether PerfMonV2 is set?

> 
>>>>
>>>>>           /* AMD Extended Performance Monitoring and Debug */
>>>>>           case 0x80000022: {
>>>>>                   union cpuid_0x80000022_eax eax;
>>>>>                   union cpuid_0x80000022_ebx ebx;
>>>>>                   bool perfctr_core;
>>>>>
>>>>>                   entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
>>>>>                   if (!enable_pmu)
>>>>>                           break;
>>>>>
>>>>>                   perfctr_core = kvm_cpu_cap_has(X86_FEATURE_PERFCTR_CORE);
>>>>>                   if (!perfctr_core)
>>>>>                           ebx.split.num_core_pmc = AMD64_NUM_COUNTERS;
>>>>>                   if (kvm_pmu_cap.version > 1) {
>>>>>                           /* AMD PerfMon is only supported up to V2 in the KVM. */
>>>>>                           eax.split.perfmon_v2 = 1;
>>>>>                           ebx.split.num_core_pmc = min(kvm_pmu_cap.num_counters_gp,
>>>>>                                                        KVM_AMD_PMC_MAX_GENERIC);
>>>>>                   }
>>>>>                   if (perfctr_core) {
>>>>>                           ebx.split.num_core_pmc = max(ebx.split.num_core_pmc,
>>>>>                                                        AMD64_NUM_COUNTERS_CORE);
>>>>>                   }
>>>>
>>>> This still isn't quite right. All AMD CPUs must support a minimum of 4 PMCs.
>>>
>>> K7 at least. I could not confirm that all antique AMD CPUs have 4 counters w/o
>>> perfctr_core.
>>
>> The APM says, "All implementations support the base set of four
>> performance counter / event-select pairs." That is unequivocal.
>>
> 
> That is true. The same can be inferred from amd_core_pmu_init() in
> arch/x86/events/amd/core.c. If PERFCTR_CORE is not detected, it assumes
> that the four legacy counters are always available.
> 
> - Sandipan
Sandipan Das Sept. 8, 2022, 6 a.m. UTC | #8
On 9/7/2022 12:09 PM, Like Xu wrote:
> On 7/9/2022 1:52 pm, Sandipan Das wrote:
>> On 9/7/2022 9:41 AM, Jim Mattson wrote:
>>> On Tue, Sep 6, 2022 at 8:59 PM Like Xu <like.xu.linux@gmail.com> wrote:
>>> [...]
>>>>>>>> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
>>>>>>>> index 75dcf7a72605..08a29ab096d2 100644
>>>>>>>> --- a/arch/x86/kvm/cpuid.c
>>>>>>>> +++ b/arch/x86/kvm/cpuid.c
>>>>>>>> @@ -1094,7 +1094,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
>>>>>>>>                    entry->edx = 0;
>>>>>>>>                    break;
>>>>>>>>            case 0x80000000:
>>>>>>>> -               entry->eax = min(entry->eax, 0x80000021);
>>>>>>>> +               entry->eax = min(entry->eax, 0x80000022);
>>>>>>>>                    /*
>>>>>>>>                     * Serializing LFENCE is reported in a multitude of ways, and
>>>>>>>>                     * NullSegClearsBase is not reported in CPUID on Zen2; help
>>>>>>>> @@ -1203,6 +1203,25 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
>>>>>>>>                    if (!static_cpu_has_bug(X86_BUG_NULL_SEG))
>>>>>>>>                            entry->eax |= BIT(6);
>>>>>>>>                    break;
>>>>>>>> +       /* AMD Extended Performance Monitoring and Debug */
>>>>>>>> +       case 0x80000022: {
>>>>>>>> +               union cpuid_0x80000022_eax eax;
>>>>>>>> +               union cpuid_0x80000022_ebx ebx;
>>>>>>>> +
>>>>>>>> +               entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
>>>>>>>> +               if (!enable_pmu)
>>>>>>>> +                       break;
>>>>>>>> +
>>>>>>>> +               if (kvm_pmu_cap.version > 1) {
>>>>>>>> +                       /* AMD PerfMon is only supported up to V2 in the KVM. */
>>>>>>>> +                       eax.split.perfmon_v2 = 1;
>>>>>>>> +                       ebx.split.num_core_pmc = min(kvm_pmu_cap.num_counters_gp,
>>>>>>>> +                                                    KVM_AMD_PMC_MAX_GENERIC);
>>>>>>>
>>>>>>> Note that the number of core PMCs has to be at least 6 if
>>>>>>> guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE). I suppose this leaf
>>>>>>> could claim fewer, but the first 6 PMCs must work, per the v1 PMU
>>>>>>> spec. That is, software that knows about PERFCTR_CORE, but not about
>>>>>>> PMU v2, can rightfully expect 6 PMCs.
>>>>>>
>>>>>> I thought the NumCorePmc number would only make sense if
>>>>>> CPUID.80000022.eax.perfmon_v2
>>>>>> bit was present, but considering that the user space is perfectly fine with just
>>>>>> configuring the
>>>>>> NumCorePmc number without setting perfmon_v2 bit at all, so how about:
>>>>>
>>>>> CPUID.80000022H might only make sense if X86_FEATURE_PERFCTR_CORE is
>>>>> present. It's hard to know in the absence of documentation.
>>>>
>>>> Whenever this happens, we may always leave the definition of behavior to the
>>>> hypervisor.
>>>
>>> I disagree. If CPUID.0H reports "AuthenticAMD," then AMD is the sole
>>> authority on behavior.
> 
> The real world isn't like that, because even AMD has multiple implementations in cases
> where the hardware specs aren't explicitly stated, and sometimes they're intentionally vague.
> And the hypervisor can't do nothing, it prefers one over the other and maintains maximum compatibility with the legacy user space.
> 
>>>
>>
>> I understand that official documentation is not out yet. However, for Zen 4
>> models, it is expected that both the PerfMonV2 bit of CPUID.80000022H EAX and
>> the PerfCtrExtCore bit of CPUID.80000001H ECX will be set.
> 
> Is PerfCtrExtCore a PerfMonV2 or PerfMonV3+ precondition ?
> Is PerfCtrExtCore a CPUID.80000022 precondition ?
> 
> Should we always expect CPUID_Fn80000022_EBX.NumCorePmc to reflect the real
> Number of Core Performance Counters regardless of whether PerfMonV2 is set ?
> 

This is the suggested method for detecting the number of counters:

  If CPUID Fn8000_0022_EAX[PerfMonV2] is set, then use the new interface in
  CPUID Fn8000_0022_EBX to determine the number of counters.

  Else if CPUID Fn8000_0001_ECX[PerfCtrExtCore] is set, then six counters
  are available.

  Else, four legacy counters are available.

There will be an APM update that will have this information in the
"Detecting Hardware Support for Performance Counters" section.

>>
>>>>>
>>>>>>           /* AMD Extended Performance Monitoring and Debug */
>>>>>>           case 0x80000022: {
>>>>>>                   union cpuid_0x80000022_eax eax;
>>>>>>                   union cpuid_0x80000022_ebx ebx;
>>>>>>                   bool perfctr_core;
>>>>>>
>>>>>>                   entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
>>>>>>                   if (!enable_pmu)
>>>>>>                           break;
>>>>>>
>>>>>>                   perfctr_core = kvm_cpu_cap_has(X86_FEATURE_PERFCTR_CORE);
>>>>>>                   if (!perfctr_core)
>>>>>>                           ebx.split.num_core_pmc = AMD64_NUM_COUNTERS;
>>>>>>                   if (kvm_pmu_cap.version > 1) {
>>>>>>                           /* AMD PerfMon is only supported up to V2 in the KVM. */
>>>>>>                           eax.split.perfmon_v2 = 1;
>>>>>>                           ebx.split.num_core_pmc = min(kvm_pmu_cap.num_counters_gp,
>>>>>>                                                        KVM_AMD_PMC_MAX_GENERIC);
>>>>>>                   }
>>>>>>                   if (perfctr_core) {
>>>>>>                           ebx.split.num_core_pmc = max(ebx.split.num_core_pmc,
>>>>>>                                                        AMD64_NUM_COUNTERS_CORE);
>>>>>>                   }
>>>>>
>>>>> This still isn't quite right. All AMD CPUs must support a minimum of 4 PMCs.
>>>>
>>>> K7 at least. I could not confirm that all antique AMD CPUs have 4 counters w/o
>>>> perfctr_core.
>>>
>>> The APM says, "All implementations support the base set of four
>>> performance counter / event-select pairs." That is unequivocal.
>>>
>>
>> That is true. The same can be inferred from amd_core_pmu_init() in
>> arch/x86/events/amd/core.c. If PERFCTR_CORE is not detected, it assumes
>> that the four legacy counters are always available.
>>
>> - Sandipan
Jim Mattson Sept. 8, 2022, 11:14 p.m. UTC | #9
On Wed, Sep 7, 2022 at 11:00 PM Sandipan Das <sandipan.das@amd.com> wrote:
> This is the suggested method for detecting the number of counters:
>
>   If CPUID Fn8000_0022_EAX[PerfMonV2] is set, then use the new interface in
>   CPUID Fn8000_0022_EBX to determine the number of counters.
>
>   Else if CPUID Fn8000_0001_ECX[PerfCtrExtCore] is set, then six counters
>   are available.
>
>   Else, four legacy counters are available.
>
> There will be an APM update that will have this information in the
> "Detecting Hardware Support for Performance Counters" section.

Nonetheless, for compatibility with old software, Fn8000_0022_EBX can
never report less than four counters (or six, if
Fn8000_0001_ECX[PerfCtrExtCore] is set).
diff mbox series

Patch

diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index f6fc8dd51ef4..c848f504e467 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -214,6 +214,14 @@  union cpuid_0x80000022_ebx {
 	unsigned int		full;
 };
 
+union cpuid_0x80000022_eax {
+	struct {
+		/* Performance Monitoring Version 2 Supported */
+		unsigned int	perfmon_v2:1;
+	} split;
+	unsigned int		full;
+};
+
 struct x86_pmu_capability {
 	int		version;
 	int		num_counters_gp;
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 75dcf7a72605..08a29ab096d2 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -1094,7 +1094,7 @@  static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
 		entry->edx = 0;
 		break;
 	case 0x80000000:
-		entry->eax = min(entry->eax, 0x80000021);
+		entry->eax = min(entry->eax, 0x80000022);
 		/*
 		 * Serializing LFENCE is reported in a multitude of ways, and
 		 * NullSegClearsBase is not reported in CPUID on Zen2; help
@@ -1203,6 +1203,25 @@  static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
 		if (!static_cpu_has_bug(X86_BUG_NULL_SEG))
 			entry->eax |= BIT(6);
 		break;
+	/* AMD Extended Performance Monitoring and Debug */
+	case 0x80000022: {
+		union cpuid_0x80000022_eax eax;
+		union cpuid_0x80000022_ebx ebx;
+
+		entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
+		if (!enable_pmu)
+			break;
+
+		if (kvm_pmu_cap.version > 1) {
+			/* AMD PerfMon is only supported up to V2 in the KVM. */
+			eax.split.perfmon_v2 = 1;
+			ebx.split.num_core_pmc = min(kvm_pmu_cap.num_counters_gp,
+						     KVM_AMD_PMC_MAX_GENERIC);
+		}
+		entry->eax = eax.full;
+		entry->ebx = ebx.full;
+		break;
+	}
 	/*Add support for Centaur's CPUID instruction*/
 	case 0xC0000000:
 		/*Just support up to 0xC0000004 now*/