diff mbox series

[v2,3/3] KVM: x86/cpuid: Add AMD CPUID ExtPerfMonAndDbg leaf 0x80000022

Message ID 20220919093453.71737-4-likexu@tencent.com (mailing list archive)
State New, archived
Headers show
Series KVM: x86: Add AMD Guest PerfMonV2 PMU support | expand

Commit Message

Like Xu Sept. 19, 2022, 9:34 a.m. UTC
From: Sandipan Das <sandipan.das@amd.com>

From: Sandipan Das <sandipan.das@amd.com>

CPUID leaf 0x80000022 i.e. ExtPerfMonAndDbg advertises some
new performance monitoring features for AMD processors.

Bit 0 of EAX indicates support for Performance Monitoring
Version 2 (PerfMonV2) features. If found to be set during
PMU initialization, the EBX bits of the same CPUID function
can be used to determine the number of available PMCs for
different PMU types.

Expose the relevant bits via KVM_GET_SUPPORTED_CPUID so
that guests can make use of the PerfMonV2 features.

Co-developed-by: Like Xu <likexu@tencent.com>
Signed-off-by: Like Xu <likexu@tencent.com>
Signed-off-by: Sandipan Das <sandipan.das@amd.com>
---
 arch/x86/include/asm/perf_event.h |  8 ++++++++
 arch/x86/kvm/cpuid.c              | 32 ++++++++++++++++++++++++++++++-
 2 files changed, 39 insertions(+), 1 deletion(-)

Comments

Jim Mattson Sept. 21, 2022, 12:02 a.m. UTC | #1
On Mon, Sep 19, 2022 at 2:35 AM Like Xu <like.xu.linux@gmail.com> wrote:
>
> From: Sandipan Das <sandipan.das@amd.com>
>
> From: Sandipan Das <sandipan.das@amd.com>
>
> CPUID leaf 0x80000022 i.e. ExtPerfMonAndDbg advertises some
> new performance monitoring features for AMD processors.
>
> Bit 0 of EAX indicates support for Performance Monitoring
> Version 2 (PerfMonV2) features. If found to be set during
> PMU initialization, the EBX bits of the same CPUID function
> can be used to determine the number of available PMCs for
> different PMU types.
>
> Expose the relevant bits via KVM_GET_SUPPORTED_CPUID so
> that guests can make use of the PerfMonV2 features.
>
> Co-developed-by: Like Xu <likexu@tencent.com>
> Signed-off-by: Like Xu <likexu@tencent.com>
> Signed-off-by: Sandipan Das <sandipan.das@amd.com>
Reviewed-by: Jim Mattson <jmattson@google.com>
Sean Christopherson Oct. 27, 2022, 10:37 p.m. UTC | #2
On Mon, Sep 19, 2022, Like Xu wrote:
> From: Sandipan Das <sandipan.das@amd.com>
> 
> From: Sandipan Das <sandipan.das@amd.com>

Duplicate "From:"s.

> CPUID leaf 0x80000022 i.e. ExtPerfMonAndDbg advertises some
> new performance monitoring features for AMD processors.

Wrap changelogs closer to ~75 chars.

> Bit 0 of EAX indicates support for Performance Monitoring
> Version 2 (PerfMonV2) features. If found to be set during
> PMU initialization, the EBX bits of the same CPUID function
> can be used to determine the number of available PMCs for
> different PMU types.
> 
> Expose the relevant bits via KVM_GET_SUPPORTED_CPUID so
> that guests can make use of the PerfMonV2 features.
> 
> Co-developed-by: Like Xu <likexu@tencent.com>
> Signed-off-by: Like Xu <likexu@tencent.com>
> Signed-off-by: Sandipan Das <sandipan.das@amd.com>
> ---
>  arch/x86/include/asm/perf_event.h |  8 ++++++++
>  arch/x86/kvm/cpuid.c              | 32 ++++++++++++++++++++++++++++++-
>  2 files changed, 39 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
> index f6fc8dd51ef4..c848f504e467 100644
> --- a/arch/x86/include/asm/perf_event.h
> +++ b/arch/x86/include/asm/perf_event.h
> @@ -214,6 +214,14 @@ union cpuid_0x80000022_ebx {
>  	unsigned int		full;
>  };
>  
> +union cpuid_0x80000022_eax {
> +	struct {
> +		/* Performance Monitoring Version 2 Supported */
> +		unsigned int	perfmon_v2:1;
> +	} split;
> +	unsigned int		full;
> +};

I'm not a fan of perf's unions, but I at least understand the value added for
CPUID entries that are a bunch of multi-bit values.  However, this leaf appears
to be a pure features leaf.  In which case a union just makes life painful.

Please add a CPUID_8000_0022_EAX kvm_only_cpuid_leafs entry (details in link[*]
below) so that KVM can write sane code like

	guest_cpuid_has(X86_FEATURE_AMD_PMU_V2)

and cpuid_entry_override() instead of manually filling in information.

where appropriate.

[*] https://lore.kernel.org/all/Y1AQX3RfM+awULlE@google.com

>  struct x86_pmu_capability {
>  	int		version;
>  	int		num_counters_gp;
> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> index 75dcf7a72605..34ba845c91b7 100644
> --- a/arch/x86/kvm/cpuid.c
> +++ b/arch/x86/kvm/cpuid.c
> @@ -1094,7 +1094,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
>  		entry->edx = 0;
>  		break;
>  	case 0x80000000:
> -		entry->eax = min(entry->eax, 0x80000021);
> +		entry->eax = min(entry->eax, 0x80000022);
>  		/*
>  		 * Serializing LFENCE is reported in a multitude of ways, and
>  		 * NullSegClearsBase is not reported in CPUID on Zen2; help
> @@ -1203,6 +1203,36 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
>  		if (!static_cpu_has_bug(X86_BUG_NULL_SEG))
>  			entry->eax |= BIT(6);
>  		break;
> +	/* AMD Extended Performance Monitoring and Debug */
> +	case 0x80000022: {
> +		union cpuid_0x80000022_eax eax;
> +		union cpuid_0x80000022_ebx ebx;
> +
> +		entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
> +		if (!enable_pmu)

Shouldn't

	case 0xa: { /* Architectural Performance Monitoring */

also check enable_pmu instead of X86_FEATURE_ARCH_PERFMON?

> +			break;
> +
> +		if (kvm_pmu_cap.version > 1) {
> +			/* AMD PerfMon is only supported up to V2 in the KVM. */
> +			eax.split.perfmon_v2 = 1;

With a proper CPUID_8000_0022_EAX, this becomes:

		entry->ecx = entry->edx = 0;
		if (!enable_pmu || !kvm_cpu_cap_has(X86_FEATURE_AMD_PMU_V2)) {
			entry->eax = entry->ebx;
			break;
		}

		cpuid_entry_override(entry, CPUID_8000_0022_EAX);

		...

		entry->ebx = ebx.full;
Like Xu Nov. 10, 2022, 9:26 a.m. UTC | #3
On 28/10/2022 6:37 am, Sean Christopherson wrote:
> On Mon, Sep 19, 2022, Like Xu wrote:
>> From: Sandipan Das <sandipan.das@amd.com>
>>
>> From: Sandipan Das <sandipan.das@amd.com>
> 
> Duplicate "From:"s.
> 
>> CPUID leaf 0x80000022 i.e. ExtPerfMonAndDbg advertises some
>> new performance monitoring features for AMD processors.
> 
> Wrap changelogs closer to ~75 chars.
> 
>> Bit 0 of EAX indicates support for Performance Monitoring
>> Version 2 (PerfMonV2) features. If found to be set during
>> PMU initialization, the EBX bits of the same CPUID function
>> can be used to determine the number of available PMCs for
>> different PMU types.
>>
>> Expose the relevant bits via KVM_GET_SUPPORTED_CPUID so
>> that guests can make use of the PerfMonV2 features.
>>
>> Co-developed-by: Like Xu <likexu@tencent.com>
>> Signed-off-by: Like Xu <likexu@tencent.com>
>> Signed-off-by: Sandipan Das <sandipan.das@amd.com>
>> ---
>>   arch/x86/include/asm/perf_event.h |  8 ++++++++
>>   arch/x86/kvm/cpuid.c              | 32 ++++++++++++++++++++++++++++++-
>>   2 files changed, 39 insertions(+), 1 deletion(-)
>>
>> diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
>> index f6fc8dd51ef4..c848f504e467 100644
>> --- a/arch/x86/include/asm/perf_event.h
>> +++ b/arch/x86/include/asm/perf_event.h
>> @@ -214,6 +214,14 @@ union cpuid_0x80000022_ebx {
>>   	unsigned int		full;
>>   };
>>   
>> +union cpuid_0x80000022_eax {
>> +	struct {
>> +		/* Performance Monitoring Version 2 Supported */
>> +		unsigned int	perfmon_v2:1;
>> +	} split;
>> +	unsigned int		full;
>> +};
> 
> I'm not a fan of perf's unions, but I at least understand the value added for
> CPUID entries that are a bunch of multi-bit values.  However, this leaf appears
> to be a pure features leaf.  In which case a union just makes life painful.
> 
> Please add a CPUID_8000_0022_EAX kvm_only_cpuid_leafs entry (details in link[*]
> below) so that KVM can write sane code like
> 
> 	guest_cpuid_has(X86_FEATURE_AMD_PMU_V2)
> 
> and cpuid_entry_override() instead of manually filling in information.
> 
> where appropriate.
> 
> [*] https://lore.kernel.org/all/Y1AQX3RfM+awULlE@google.com

When someone is selling syntactic sugar in the kernel space, extra attention
needs to be paid to runtime performance (union) and memory footprint 
(reverse_cpuid).

Applied for this case, while the cpuid_0x80000022_eax will be used again
in the perf core since the other new AMD PMU features are pacing at the door.

> 
>>   struct x86_pmu_capability {
>>   	int		version;
>>   	int		num_counters_gp;
>> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
>> index 75dcf7a72605..34ba845c91b7 100644
>> --- a/arch/x86/kvm/cpuid.c
>> +++ b/arch/x86/kvm/cpuid.c
>> @@ -1094,7 +1094,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
>>   		entry->edx = 0;
>>   		break;
>>   	case 0x80000000:
>> -		entry->eax = min(entry->eax, 0x80000021);
>> +		entry->eax = min(entry->eax, 0x80000022);
>>   		/*
>>   		 * Serializing LFENCE is reported in a multitude of ways, and
>>   		 * NullSegClearsBase is not reported in CPUID on Zen2; help
>> @@ -1203,6 +1203,36 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
>>   		if (!static_cpu_has_bug(X86_BUG_NULL_SEG))
>>   			entry->eax |= BIT(6);
>>   		break;
>> +	/* AMD Extended Performance Monitoring and Debug */
>> +	case 0x80000022: {
>> +		union cpuid_0x80000022_eax eax;
>> +		union cpuid_0x80000022_ebx ebx;
>> +
>> +		entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
>> +		if (!enable_pmu)
> 
> Shouldn't
> 
> 	case 0xa: { /* Architectural Performance Monitoring */
> 
> also check enable_pmu instead of X86_FEATURE_ARCH_PERFMON?

Applied as a separate patch, though KVM will have zero-padded kvm_pmu_cap to do
subsequent assignments when !enable_pmu but that doesn't hurt.

> 
>> +			break;
>> +
>> +		if (kvm_pmu_cap.version > 1) {
>> +			/* AMD PerfMon is only supported up to V2 in the KVM. */
>> +			eax.split.perfmon_v2 = 1;
> 
> With a proper CPUID_8000_0022_EAX, this becomes:
> 
> 		entry->ecx = entry->edx = 0;
> 		if (!enable_pmu || !kvm_cpu_cap_has(X86_FEATURE_AMD_PMU_V2)) {
> 			entry->eax = entry->ebx;
> 			break;
> 		}
> 
> 		cpuid_entry_override(entry, CPUID_8000_0022_EAX);
> 
> 		...

Then in this code block, we will have:

	/* AMD PerfMon is only supported up to V2 in the KVM. */
	entry->eax |= BIT(0);

to cover AMD Perfmon V3+, any better move ?

> 
> 		entry->ebx = ebx.full;
Sean Christopherson Nov. 10, 2022, 5:34 p.m. UTC | #4
On Thu, Nov 10, 2022, Like Xu wrote:
> On 28/10/2022 6:37 am, Sean Christopherson wrote:
> > I'm not a fan of perf's unions, but I at least understand the value added for
> > CPUID entries that are a bunch of multi-bit values.  However, this leaf appears
> > to be a pure features leaf.  In which case a union just makes life painful.
> > 
> > Please add a CPUID_8000_0022_EAX kvm_only_cpuid_leafs entry (details in link[*]
> > below) so that KVM can write sane code like
> > 
> > 	guest_cpuid_has(X86_FEATURE_AMD_PMU_V2)
> > 
> > and cpuid_entry_override() instead of manually filling in information.
> > 
> > where appropriate.
> > 
> > [*] https://lore.kernel.org/all/Y1AQX3RfM+awULlE@google.com
> 
> When someone is selling syntactic sugar in the kernel space, extra attention
> needs to be paid to runtime performance (union) and memory footprint
> (reverse_cpuid).

No.  Just no.

First off, this is more than syntactic sugar.  KVM has had multiple bugs in the
past due to manually querying/filling CPUID entries.  The reverse-CPUID infrastructure
guards against some of those bugs by limiting potential bugs to the leaf definition
and the feature definition.  I.e. we only need to get the cpuid_leafs+X86_FEATURE_*
definitions correct.

Second, this code is not remotely performance sensitive, and the memory footprint
of the reverse_cpuid table is laughably small.  It's literally 8 bytes per entry
FOR THE ENTIRE KERNEL.  And that's ignoring the fact that the table might even be
optimized away entirely since it's really just a switch statement that doesn't
use a helper function.

> > With a proper CPUID_8000_0022_EAX, this becomes:
> > 
> > 		entry->ecx = entry->edx = 0;
> > 		if (!enable_pmu || !kvm_cpu_cap_has(X86_FEATURE_AMD_PMU_V2)) {
> > 			entry->eax = entry->ebx;
> > 			break;
> > 		}
> > 
> > 		cpuid_entry_override(entry, CPUID_8000_0022_EAX);
> > 
> > 		...
> 
> Then in this code block, we will have:
> 
> 	/* AMD PerfMon is only supported up to V2 in the KVM. */
> 	entry->eax |= BIT(0);

I can't tell exactly what you're suggesting, but if you're implying that you don't
want to add CPUID_8000_0022_EAX, then NAK.  Open coding CPUID feature bit
manipulations in KVM is not acceptable.

If I'm misunderstanding and there's something that isn't handled by
cpuid_entry_override(), then the correct way to force a CPUID feature bit is:

	cpuid_entry_set(entry, X86_FEATURE_AMD_PMU_V2);

> to cover AMD Perfmon V3+, any better move ?

Huh?  If/when V3+ comes along, the above

	cpuid_entry_override(entry, CPUID_8000_0022_EAX);

will continue to do the right thing because KVM will (a) advertise V2 if it's
supported in hardware and (b) NOT advertise V3+ because the relevant CPUID bit(s)
will not be set in kvm_cpu_caps until KVM gains the necessary support.
diff mbox series

Patch

diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index f6fc8dd51ef4..c848f504e467 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -214,6 +214,14 @@  union cpuid_0x80000022_ebx {
 	unsigned int		full;
 };
 
+union cpuid_0x80000022_eax {
+	struct {
+		/* Performance Monitoring Version 2 Supported */
+		unsigned int	perfmon_v2:1;
+	} split;
+	unsigned int		full;
+};
+
 struct x86_pmu_capability {
 	int		version;
 	int		num_counters_gp;
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 75dcf7a72605..34ba845c91b7 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -1094,7 +1094,7 @@  static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
 		entry->edx = 0;
 		break;
 	case 0x80000000:
-		entry->eax = min(entry->eax, 0x80000021);
+		entry->eax = min(entry->eax, 0x80000022);
 		/*
 		 * Serializing LFENCE is reported in a multitude of ways, and
 		 * NullSegClearsBase is not reported in CPUID on Zen2; help
@@ -1203,6 +1203,36 @@  static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
 		if (!static_cpu_has_bug(X86_BUG_NULL_SEG))
 			entry->eax |= BIT(6);
 		break;
+	/* AMD Extended Performance Monitoring and Debug */
+	case 0x80000022: {
+		union cpuid_0x80000022_eax eax;
+		union cpuid_0x80000022_ebx ebx;
+
+		entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
+		if (!enable_pmu)
+			break;
+
+		if (kvm_pmu_cap.version > 1) {
+			/* AMD PerfMon is only supported up to V2 in the KVM. */
+			eax.split.perfmon_v2 = 1;
+			ebx.split.num_core_pmc = min(kvm_pmu_cap.num_counters_gp,
+						     KVM_AMD_PMC_MAX_GENERIC);
+		}
+
+		if (kvm_cpu_cap_has(X86_FEATURE_PERFCTR_CORE)) {
+			ebx.split.num_core_pmc = max_t(unsigned int,
+						       ebx.split.num_core_pmc,
+						       AMD64_NUM_COUNTERS_CORE);
+		} else {
+			ebx.split.num_core_pmc = max_t(unsigned int,
+						       ebx.split.num_core_pmc,
+						       AMD64_NUM_COUNTERS);
+		}
+
+		entry->eax = eax.full;
+		entry->ebx = ebx.full;
+		break;
+	}
 	/*Add support for Centaur's CPUID instruction*/
 	case 0xC0000000:
 		/*Just support up to 0xC0000004 now*/