[kvm-unit-tests,v3,13/13] x86/pmu: Update testcases to cover AMD PMU

Message ID 20220819110939.78013-14-likexu@tencent.com (mailing list archive)
State New, archived
Series x86/pmu: Test case optimization, fixes and additions

Commit Message

Like Xu Aug. 19, 2022, 11:09 a.m. UTC
From: Like Xu <likexu@tencent.com>

Before Zen4, the AMD core PMU has no version number and no fixed
counters; it has a hard-coded number of generic counters and a
hard-coded counter bit-width, and only hardware events common across
AMD generations (starting with K7) are added to the amd_gp_events[]
table.

All of the above differences are instantiated at the detection step,
which also covers the K7 PMU registers, consistent with bare metal.

Signed-off-by: Like Xu <likexu@tencent.com>
---
 lib/x86/msr.h       | 17 ++++++++++++
 lib/x86/processor.h | 32 ++++++++++++++++++++--
 x86/pmu.c           | 67 ++++++++++++++++++++++++++++++++++++++++-----
 3 files changed, 106 insertions(+), 10 deletions(-)

Comments

Sandipan Das Sept. 6, 2022, 7:32 a.m. UTC | #1
On 8/19/2022 4:39 PM, Like Xu wrote:
> From: Like Xu <likexu@tencent.com>
> 
> Before Zen4, the AMD core PMU has no version number and no fixed
> counters; it has a hard-coded number of generic counters and a
> hard-coded counter bit-width, and only hardware events common across
> AMD generations (starting with K7) are added to the amd_gp_events[]
> table.
> 
> All of the above differences are instantiated at the detection step,
> which also covers the K7 PMU registers, consistent with bare metal.
> 
> Signed-off-by: Like Xu <likexu@tencent.com>
> ---
>  lib/x86/msr.h       | 17 ++++++++++++
>  lib/x86/processor.h | 32 ++++++++++++++++++++--
>  x86/pmu.c           | 67 ++++++++++++++++++++++++++++++++++++++++-----
>  3 files changed, 106 insertions(+), 10 deletions(-)
> 
> [...]

Reviewed-by: Sandipan Das <sandipan.das@amd.com>

Sean Christopherson Oct. 5, 2022, 10:48 p.m. UTC | #2
On Fri, Aug 19, 2022, Like Xu wrote:
> diff --git a/lib/x86/processor.h b/lib/x86/processor.h
> index 0324220..10bca27 100644
> --- a/lib/x86/processor.h
> +++ b/lib/x86/processor.h
> @@ -793,6 +793,9 @@ static inline void flush_tlb(void)
>  
>  static inline u8 pmu_version(void)
>  {
> +	if (!is_intel())
> +		return 0;
> +
>  	return cpuid(10).a & 0xff;
>  }
>  
> @@ -806,19 +809,39 @@ static inline bool this_cpu_has_perf_global_ctrl(void)
>  	return pmu_version() > 1;
>  }
>  
> +#define AMD64_NUM_COUNTERS                             4
> +#define AMD64_NUM_COUNTERS_CORE                                6
> +
> +static inline bool has_amd_perfctr_core(void)
> +{
> +	return cpuid(0x80000001).c & BIT_ULL(23);

Add an X86_FEATURE_*, maybe X86_FEATURE_AMD_PERF_EXTENSIONS?
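
Untested sketch, following the existing CPUID(fn, idx, reg, bit)
feature-macro pattern in lib/x86/processor.h; the exact name is TBD:

	/* CPUID.0x80000001.ECX[23], "PerfCtrExtCore" in AMD's docs */
	#define X86_FEATURE_PERFCTR_CORE	(CPUID(0x80000001, 0, ECX, 23))

	static inline bool has_amd_perfctr_core(void)
	{
		return this_cpu_has(X86_FEATURE_PERFCTR_CORE);
	}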

> +}
> +
>  static inline u8 pmu_nr_gp_counters(void)
>  {
> -	return (cpuid(10).a >> 8) & 0xff;
> +	if (is_intel()) {

No curly braces.

> +		return (cpuid(10).a >> 8) & 0xff;
> +	} else if (!has_amd_perfctr_core()) {

Drop the "else", the above "if" is terminal.
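
With both nits addressed, the end result would be something like
(untested):

	static inline u8 pmu_nr_gp_counters(void)
	{
		if (is_intel())
			return (cpuid(10).a >> 8) & 0xff;

		if (!has_amd_perfctr_core())
			return AMD64_NUM_COUNTERS;

		return AMD64_NUM_COUNTERS_CORE;
	}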

> +		return AMD64_NUM_COUNTERS;
> +	}
> +
> +	return AMD64_NUM_COUNTERS_CORE;
>  }
>  
>  static inline u8 pmu_gp_counter_width(void)
>  {
> -	return (cpuid(10).a >> 16) & 0xff;
> +	if (is_intel())
> +		return (cpuid(10).a >> 16) & 0xff;
> +	else
> +		return 48;

Please add a #define for this magic number.
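
E.g. (untested, name illustrative; AMD GP counters are 48 bits wide):

	#define AMD64_GP_COUNTER_WIDTH		48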

>  }
>  
>  static inline u8 pmu_gp_counter_mask_length(void)
>  {
> -	return (cpuid(10).a >> 24) & 0xff;
> +	if (is_intel())
> +		return (cpuid(10).a >> 24) & 0xff;
> +	else
> +		return pmu_nr_gp_counters();
>  }
>  
>  static inline u8 pmu_nr_fixed_counters(void)
> @@ -843,6 +866,9 @@ static inline u8 pmu_fixed_counter_width(void)
>  
>  static inline bool pmu_gp_counter_is_available(int i)
>  {
> +	if (!is_intel())
> +		return i < pmu_nr_gp_counters();
> +
>  	/* CPUID.0xA.EBX bit is '1' if the counter is NOT available. */
>  	return !(cpuid(10).b & BIT(i));
>  }
> diff --git a/x86/pmu.c b/x86/pmu.c
> index 0706cb1..b6ab10c 100644
> --- a/x86/pmu.c
> +++ b/x86/pmu.c
> @@ -62,6 +62,11 @@ struct pmu_event {
>  	{"fixed 1", MSR_CORE_PERF_FIXED_CTR0, 10*N, 10.2*N},
>  	{"fixed 2", MSR_CORE_PERF_FIXED_CTR0 + 1, 1*N, 30*N},
>  	{"fixed 3", MSR_CORE_PERF_FIXED_CTR0 + 2, 0.1*N, 30*N}
> +}, amd_gp_events[] = {
> +	{"core cycles", 0x0076, 1*N, 50*N},
> +	{"instructions", 0x00c0, 10*N, 10.2*N},
> +	{"branches", 0x00c2, 1*N, 1.1*N},
> +	{"branch misses", 0x00c3, 0, 0.1*N},
>  };
>  
>  #define PMU_CAP_FW_WRITES	(1ULL << 13)
> @@ -105,14 +110,24 @@ static bool check_irq(void)
>  
>  static bool is_gp(pmu_counter_t *evt)
>  {
> +	if (!is_intel())
> +		return true;
> +
>  	return evt->ctr < MSR_CORE_PERF_FIXED_CTR0 ||
>  		evt->ctr >= MSR_IA32_PMC0;
>  }
>  
>  static int event_to_global_idx(pmu_counter_t *cnt)
>  {
> -	return cnt->ctr - (is_gp(cnt) ? gp_counter_base :
> -		(MSR_CORE_PERF_FIXED_CTR0 - FIXED_CNT_INDEX));
> +	if (is_intel())
> +		return cnt->ctr - (is_gp(cnt) ? gp_counter_base :
> +			(MSR_CORE_PERF_FIXED_CTR0 - FIXED_CNT_INDEX));
> +
> +	if (gp_counter_base == MSR_F15H_PERF_CTR0) {

Unnecessary curly braces.
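
I.e. (untested; the Fam15h CTL/CTR MSRs are interleaved with a stride
of 2, hence the divide):

	if (gp_counter_base == MSR_F15H_PERF_CTR0)
		return (cnt->ctr - gp_counter_base) / 2;

	return cnt->ctr - gp_counter_base;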

> +		return (cnt->ctr - gp_counter_base) / 2;
> +	} else {
> +		return cnt->ctr - gp_counter_base;
> +	}
>  }
>  
>  static struct pmu_event* get_counter_event(pmu_counter_t *cnt)
> @@ -736,5 +783,11 @@ int main(int ac, char **av)
>  		report_prefix_pop();
>  	}
>  
> +	if (!is_intel()) {
> +		report_prefix_push("K7");
> +		amd_switch_to_non_perfctr_core();
> +		check_counters();

"K7" prefix needs to be popped.

> +	}
> +
>  	return report_summary();
>  }
> -- 
> 2.37.2
>

Patch

diff --git a/lib/x86/msr.h b/lib/x86/msr.h
index 252e041..5f16a58 100644
--- a/lib/x86/msr.h
+++ b/lib/x86/msr.h
@@ -130,6 +130,23 @@ 
 #define MSR_AMD64_IBSDCPHYSAD		0xc0011039
 #define MSR_AMD64_IBSCTL		0xc001103a
 
+/* Fam 15h MSRs */
+#define MSR_F15H_PERF_CTL              0xc0010200
+#define MSR_F15H_PERF_CTL0             MSR_F15H_PERF_CTL
+#define MSR_F15H_PERF_CTL1             (MSR_F15H_PERF_CTL + 2)
+#define MSR_F15H_PERF_CTL2             (MSR_F15H_PERF_CTL + 4)
+#define MSR_F15H_PERF_CTL3             (MSR_F15H_PERF_CTL + 6)
+#define MSR_F15H_PERF_CTL4             (MSR_F15H_PERF_CTL + 8)
+#define MSR_F15H_PERF_CTL5             (MSR_F15H_PERF_CTL + 10)
+
+#define MSR_F15H_PERF_CTR              0xc0010201
+#define MSR_F15H_PERF_CTR0             MSR_F15H_PERF_CTR
+#define MSR_F15H_PERF_CTR1             (MSR_F15H_PERF_CTR + 2)
+#define MSR_F15H_PERF_CTR2             (MSR_F15H_PERF_CTR + 4)
+#define MSR_F15H_PERF_CTR3             (MSR_F15H_PERF_CTR + 6)
+#define MSR_F15H_PERF_CTR4             (MSR_F15H_PERF_CTR + 8)
+#define MSR_F15H_PERF_CTR5             (MSR_F15H_PERF_CTR + 10)
+
 /* Fam 10h MSRs */
 #define MSR_FAM10H_MMIO_CONF_BASE	0xc0010058
 #define FAM10H_MMIO_CONF_ENABLE		(1<<0)
diff --git a/lib/x86/processor.h b/lib/x86/processor.h
index 0324220..10bca27 100644
--- a/lib/x86/processor.h
+++ b/lib/x86/processor.h
@@ -793,6 +793,9 @@  static inline void flush_tlb(void)
 
 static inline u8 pmu_version(void)
 {
+	if (!is_intel())
+		return 0;
+
 	return cpuid(10).a & 0xff;
 }
 
@@ -806,19 +809,39 @@  static inline bool this_cpu_has_perf_global_ctrl(void)
 	return pmu_version() > 1;
 }
 
+#define AMD64_NUM_COUNTERS                             4
+#define AMD64_NUM_COUNTERS_CORE                                6
+
+static inline bool has_amd_perfctr_core(void)
+{
+	return cpuid(0x80000001).c & BIT_ULL(23);
+}
+
 static inline u8 pmu_nr_gp_counters(void)
 {
-	return (cpuid(10).a >> 8) & 0xff;
+	if (is_intel()) {
+		return (cpuid(10).a >> 8) & 0xff;
+	} else if (!has_amd_perfctr_core()) {
+		return AMD64_NUM_COUNTERS;
+	}
+
+	return AMD64_NUM_COUNTERS_CORE;
 }
 
 static inline u8 pmu_gp_counter_width(void)
 {
-	return (cpuid(10).a >> 16) & 0xff;
+	if (is_intel())
+		return (cpuid(10).a >> 16) & 0xff;
+	else
+		return 48;
 }
 
 static inline u8 pmu_gp_counter_mask_length(void)
 {
-	return (cpuid(10).a >> 24) & 0xff;
+	if (is_intel())
+		return (cpuid(10).a >> 24) & 0xff;
+	else
+		return pmu_nr_gp_counters();
 }
 
 static inline u8 pmu_nr_fixed_counters(void)
@@ -843,6 +866,9 @@  static inline u8 pmu_fixed_counter_width(void)
 
 static inline bool pmu_gp_counter_is_available(int i)
 {
+	if (!is_intel())
+		return i < pmu_nr_gp_counters();
+
 	/* CPUID.0xA.EBX bit is '1' if the counter is NOT available. */
 	return !(cpuid(10).b & BIT(i));
 }
diff --git a/x86/pmu.c b/x86/pmu.c
index 0706cb1..b6ab10c 100644
--- a/x86/pmu.c
+++ b/x86/pmu.c
@@ -62,6 +62,11 @@  struct pmu_event {
 	{"fixed 1", MSR_CORE_PERF_FIXED_CTR0, 10*N, 10.2*N},
 	{"fixed 2", MSR_CORE_PERF_FIXED_CTR0 + 1, 1*N, 30*N},
 	{"fixed 3", MSR_CORE_PERF_FIXED_CTR0 + 2, 0.1*N, 30*N}
+}, amd_gp_events[] = {
+	{"core cycles", 0x0076, 1*N, 50*N},
+	{"instructions", 0x00c0, 10*N, 10.2*N},
+	{"branches", 0x00c2, 1*N, 1.1*N},
+	{"branch misses", 0x00c3, 0, 0.1*N},
 };
 
 #define PMU_CAP_FW_WRITES	(1ULL << 13)
@@ -105,14 +110,24 @@  static bool check_irq(void)
 
 static bool is_gp(pmu_counter_t *evt)
 {
+	if (!is_intel())
+		return true;
+
 	return evt->ctr < MSR_CORE_PERF_FIXED_CTR0 ||
 		evt->ctr >= MSR_IA32_PMC0;
 }
 
 static int event_to_global_idx(pmu_counter_t *cnt)
 {
-	return cnt->ctr - (is_gp(cnt) ? gp_counter_base :
-		(MSR_CORE_PERF_FIXED_CTR0 - FIXED_CNT_INDEX));
+	if (is_intel())
+		return cnt->ctr - (is_gp(cnt) ? gp_counter_base :
+			(MSR_CORE_PERF_FIXED_CTR0 - FIXED_CNT_INDEX));
+
+	if (gp_counter_base == MSR_F15H_PERF_CTR0) {
+		return (cnt->ctr - gp_counter_base) / 2;
+	} else {
+		return cnt->ctr - gp_counter_base;
+	}
 }
 
 static struct pmu_event* get_counter_event(pmu_counter_t *cnt)
@@ -150,11 +165,17 @@  static void global_disable(pmu_counter_t *cnt)
 
 static inline uint32_t get_gp_counter_msr(unsigned int i)
 {
+	if (gp_counter_base == MSR_F15H_PERF_CTR0)
+		return gp_counter_base + 2 * i;
+
 	return gp_counter_base + i;
 }
 
 static inline uint32_t get_gp_select_msr(unsigned int i)
 {
+	if (gp_select_base == MSR_F15H_PERF_CTL0)
+		return gp_select_base + 2 * i;
+
 	return gp_select_base + i;
 }
 
@@ -334,6 +355,9 @@  static void check_counter_overflow(void)
 			cnt.count &= (1ull << pmu_gp_counter_width()) - 1;
 
 		if (i == nr_gp_counters) {
+			if (!is_intel())
+				break;
+
 			cnt.ctr = fixed_events[0].unit_sel;
 			__measure(&cnt, 0);
 			count = cnt.count;
@@ -494,7 +518,7 @@  static void check_running_counter_wrmsr(void)
 static void check_emulated_instr(void)
 {
 	uint64_t status, instr_start, brnch_start;
-	unsigned int branch_idx = 5;
+	unsigned int branch_idx = is_intel() ? 5 : 2;
 	pmu_counter_t brnch_cnt = {
 		.ctr = get_gp_counter_msr(0),
 		/* branch instructions */
@@ -695,13 +719,35 @@  static bool detect_intel_pmu(void)
 	return true;
 }
 
-static bool pmu_is_detected(void)
+static void amd_switch_to_non_perfctr_core(void)
 {
-	if (!is_intel()) {
-		report_skip("AMD PMU is not supported.");
+	gp_counter_base = MSR_K7_PERFCTR0;
+	gp_select_base = MSR_K7_EVNTSEL0;
+	nr_gp_counters = AMD64_NUM_COUNTERS;
+}
+
+static bool detect_amd_pmu(void)
+{
+	if (!has_amd_perfctr_core()) {
+		report_skip("Missing perfctr_core, unsupported AMD PMU.");
 		return false;
 	}
 
+	nr_gp_counters = pmu_nr_gp_counters();
+	gp_events_size = sizeof(amd_gp_events)/sizeof(amd_gp_events[0]);
+	gp_events = (PMU_EVENTS_ARRAY_t *)amd_gp_events;
+	gp_counter_base = MSR_F15H_PERF_CTR0;
+	gp_select_base = MSR_F15H_PERF_CTL0;
+
+	report_prefix_push("AMD");
+	return true;
+}
+
+static bool pmu_is_detected(void)
+{
+	if (!is_intel())
+		return detect_amd_pmu();
+
 	return detect_intel_pmu();
 }
 
@@ -714,7 +760,8 @@  int main(int ac, char **av)
 	if (!pmu_is_detected())
 		return report_summary();
 
-	set_ref_cycle_expectations();
+	if (is_intel())
+		set_ref_cycle_expectations();
 
 	printf("PMU version:         %d\n", pmu_version());
 	printf("GP counters:         %d\n", nr_gp_counters);
@@ -736,5 +783,11 @@  int main(int ac, char **av)
 		report_prefix_pop();
 	}
 
+	if (!is_intel()) {
+		report_prefix_push("K7");
+		amd_switch_to_non_perfctr_core();
+		check_counters();
+	}
+
 	return report_summary();
 }