
[kvm-unit-tests,v7,13/18] x86: pmu: Improve instruction and branches events verification

Message ID 20250215013636.1214612-14-seanjc@google.com (mailing list archive)
State New
Series x86/pmu: Fixes and improvements

Commit Message

Sean Christopherson Feb. 15, 2025, 1:36 a.m. UTC
From: Dapeng Mi <dapeng1.mi@linux.intel.com>

If HW supports the GLOBAL_CTRL MSR, enabling and disabling of the PMCs is
moved into __precise_loop(). Thus, the instructions and branches events can
be verified against a precise count instead of a rough range.

Unfortunately, AMD CPUs count VMRUN as a branch instruction in guest
context, which leads to intermittent failures as the counts will vary
depending on how many asynchronous exits occur while running the measured
code, e.g. if the host takes IRQs, NMIs, etc.

So only enable this precise check for Intel processors.

Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
Link: https://lore.kernel.org/all/6d512a14-ace1-41a3-801e-0beb41425734@amd.com
[sean: explain AMD VMRUN behavior, use "INSNS"]
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 x86/pmu.c | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)
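
To illustrate the mechanism described above, here is a minimal sketch of a
measured loop with the GLOBAL_CTRL writes inside the counted window. This is
not the patch's code: precise_loop() and enable_mask are illustrative names,
the register shuffling only loosely mirrors LOOP_ASM()/__precise_loop() in
x86/pmu.c, and it assumes a ring-0 context with at least N * 64 readable
bytes at buf. The point is that everything between the two WRMSRs is a
fixed, compile-time-known number of instructions and branches, which is what
allows the expected counts to be exact.

#include <stdint.h>

#define MSR_CORE_PERF_GLOBAL_CTRL	0x38f
#define N				1000000

static void precise_loop(uint64_t enable_mask, char *buf)
{
	uint32_t lo = (uint32_t)enable_mask, hi = (uint32_t)(enable_mask >> 32);
	unsigned long msr = MSR_CORE_PERF_GLOBAL_CTRL;
	unsigned long tmp;

	/*
	 * ECX carries the MSR index for both WRMSRs and is parked in EDI
	 * while the LOOP instruction uses ECX as the iteration counter.
	 * Each iteration retires exactly 10 instructions (mov + add +
	 * 7 nops + loop) and 1 branch (loop), so the total observed by the
	 * counters is N * 10 plus a small, fixed setup/teardown overhead,
	 * which the patch accounts for as EXTRA_INSNS.
	 */
	asm volatile("wrmsr\n\t"				/* enable PMCs  */
		     "mov %%ecx, %%edi\n\t"
		     "mov %[iters], %%ecx\n\t"
		     "1: mov (%[ptr]), %[tmp]; add $64, %[ptr]\n\t"
		     "nop; nop; nop; nop; nop; nop; nop\n\t"
		     "loop 1b\n\t"
		     "mov %%edi, %%ecx\n\t"
		     "xor %%eax, %%eax\n\t"
		     "xor %%edx, %%edx\n\t"
		     "wrmsr\n\t"				/* disable PMCs */
		     : [ptr] "+r"(buf), [tmp] "=r"(tmp),
		       "+c"(msr), "+a"(lo), "+d"(hi)
		     : [iters] "i"(N)
		     : "edi", "memory");
}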

Patch

diff --git a/x86/pmu.c b/x86/pmu.c
index 06d867d9..217ab938 100644
--- a/x86/pmu.c
+++ b/x86/pmu.c
@@ -19,6 +19,10 @@ 
 #define EXPECTED_INSTR 17
 #define EXPECTED_BRNCH 5
 
+/* Enable GLOBAL_CTRL + disable GLOBAL_CTRL instructions */
+#define EXTRA_INSNS  (3 + 3)
+#define LOOP_INSNS   (N * 10 + EXTRA_INSNS)
+#define LOOP_BRANCHES  (N)
 #define LOOP_ASM(_wrmsr)						\
 	_wrmsr "\n\t"							\
 	"mov %%ecx, %%edi; mov %%ebx, %%ecx;\n\t"			\
@@ -123,6 +127,27 @@  static inline void loop(u64 cntrs)
 		__precise_loop(cntrs);
 }
 
+static void adjust_events_range(struct pmu_event *gp_events,
+				int instruction_idx, int branch_idx)
+{
+	/*
+	 * If HW supports GLOBAL_CTRL MSR, enabling and disabling PMCs are
+	 * moved in __precise_loop(). Thus, instructions and branches events
+	 * can be verified against a precise count instead of a rough range.
+	 *
+	 * Skip the precise checks on AMD, as AMD CPUs count VMRUN as a branch
 +	 * instruction in guest context, which leads to intermittent failures
+	 * as the counts will vary depending on how many asynchronous VM-Exits
+	 * occur while running the measured code, e.g. if the host takes IRQs.
+	 */
+	if (pmu.is_intel && this_cpu_has_perf_global_ctrl()) {
+		gp_events[instruction_idx].min = LOOP_INSNS;
+		gp_events[instruction_idx].max = LOOP_INSNS;
+		gp_events[branch_idx].min = LOOP_BRANCHES;
+		gp_events[branch_idx].max = LOOP_BRANCHES;
+	}
+}
+
 volatile uint64_t irq_received;
 
 static void cnt_overflow(isr_regs_t *regs)
@@ -833,6 +858,9 @@  static void check_invalid_rdpmc_gp(void)
 
 int main(int ac, char **av)
 {
+	int instruction_idx;
+	int branch_idx;
+
 	setup_vm();
 	handle_irq(PMI_VECTOR, cnt_overflow);
 	buf = malloc(N*64);
@@ -846,13 +874,18 @@  int main(int ac, char **av)
 		}
 		gp_events = (struct pmu_event *)intel_gp_events;
 		gp_events_size = sizeof(intel_gp_events)/sizeof(intel_gp_events[0]);
+		instruction_idx = INTEL_INSTRUCTIONS_IDX;
+		branch_idx = INTEL_BRANCHES_IDX;
 		report_prefix_push("Intel");
 		set_ref_cycle_expectations();
 	} else {
 		gp_events_size = sizeof(amd_gp_events)/sizeof(amd_gp_events[0]);
 		gp_events = (struct pmu_event *)amd_gp_events;
+		instruction_idx = AMD_INSTRUCTIONS_IDX;
+		branch_idx = AMD_BRANCHES_IDX;
 		report_prefix_push("AMD");
 	}
+	adjust_events_range(gp_events, instruction_idx, branch_idx);
 
 	printf("PMU version:         %d\n", pmu.version);
 	printf("GP counters:         %d\n", pmu.nr_gp_counters);