
[kvm-unit-tests,v2] x86: amd: pmu: test performance counter on AMD

Message ID 20220727124632.44253-1-darcy.sh@antgroup.com (mailing list archive)

Commit Message

SU Hang July 27, 2022, 12:46 p.m. UTC
This adds performance counter tests for AMD CPUs: the four legacy
performance counters [1] available since Family 6 (the AMD K7), and the
six core counters [2] available since Family 15h (decimal 21).
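
For illustration only (not part of the patch): the six-counter core PMU
is advertised by CPUID Fn8000_0001_ECX bit 23 (PerfCtrExtCore), which is
the bit the test checks before switching banks. A minimal sketch using
the existing kvm-unit-tests x86 helpers; the function name is made up
for this example:

	/* Sketch only: raw_cpuid() and struct cpuid are the existing
	 * kvm-unit-tests x86 lib helpers. */
	static bool amd_has_core_perf_ext(void)
	{
		struct cpuid id = raw_cpuid(0x80000001, 0);

		/* CPUID Fn8000_0001_ECX[23] (PerfCtrExtCore): six counter/select
		 * pairs at MSRC001_020[0...B]; otherwise only the four legacy
		 * counters at MSRC001_000[4...7] are implemented. */
		return !!(id.c & (1 << 23));
	}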

Because AMD lacks some of the Intel MSRs (e.g. MSR_CORE_PERF_GLOBAL_STATUS)
and has no fixed counters, the corresponding statements and test cases
are simply skipped on AMD.
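
As an illustration of what replaces the Intel global-control path (a
sketch only, not the patch itself): with no MSR_CORE_PERF_GLOBAL_CTRL on
AMD, a counter is started purely through the EN bit of its event-select
MSR, which is why start_event()/stop_event() only need a different
select-MSR base on AMD. The helper name below is hypothetical:

	/* Sketch only: MSR_K7_PERFCTR0/MSR_K7_EVNTSEL0 and EVNTSEL_EN (bit 22)
	 * are existing kvm-unit-tests definitions. */
	static void amd_start_counter(int i, u64 event_sel)
	{
		wrmsr(MSR_K7_PERFCTR0 + i, 0);                      /* reset the count */
		wrmsr(MSR_K7_EVNTSEL0 + i, event_sel | EVNTSEL_EN); /* EN bit starts it */
	}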

Another thing worth mentioning: AMD performance counters do not offer a
'reference cycles' event, so that test is disabled as well.

refs[1]: MSRC001_000[4...7] [Performance Event Counter [3:0]] (Core::X86::Msr::PERF_LEGACY_CTR)
refs[2]: MSRC001_020[1...B] [Performance Event Counter [5:0]] (Core::X86::Msr::PERF_CTR)
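
Sketch only, not part of the patch: the Family 15h bank interleaves the
event-select and counter MSRs, so counter i is addressed with a stride
of 2, unlike the contiguous legacy bank. The hypothetical helper below
just restates the addressing used throughout the patch:

	/* Sketch only: returns the MSR address of general-purpose counter i. */
	static u64 amd_ctr_msr(int i, bool core_ext)
	{
		return core_ext ? MSR_F15H_PERF_CTR + 2 * i  /* 0xc0010201, 0xc0010203, ... */
				: MSR_K7_PERFCTR0 + i;       /* 0xc0010004, 0xc0010005, ... */
	}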

Signed-off-by: SU Hang <darcy.sh@antgroup.com>
---
 lib/x86/msr.h |  23 +++++
 x86/pmu.c     | 251 ++++++++++++++++++++++++++++++++++++++++----------
 2 files changed, 223 insertions(+), 51 deletions(-)

Patch

diff --git a/lib/x86/msr.h b/lib/x86/msr.h
index fa1c0c8..a8aa907 100644
--- a/lib/x86/msr.h
+++ b/lib/x86/msr.h
@@ -129,6 +129,29 @@ 
 #define MSR_AMD64_IBSDCPHYSAD		0xc0011039
 #define MSR_AMD64_IBSCTL		0xc001103a
 
+/* Fam 15h MSRs */
+#define MSR_F15H_PERF_CTL		0xc0010200
+#define MSR_F15H_PERF_CTL0		MSR_F15H_PERF_CTL
+#define MSR_F15H_PERF_CTL1		(MSR_F15H_PERF_CTL + 2)
+#define MSR_F15H_PERF_CTL2		(MSR_F15H_PERF_CTL + 4)
+#define MSR_F15H_PERF_CTL3		(MSR_F15H_PERF_CTL + 6)
+#define MSR_F15H_PERF_CTL4		(MSR_F15H_PERF_CTL + 8)
+#define MSR_F15H_PERF_CTL5		(MSR_F15H_PERF_CTL + 10)
+
+#define MSR_F15H_PERF_CTR		0xc0010201
+#define MSR_F15H_PERF_CTR0		MSR_F15H_PERF_CTR
+#define MSR_F15H_PERF_CTR1		(MSR_F15H_PERF_CTR + 2)
+#define MSR_F15H_PERF_CTR2		(MSR_F15H_PERF_CTR + 4)
+#define MSR_F15H_PERF_CTR3		(MSR_F15H_PERF_CTR + 6)
+#define MSR_F15H_PERF_CTR4		(MSR_F15H_PERF_CTR + 8)
+#define MSR_F15H_PERF_CTR5		(MSR_F15H_PERF_CTR + 10)
+
+#define MSR_F15H_NB_PERF_CTL		0xc0010240
+#define MSR_F15H_NB_PERF_CTR		0xc0010241
+#define MSR_F15H_PTSC			0xc0010280
+#define MSR_F15H_IC_CFG			0xc0011021
+#define MSR_F15H_EX_CFG			0xc001102c
+
 /* Fam 10h MSRs */
 #define MSR_FAM10H_MMIO_CONF_BASE	0xc0010058
 #define FAM10H_MMIO_CONF_ENABLE		(1<<0)
diff --git a/x86/pmu.c b/x86/pmu.c
index a46bdbf..77332b6 100644
--- a/x86/pmu.c
+++ b/x86/pmu.c
@@ -83,7 +83,9 @@  struct pmu_event {
 	uint32_t unit_sel;
 	int min;
 	int max;
-} gp_events[] = {
+} *gp_events;
+
+struct pmu_event intel_gp_event[] = {
 	{"core cycles", 0x003c, 1*N, 50*N},
 	{"instructions", 0x00c0, 10*N, 10.2*N},
 	{"ref cycles", 0x013c, 1*N, 30*N},
@@ -91,16 +93,26 @@  struct pmu_event {
 	{"llc misses", 0x412e, 1, 1*N},
 	{"branches", 0x00c4, 1*N, 1.1*N},
 	{"branch misses", 0x00c5, 0, 0.1*N},
-}, fixed_events[] = {
+};
+struct pmu_event amd_gp_event[] = {
+	{ "core cycles", 0x0076, 1 * N, 50 * N },
+	{ "instructions", 0x00c0, 10 * N, 10.9 * N },
+	{ "llc references", 0x028f, 1, 5 * N },
+	{ "branches", 0x00c2, 1 * N, 1.1 * N },
+	{ "branch misses", 0x00c3, 0, 0.1 * N },
+};
+struct pmu_event fixed_events[] = {
 	{"fixed 1", MSR_CORE_PERF_FIXED_CTR0, 10*N, 10.2*N},
 	{"fixed 2", MSR_CORE_PERF_FIXED_CTR0 + 1, 1*N, 30*N},
 	{"fixed 3", MSR_CORE_PERF_FIXED_CTR0 + 2, 0.1*N, 30*N}
 };
 
 #define PMU_CAP_FW_WRITES	(1ULL << 13)
-static u64 gp_counter_base = MSR_IA32_PERFCTR0;
+static u64 gp_counter_base;
 
 static int num_counters;
+static int num_gp_event;
+static bool is_intel_chip;
 
 char *buf;
 
@@ -134,6 +146,9 @@  static bool check_irq(void)
 
 static bool is_gp(pmu_counter_t *evt)
 {
+	/* MSR_F15H_PERF_CTR (0xc0010201) and MSR_K7_PERFCTR0 (0xc0010004)
+	 * both happen to be greater than MSR_IA32_PMC0, so this check
+	 * also covers the AMD general-purpose counters. */
 	return evt->ctr < MSR_CORE_PERF_FIXED_CTR0 ||
 		evt->ctr >= MSR_IA32_PMC0;
 }
@@ -149,7 +164,8 @@  static struct pmu_event* get_counter_event(pmu_counter_t *cnt)
 	if (is_gp(cnt)) {
 		int i;
 
-		for (i = 0; i < sizeof(gp_events)/sizeof(gp_events[0]); i++)
+		for (i = 0; i < num_gp_event; i++)
+
 			if (gp_events[i].unit_sel == (cnt->config & 0xffff))
 				return &gp_events[i];
 	} else
@@ -161,6 +177,8 @@  static struct pmu_event* get_counter_event(pmu_counter_t *cnt)
 static void global_enable(pmu_counter_t *cnt)
 {
 	cnt->idx = event_to_global_idx(cnt);
+	if (!is_intel_chip)
+		return;
 
 	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, rdmsr(MSR_CORE_PERF_GLOBAL_CTRL) |
 			(1ull << cnt->idx));
@@ -168,6 +186,8 @@  static void global_enable(pmu_counter_t *cnt)
 
 static void global_disable(pmu_counter_t *cnt)
 {
+	if (!is_intel_chip)
+		return;
 	wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, rdmsr(MSR_CORE_PERF_GLOBAL_CTRL) &
 			~(1ull << cnt->idx));
 }
@@ -176,14 +196,19 @@  static void global_disable(pmu_counter_t *cnt)
 static void start_event(pmu_counter_t *evt)
 {
     wrmsr(evt->ctr, evt->count);
-    if (is_gp(evt))
-	    wrmsr(MSR_P6_EVNTSEL0 + event_to_global_idx(evt),
-			    evt->config | EVNTSEL_EN);
-    else {
+	if (is_gp(evt)) {
+		uint64_t sel;
+		if (is_intel_chip)
+			sel = MSR_P6_EVNTSEL0;
+		else if (gp_counter_base == MSR_F15H_PERF_CTR)
+			sel = MSR_F15H_PERF_CTL;
+		else
+			sel = MSR_K7_EVNTSEL0;
+		wrmsr(sel + event_to_global_idx(evt), evt->config | EVNTSEL_EN);
+	} else {
 	    uint32_t ctrl = rdmsr(MSR_CORE_PERF_FIXED_CTR_CTRL);
 	    int shift = (evt->ctr - MSR_CORE_PERF_FIXED_CTR0) * 4;
 	    uint32_t usrospmi = 0;
-
 	    if (evt->config & EVNTSEL_OS)
 		    usrospmi |= (1 << 0);
 	    if (evt->config & EVNTSEL_USR)
@@ -200,10 +225,16 @@  static void start_event(pmu_counter_t *evt)
 static void stop_event(pmu_counter_t *evt)
 {
 	global_disable(evt);
-	if (is_gp(evt))
-		wrmsr(MSR_P6_EVNTSEL0 + event_to_global_idx(evt),
-				evt->config & ~EVNTSEL_EN);
-	else {
+	if (is_gp(evt)) {
+		uint64_t sel;
+		if (is_intel_chip)
+			sel = MSR_P6_EVNTSEL0;
+		else if (gp_counter_base == MSR_F15H_PERF_CTR)
+			sel = MSR_F15H_PERF_CTL;
+		else
+			sel = MSR_K7_EVNTSEL0;
+		wrmsr(sel + event_to_global_idx(evt), evt->config & ~EVNTSEL_EN);
+	} else {
 		uint32_t ctrl = rdmsr(MSR_CORE_PERF_FIXED_CTR_CTRL);
 		int shift = (evt->ctr - MSR_CORE_PERF_FIXED_CTR0) * 4;
 		wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, ctrl & ~(0xf << shift));
@@ -241,10 +272,16 @@  static void check_gp_counter(struct pmu_event *evt)
 	};
 	int i;
 
-	for (i = 0; i < num_counters; i++, cnt.ctr++) {
+	for (i = 0; i < num_counters; i++) {
 		cnt.count = 0;
 		measure(&cnt, 1);
 		report(verify_event(cnt.count, evt), "%s-%d", evt->name, i);
+		if (gp_counter_base == MSR_F15H_PERF_CTR)
+			/* Since Family 15h the counter MSRs are interleaved
+			 * with the event select MSRs, hence the stride of 2. */
+			cnt.ctr += 2;
+		else
+			cnt.ctr++;
 	}
 }
 
@@ -252,7 +289,7 @@  static void check_gp_counters(void)
 {
 	int i;
 
-	for (i = 0; i < sizeof(gp_events)/sizeof(gp_events[0]); i++)
+	for (i = 0; i < num_gp_event; i++)
 		if (!(ebx.full & (1 << i)))
 			check_gp_counter(&gp_events[i]);
 		else
@@ -288,7 +325,12 @@  static void check_counters_many(void)
 		cnt[n].count = 0;
 		cnt[n].ctr = gp_counter_base + n;
 		cnt[n].config = EVNTSEL_OS | EVNTSEL_USR |
-			gp_events[i % ARRAY_SIZE(gp_events)].unit_sel;
+				gp_events[i % num_gp_event].unit_sel;
+
+		if (gp_counter_base == MSR_F15H_PERF_CTR)
+			/* Since Family 15h the counter MSRs are interleaved
+			 * with the event select MSRs, hence the stride of 2. */
+			cnt[n].ctr += n;
 		n++;
 	}
 	for (i = 0; i < edx.split.num_counters_fixed; i++) {
@@ -311,6 +353,7 @@  static void check_counter_overflow(void)
 {
 	uint64_t count;
 	int i;
+	int loop_times;
 	pmu_counter_t cnt = {
 		.ctr = gp_counter_base,
 		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel /* instructions */,
@@ -319,18 +362,34 @@  static void check_counter_overflow(void)
 	measure(&cnt, 1);
 	count = cnt.count;
 
-	/* clear status before test */
-	wrmsr(MSR_CORE_PERF_GLOBAL_OVF_CTRL, rdmsr(MSR_CORE_PERF_GLOBAL_STATUS));
+	if (is_intel_chip)
+		/* clear status before test */
+		wrmsr(MSR_CORE_PERF_GLOBAL_OVF_CTRL,
+		      rdmsr(MSR_CORE_PERF_GLOBAL_STATUS));
 
 	report_prefix_push("overflow");
 
-	for (i = 0; i < num_counters + 1; i++, cnt.ctr++) {
+	if (is_intel_chip)
+		loop_times = num_counters + 1;
+	else
+		/* AMD CPUs don't have fixed counters. */
+		loop_times = num_counters;
+	for (i = 0; i < loop_times; i++) {
 		uint64_t status;
 		int idx;
 
-		cnt.count = 1 - count;
-		if (gp_counter_base == MSR_IA32_PMC0)
+		if (is_intel_chip) {
+			cnt.count = 1 - count;
+			if (gp_counter_base == MSR_IA32_PMC0)
+				cnt.count &= (1ull << eax.split.bit_width) - 1;
+		} else {
+			/* KVM does not count exactly on AMD CPUs, possibly
+			 * because of instructions executed in the hypervisor
+			 * when the AMD64_EVENTSEL_GUESTONLY bit is set, so
+			 * only require an approximate overflow. */
+			cnt.count = 1 - count / 2;
 			cnt.count &= (1ull << eax.split.bit_width) - 1;
+		}
 
 		if (i == num_counters) {
 			cnt.ctr = fixed_events[0].unit_sel;
@@ -343,13 +402,21 @@  static void check_counter_overflow(void)
 			cnt.config &= ~EVNTSEL_INT;
 		idx = event_to_global_idx(&cnt);
 		measure(&cnt, 1);
-		report(cnt.count == 1, "cntr-%d", i);
-		status = rdmsr(MSR_CORE_PERF_GLOBAL_STATUS);
-		report(status & (1ull << idx), "status-%d", i);
-		wrmsr(MSR_CORE_PERF_GLOBAL_OVF_CTRL, status);
-		status = rdmsr(MSR_CORE_PERF_GLOBAL_STATUS);
-		report(!(status & (1ull << idx)), "status clear-%d", i);
+		if (is_intel_chip) {
+			report(cnt.count == 1, "cntr-%d", i);
+			status = rdmsr(MSR_CORE_PERF_GLOBAL_STATUS);
+			report(status & (1ull << idx), "status-%d", i);
+			wrmsr(MSR_CORE_PERF_GLOBAL_OVF_CTRL, status);
+			status = rdmsr(MSR_CORE_PERF_GLOBAL_STATUS);
+			report(!(status & (1ull << idx)), "status clear-%d", i);
+		}
 		report(check_irq() == (i % 2), "irq-%d", i);
+		if (gp_counter_base == MSR_F15H_PERF_CTR)
+			/* Since Family 15h the counter MSRs are interleaved
+			 * with the event select MSRs, hence the stride of 2. */
+			cnt.ctr += 2;
+		else
+			cnt.ctr++;
 	}
 
 	report_prefix_pop();
@@ -381,9 +448,13 @@  static void do_rdpmc_fast(void *ptr)
 
 static void check_rdpmc(void)
 {
-	uint64_t val = 0xff0123456789ull;
 	bool exc;
 	int i;
+	uint64_t val;
+	if (is_intel_chip)
+		val = 0xff0123456789ull;
+	else
+		val = 0xffff0123456789ull;
 
 	report_prefix_push("rdpmc");
 
@@ -406,7 +477,10 @@  static void check_rdpmc(void)
 		/* Mask according to the number of supported bits */
 		x &= (1ull << eax.split.bit_width) - 1;
 
-		wrmsr(gp_counter_base + i, val);
+		if (gp_counter_base == MSR_F15H_PERF_CTR)
+			wrmsr(gp_counter_base + i * 2, val);
+		else
+			wrmsr(gp_counter_base + i, val);
 		report(rdpmc(i) == x, "cntr-%d", i);
 
 		exc = test_for_exception(GP_VECTOR, do_rdpmc_fast, &cnt);
@@ -453,9 +527,10 @@  static void check_running_counter_wrmsr(void)
 	stop_event(&evt);
 	report(evt.count < gp_events[1].min, "cntr");
 
-	/* clear status before overflow test */
-	wrmsr(MSR_CORE_PERF_GLOBAL_OVF_CTRL,
-	      rdmsr(MSR_CORE_PERF_GLOBAL_STATUS));
+	if (is_intel_chip)
+		/* clear status before overflow test */
+		wrmsr(MSR_CORE_PERF_GLOBAL_OVF_CTRL,
+		      rdmsr(MSR_CORE_PERF_GLOBAL_STATUS));
 
 	evt.count = 0;
 	start_event(&evt);
@@ -468,8 +543,10 @@  static void check_running_counter_wrmsr(void)
 
 	loop();
 	stop_event(&evt);
-	status = rdmsr(MSR_CORE_PERF_GLOBAL_STATUS);
-	report(status & 1, "status");
+	if (is_intel_chip) {
+		status = rdmsr(MSR_CORE_PERF_GLOBAL_STATUS);
+		report(status & 1, "status");
+	}
 
 	report_prefix_pop();
 }
@@ -605,6 +682,45 @@  static void  check_gp_counters_write_width(void)
 	}
 }
 
+static void check_amd_gp_counters_write_width(void)
+{
+	u64 val_64 = 0xffffff0123456789ull;
+	u64 val_32 = val_64 & ((1ull << 32) - 1);
+	u64 val_max_width = val_64 & ((1ull << eax.split.bit_width) - 1);
+	int i;
+
+	/* There are 4 legacy counters and 6 core PerfMon counters; exercise
+	 * the 4 that exist in both banks, since they alias each other. */
+	int legacy_num_counter = 4;
+	for (i = 0; i < legacy_num_counter; i++) {
+		wrmsr(MSR_K7_PERFCTR0 + i, val_32);
+		assert(rdmsr(MSR_K7_PERFCTR0 + i) == val_32);
+		assert(rdmsr(MSR_F15H_PERF_CTR + 2 * i) == val_32);
+
+		wrmsr(MSR_K7_PERFCTR0 + i, val_max_width);
+		assert(rdmsr(MSR_K7_PERFCTR0 + i) == val_max_width);
+		assert(rdmsr(MSR_F15H_PERF_CTR + 2 * i) == val_max_width);
+
+		wrmsr(MSR_K7_PERFCTR0 + i, val_64);
+		assert(rdmsr(MSR_K7_PERFCTR0 + i) == val_max_width);
+		assert(rdmsr(MSR_F15H_PERF_CTR + 2 * i) == val_max_width);
+	}
+
+	for (i = 0; i < legacy_num_counter; i++) {
+		wrmsr(MSR_F15H_PERF_CTR + 2 * i, val_32);
+		assert(rdmsr(MSR_K7_PERFCTR0 + i) == val_32);
+		assert(rdmsr(MSR_F15H_PERF_CTR + 2 * i) == val_32);
+
+		wrmsr(MSR_F15H_PERF_CTR + 2 * i, val_max_width);
+		assert(rdmsr(MSR_K7_PERFCTR0 + i) == val_max_width);
+		assert(rdmsr(MSR_F15H_PERF_CTR + 2 * i) == val_max_width);
+
+		wrmsr(MSR_F15H_PERF_CTR + 2 * i, val_64);
+		assert(rdmsr(MSR_K7_PERFCTR0 + i) == val_max_width);
+		assert(rdmsr(MSR_F15H_PERF_CTR + 2 * i) == val_max_width);
+	}
+}
+
 /*
  * Per the SDM, reference cycles are currently implemented using the
  * core crystal clock, TSC, or bus clock. Calibrate to the TSC
@@ -655,27 +771,47 @@  static void set_ref_cycle_expectations(void)
 
 int main(int ac, char **av)
 {
-	struct cpuid id = cpuid(10);
-
-	setup_vm();
-	handle_irq(PC_VECTOR, cnt_overflow);
-	buf = malloc(N*64);
+	struct cpuid id;
+	is_intel_chip = is_intel();
+	if (is_intel_chip) {
+		id = cpuid(10);
+		eax.full = id.a;
+		ebx.full = id.b;
+		edx.full = id.d;
+		gp_counter_base = MSR_IA32_PERFCTR0;
+		gp_events = (struct pmu_event *)&intel_gp_event;
+		num_gp_event = ARRAY_SIZE(intel_gp_event);
+		if (!eax.split.version_id) {
+			printf("No pmu is detected!\n");
+			return report_summary();
+		}
 
-	eax.full = id.a;
-	ebx.full = id.b;
-	edx.full = id.d;
+		if (eax.split.version_id == 1) {
+			printf("PMU version 1 is not supported\n");
+			return report_summary();
+		}
+	} else {
+		gp_counter_base = MSR_K7_PERFCTR0;
+		gp_events = (struct pmu_event *)&amd_gp_event;
+		num_gp_event = ARRAY_SIZE(amd_gp_event);
+		id = cpuid(1);
+		/* Performance monitoring is supported on K7 (Family 6) and later. */
+		if (((id.a & 0xf00) >> 8) < 6) {
+			printf("No pmu is detected!\n");
+			return report_summary();
+		}
 
-	if (!eax.split.version_id) {
-		printf("No pmu is detected!\n");
-		return report_summary();
+		edx.split.num_counters_fixed = 0;
+		eax.split.num_counters = 4;
+		eax.split.bit_width = 48;
 	}
 
-	if (eax.split.version_id == 1) {
-		printf("PMU version 1 is not supported\n");
-		return report_summary();
-	}
+	setup_vm();
+	handle_irq(PC_VECTOR, cnt_overflow);
+	buf = malloc(N * 64);
 
-	set_ref_cycle_expectations();
+	if (is_intel_chip)
+		set_ref_cycle_expectations();
 
 	printf("PMU version:         %d\n", eax.split.version_id);
 	printf("GP counters:         %d\n", eax.split.num_counters);
@@ -693,7 +829,20 @@  int main(int ac, char **av)
 	} else {
 		check_counters();
 
-		if (rdmsr(MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES) {
+		if (!is_intel_chip) {
+			id = raw_cpuid(0x80000001, 0);
+			if (id.c & (1 << 23)) {
+				/* PerfCtrExtCore: the six-counter core PMU is present. */
+				gp_counter_base = MSR_F15H_PERF_CTR;
+				eax.split.num_counters = 6;
+				num_counters = eax.split.num_counters;
+				report_prefix_push("core perf");
+				check_counters();
+				check_amd_gp_counters_write_width();
+			}
+		}
+
+		if (is_intel_chip && (rdmsr(MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES)) {
 			gp_counter_base = MSR_IA32_PMC0;
 			report_prefix_push("full-width writes");
 			check_counters();