diff mbox

[kvm-unit-tests,RFC,3/3] x86/pmu: Create AMD PMU test code

Message ID 1501820670-23194-4-git-send-email-wei@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Wei Huang Aug. 4, 2017, 4:24 a.m. UTC
AMD PMU implementation is very different from Intel. This patch adds
a new test case, called amd_pmu, to x86 architecture. The
implementation of amd_pmu.c is based on intel_pmu.c, with focus on
AMD's general-purpose registers. To avoid running on Intel CPUs,
we check the CPU's vendor name before executing the tests.

Signed-off-by: Wei Huang <wei@redhat.com>
---
 x86/Makefile.common |   3 +-
 x86/amd_pmu.c       | 265 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 x86/unittests.cfg   |   4 +
 3 files changed, 271 insertions(+), 1 deletion(-)
 create mode 100644 x86/amd_pmu.c

Comments

Andrew Jones Aug. 4, 2017, 8:14 a.m. UTC | #1
Hi Wei,

I didn't review the important part of this test, i.e. what it's
testing and how, so below are just some nits.

On Thu, Aug 03, 2017 at 11:24:30PM -0500, Wei Huang wrote:
> AMD PMU implementation is very different from Intel. This patch adds
> a new test case, called amd_pmu, to x86 architecture. The
> implementation of amd_pmu.c is based on intel_pmu.c, with focus on
> AMD's general-purpose registers. To avoid running on Intel CPUs,
> we check the CPU's vendor name before executing the tests.
> 
> Signed-off-by: Wei Huang <wei@redhat.com>
> ---
>  x86/Makefile.common |   3 +-
>  x86/amd_pmu.c       | 265 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>  x86/unittests.cfg   |   4 +
>  3 files changed, 271 insertions(+), 1 deletion(-)
>  create mode 100644 x86/amd_pmu.c
> 
> diff --git a/x86/Makefile.common b/x86/Makefile.common
> index d0f4ed1..36bcf90 100644
> --- a/x86/Makefile.common
> +++ b/x86/Makefile.common
> @@ -45,7 +45,8 @@ tests-common = $(TEST_DIR)/vmexit.flat $(TEST_DIR)/tsc.flat \
>                 $(TEST_DIR)/realmode.flat $(TEST_DIR)/msr.flat \
>                 $(TEST_DIR)/hypercall.flat $(TEST_DIR)/sieve.flat \
>                 $(TEST_DIR)/kvmclock_test.flat  $(TEST_DIR)/eventinj.flat \
> -               $(TEST_DIR)/s3.flat $(TEST_DIR)/intel_pmu.flat $(TEST_DIR)/setjmp.flat \
> +               $(TEST_DIR)/s3.flat $(TEST_DIR)/intel_pmu.flat \
> +	       $(TEST_DIR)/amd_pmu.flat $(TEST_DIR)/setjmp.flat \

mixing tabs and spaces here

>                 $(TEST_DIR)/tsc_adjust.flat $(TEST_DIR)/asyncpf.flat \
>                 $(TEST_DIR)/init.flat $(TEST_DIR)/smap.flat \
>                 $(TEST_DIR)/hyperv_synic.flat $(TEST_DIR)/hyperv_stimer.flat \
> diff --git a/x86/amd_pmu.c b/x86/amd_pmu.c
> new file mode 100644
> index 0000000..006eccf
> --- /dev/null
> +++ b/x86/amd_pmu.c
> @@ -0,0 +1,265 @@
> +/*
> + * vPMU testing for AMD CPUs
> + *
> + * Copyright (c) 2017 Red Hat Inc
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2.
> + */
> +
> +#include "x86/msr.h"
> +#include "x86/processor.h"
> +#include "x86/apic-defs.h"
> +#include "x86/apic.h"

apic-defs.h is already included by apic.h, so including it here is redundant

> +#include "x86/desc.h"
> +#include "x86/isr.h"
> +#include "x86/vm.h"
> +
> +#include "libcflat.h"
> +#include "pmu.h"
> +#include <stdint.h>

stdint.h is already included by libcflat.h, so including it here is redundant

> +
> +#define AMD64_NUM_COUNTERS		4
> +
> +#define CPUID_VENDOR_AMD_1   0x68747541 /* "Auth" */
> +#define CPUID_VENDOR_AMD_2   0x69746e65 /* "enti" */
> +#define CPUID_VENDOR_AMD_3   0x444d4163 /* "cAMD" */
> +
> +struct pmu_event gp_events[] = {
> +	{"core cycles", 0x0076, 1*N, 50*N},
> +	{"instructions", 0x00c0, 10*N, 10.2*N},
> +	{"cache reference", 0x077d, 1, 2*N},
> +	{"cache misses", 0x077e, 0, 1*N},
> +	{"branches", 0x00c2, 1*N, 1.1*N},
> +	{"branch misses", 0x00c3, 0, 0.1*N},
> +	{"stalls frontend", 0x00d0, 0, 0.1*N},
> +	{"stalls backend", 0x00d1, 0, 30*N},
> +};
> +
> +char *buf;
> +static int num_counters;
> +volatile uint64_t irq_received;

all above globals can be static

> +
> +static bool check_irq(void)
> +{
> +	int i;
> +	irq_received = 0;
> +
> +	irq_enable();
> +	for (i = 0; i < 100000 && !irq_received; i++)
> +		asm volatile("pause");
> +	irq_disable();
> +
> +	return irq_received;
> +}
> +
> +static inline void loop()
> +{
> +	unsigned long tmp, tmp2, tmp3;
> +
> +	asm volatile("1: mov (%1), %2\n\t"
> +		     "   add $64, %1\n\t"
> +		     "   nop\n\t"
> +		     "   nop\n\t"
> +		     "   nop\n\t"
> +		     "   nop\n\t"
> +		     "   nop\n\t"
> +		     "   nop\n\t"
> +		     "   nop\n\t"
> +		     "   loop 1b"
> +		     : "=c"(tmp), "=r"(tmp2), "=r"(tmp3)
> +		     : "0"(N), "1"(buf));
> +}
> +
> +static struct pmu_event* get_counter_event(pmu_counter_t *cnt)
> +{
> +	int i;
> +
> +	for (i = 0; i < sizeof(gp_events)/sizeof(gp_events[0]); i++)
> +		if (gp_events[i].unit_sel == (cnt->config & 0xffff))
> +			return &gp_events[i];
> +
> +	return (void*)0;

NULL

> +}
> +
> +static void cnt_overflow(isr_regs_t *regs)
> +{
> +	irq_received++;
> +	apic_write(APIC_EOI, 0);
> +}
> +
> +static int event_to_global_idx(pmu_counter_t *cnt)
> +{
> +	return cnt->ctr - MSR_K7_PERFCTR0;
> +}
> +
> +static bool verify_event(uint64_t count, struct pmu_event *e)
> +{
> +	return count >= e->min && count <= e->max;
> +}
> +
> +static bool verify_counter(pmu_counter_t *cnt)
> +{
> +	return verify_event(cnt->count, get_counter_event(cnt));
> +}
> +
> +static void start_event(pmu_counter_t *evt)
> +{
> +	wrmsr(evt->ctr, evt->count);
> +	wrmsr(MSR_K7_EVNTSEL0 + event_to_global_idx(evt),
> +	      evt->config | EVNTSEL_EN);
> +}
> +
> +static void stop_event(pmu_counter_t *evt)
> +{
> +	wrmsr(MSR_K7_EVNTSEL0 + event_to_global_idx(evt),
> +	      evt->config & ~EVNTSEL_EN);
> +	evt->count = rdmsr(evt->ctr);
> +}
> +
> +static void measure(pmu_counter_t *evt, int count)
> +{
> +	int i;
> +
> +	for (i = 0; i < count; i++)
> +		start_event(&evt[i]);
> +
> +	loop();
> +
> +	for (i = 0; i < count; i++)
> +		stop_event(&evt[i]);
> +}
> +
> +static void check_gp_counter(struct pmu_event *evt)
> +{
> +	int i;
> +
> +	pmu_counter_t cnt = {
> +		.ctr = MSR_K7_PERFCTR0,
> +		.config = EVNTSEL_OS | EVNTSEL_USR | evt->unit_sel,
> +	};
> +
> +	for (i = 0; i < num_counters; i++, cnt.ctr++) {
> +		cnt.count = 0;
> +		measure(&cnt, 1);
> +		report("%s-%d", verify_event(cnt.count, evt), evt->name, i);
> +	}
> +}
> +
> +static void check_gp_counters(void)
> +{
> +	int i;
> +
> +	for (i = 0; i < sizeof(gp_events)/sizeof(gp_events[0]); i++) {
> +		check_gp_counter(&gp_events[i]);
> +	}
> +}
> +
> +static void check_counter_overflow(void)
> +{
> +	uint64_t count;
> +	int i;
> +	pmu_counter_t cnt = {
> +		.ctr = MSR_K7_PERFCTR0,
> +		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel,
> +		.count = 0,
> +	};
> +
> +	measure(&cnt, 1);
> +	count = cnt.count;
> +
> +	report_prefix_push("overflow");
> +
> +	for (i = 0; i < num_counters; i++, cnt.ctr++) {
> +		if (i % 2)
> +			cnt.config |= EVNTSEL_INT;
> +		else
> +			cnt.config &= ~EVNTSEL_INT;
> +		cnt.count = 1 - count;
> +		measure(&cnt, 1);
> +		report("cntr-%d", cnt.count < 20, i);
> +		report("irq-%d", check_irq() == (i % 2), i);
> +	}
> +
> +	report_prefix_pop();
> +}
> +
> +static void check_gp_counter_cmask(void)
> +{
> +	pmu_counter_t cnt = {
> +		.ctr = MSR_K7_PERFCTR0,
> +		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel,
> +		.count = 0,
> +	};
> +
> +	cnt.config |= (0x2 << EVNTSEL_CMASK_SHIFT);
> +	measure(&cnt, 1);
> +	report("cmask", cnt.count < gp_events[1].min);
> +}
> +
> +static void check_rdpmc(void)
> +{
> +	uint64_t val = 0x1f3456789ull;
> +	int i;
> +
> +	report_prefix_push("rdpmc");
> +
> +	for (i = 0; i < num_counters; i++) {
> +		uint64_t x = val;
> +		wrmsr(MSR_K7_PERFCTR0 + i, val);
> +		report("cntr-%d", rdpmc(i) == x, i);
> +	}
> +
> +	report_prefix_pop();
> +}
> +
> +static void check_counters_many(void)
> +{
> +	pmu_counter_t cnt[10];
> +	int i, n;
> +
> +	for (n = 0; n < num_counters; n++) {
> +		cnt[n].count = 0;
> +		cnt[n].ctr = MSR_K7_PERFCTR0 + n;
> +		cnt[n].config = EVNTSEL_OS | EVNTSEL_USR | gp_events[n].unit_sel;
> +	}
> +
> +	measure(cnt, n);
> +
> +	for (i = 0; i < n; i++)
> +		if (!verify_counter(&cnt[i]))
> +			break;

Adding {} around the for loop body might be nice here

> +
> +	report("all counters", i == n);
> +}
> +
> +#define IS_AMD_CPU(cpuid)  ((cpuid).b == CPUID_VENDOR_AMD_1 && \
> +			    (cpuid).d == CPUID_VENDOR_AMD_2 && \
> +			    (cpuid).c == CPUID_VENDOR_AMD_3)

should add a blank line here

> +int main(int ac, char **av)
> +{
> +	struct cpuid id = cpuid(0);
> +
> +	setup_vm();
> +	setup_idt();
> +	handle_irq(PC_VECTOR, cnt_overflow);
> +	buf = vmalloc(N * 64);
> +
> +	if (!IS_AMD_CPU(id)) {
> +		printf("No AMD PMU detected!\n");
> +		return report_summary();
> +	}
> +
> +	num_counters = AMD64_NUM_COUNTERS;
> +	if (num_counters > ARRAY_SIZE(gp_events))
> +		num_counters = ARRAY_SIZE(gp_events);
> +
> +	apic_write(APIC_LVTPC, PC_VECTOR);
> +
> +	check_gp_counters();
> +	check_rdpmc();
> +	check_counters_many();
> +	check_counter_overflow();
> +	check_gp_counter_cmask();
> +
> +	return report_summary();
> +}
> diff --git a/x86/unittests.cfg b/x86/unittests.cfg
> index 74156fa..98fb3e9 100644
> --- a/x86/unittests.cfg
> +++ b/x86/unittests.cfg
> @@ -145,6 +145,10 @@ file = intel_pmu.flat
>  extra_params = -cpu host
>  check = /proc/sys/kernel/nmi_watchdog=0
>  
> +[amd_pmu]
> +file = amd_pmu.flat
> +extra_params = -cpu host

maybe add 'arch = x86_64', since AMD only supports 64 bit.

> +
>  [port80]
>  file = port80.flat
>  
> -- 
> 2.7.5
>

Thanks,
drew
Wei Huang Aug. 4, 2017, 3:49 p.m. UTC | #2
On 08/04/2017 03:14 AM, Andrew Jones wrote:
> Hi Wei,
> 
> I didn't review the important part of this test, i.e. what it's
> testing and how, so below are just some nits.

Thanks for reviewing it. I agree with most nits below, and will fix them
in the next respin.

> 
> On Thu, Aug 03, 2017 at 11:24:30PM -0500, Wei Huang wrote:
>> AMD PMU implementation is very different from Intel. This patch adds
>> a new test case, called amd_pmu, to x86 architecture. The
>> implementation of amd_pmu.c is based on intel_pmu.c, with focus on
>> AMD's general-purpose registers. To avoid running on Intel CPUs,
>> we check the CPU's vendor name before executing the tests.
>>
>> Signed-off-by: Wei Huang <wei@redhat.com>
>> ---
>>  x86/Makefile.common |   3 +-
>>  x86/amd_pmu.c       | 265 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>>  x86/unittests.cfg   |   4 +
>>  3 files changed, 271 insertions(+), 1 deletion(-)
>>  create mode 100644 x86/amd_pmu.c
>>
>> diff --git a/x86/Makefile.common b/x86/Makefile.common
>> index d0f4ed1..36bcf90 100644
>> --- a/x86/Makefile.common
>> +++ b/x86/Makefile.common
>> @@ -45,7 +45,8 @@ tests-common = $(TEST_DIR)/vmexit.flat $(TEST_DIR)/tsc.flat \
>>                 $(TEST_DIR)/realmode.flat $(TEST_DIR)/msr.flat \
>>                 $(TEST_DIR)/hypercall.flat $(TEST_DIR)/sieve.flat \
>>                 $(TEST_DIR)/kvmclock_test.flat  $(TEST_DIR)/eventinj.flat \
>> -               $(TEST_DIR)/s3.flat $(TEST_DIR)/intel_pmu.flat $(TEST_DIR)/setjmp.flat \
>> +               $(TEST_DIR)/s3.flat $(TEST_DIR)/intel_pmu.flat \
>> +	       $(TEST_DIR)/amd_pmu.flat $(TEST_DIR)/setjmp.flat \
> 
> mixing tabs and spaces here
> 
>>                 $(TEST_DIR)/tsc_adjust.flat $(TEST_DIR)/asyncpf.flat \
>>                 $(TEST_DIR)/init.flat $(TEST_DIR)/smap.flat \
>>                 $(TEST_DIR)/hyperv_synic.flat $(TEST_DIR)/hyperv_stimer.flat \
>> diff --git a/x86/amd_pmu.c b/x86/amd_pmu.c
>> new file mode 100644
>> index 0000000..006eccf
>> --- /dev/null
>> +++ b/x86/amd_pmu.c
>> @@ -0,0 +1,265 @@
>> +/*
>> + * vPMU testing for AMD CPUs
>> + *
>> + * Copyright (c) 2017 Red Hat Inc
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2.
>> + */
>> +
>> +#include "x86/msr.h"
>> +#include "x86/processor.h"
>> +#include "x86/apic-defs.h"
>> +#include "x86/apic.h"
> 
> apic-defs is included by apic
> 
>> +#include "x86/desc.h"
>> +#include "x86/isr.h"
>> +#include "x86/vm.h"
>> +
>> +#include "libcflat.h"
>> +#include "pmu.h"
>> +#include <stdint.h>
> 
> stdint is included by libcflat
> 
>> +
>> +#define AMD64_NUM_COUNTERS		4
>> +
>> +#define CPUID_VENDOR_AMD_1   0x68747541 /* "Auth" */
>> +#define CPUID_VENDOR_AMD_2   0x69746e65 /* "enti" */
>> +#define CPUID_VENDOR_AMD_3   0x444d4163 /* "cAMD" */
>> +
>> +struct pmu_event gp_events[] = {
>> +	{"core cycles", 0x0076, 1*N, 50*N},
>> +	{"instructions", 0x00c0, 10*N, 10.2*N},
>> +	{"cache reference", 0x077d, 1, 2*N},
>> +	{"cache misses", 0x077e, 0, 1*N},
>> +	{"branches", 0x00c2, 1*N, 1.1*N},
>> +	{"branch misses", 0x00c3, 0, 0.1*N},
>> +	{"stalls frontend", 0x00d0, 0, 0.1*N},
>> +	{"stalls backend", 0x00d1, 0, 30*N},
>> +};
>> +
>> +char *buf;
>> +static int num_counters;
>> +volatile uint64_t irq_received;
> 
> all above globals can be static
> 
>> +
>> +static bool check_irq(void)
>> +{
>> +	int i;
>> +	irq_received = 0;
>> +
>> +	irq_enable();
>> +	for (i = 0; i < 100000 && !irq_received; i++)
>> +		asm volatile("pause");
>> +	irq_disable();
>> +
>> +	return irq_received;
>> +}
>> +
>> +static inline void loop()
>> +{
>> +	unsigned long tmp, tmp2, tmp3;
>> +
>> +	asm volatile("1: mov (%1), %2\n\t"
>> +		     "   add $64, %1\n\t"
>> +		     "   nop\n\t"
>> +		     "   nop\n\t"
>> +		     "   nop\n\t"
>> +		     "   nop\n\t"
>> +		     "   nop\n\t"
>> +		     "   nop\n\t"
>> +		     "   nop\n\t"
>> +		     "   loop 1b"
>> +		     : "=c"(tmp), "=r"(tmp2), "=r"(tmp3)
>> +		     : "0"(N), "1"(buf));
>> +}
>> +
>> +static struct pmu_event* get_counter_event(pmu_counter_t *cnt)
>> +{
>> +	int i;
>> +
>> +	for (i = 0; i < sizeof(gp_events)/sizeof(gp_events[0]); i++)
>> +		if (gp_events[i].unit_sel == (cnt->config & 0xffff))
>> +			return &gp_events[i];
>> +
>> +	return (void*)0;
> 
> NULL
> 
>> +}
>> +
>> +static void cnt_overflow(isr_regs_t *regs)
>> +{
>> +	irq_received++;
>> +	apic_write(APIC_EOI, 0);
>> +}
>> +
>> +static int event_to_global_idx(pmu_counter_t *cnt)
>> +{
>> +	return cnt->ctr - MSR_K7_PERFCTR0;
>> +}
>> +
>> +static bool verify_event(uint64_t count, struct pmu_event *e)
>> +{
>> +	return count >= e->min && count <= e->max;
>> +}
>> +
>> +static bool verify_counter(pmu_counter_t *cnt)
>> +{
>> +	return verify_event(cnt->count, get_counter_event(cnt));
>> +}
>> +
>> +static void start_event(pmu_counter_t *evt)
>> +{
>> +	wrmsr(evt->ctr, evt->count);
>> +	wrmsr(MSR_K7_EVNTSEL0 + event_to_global_idx(evt),
>> +	      evt->config | EVNTSEL_EN);
>> +}
>> +
>> +static void stop_event(pmu_counter_t *evt)
>> +{
>> +	wrmsr(MSR_K7_EVNTSEL0 + event_to_global_idx(evt),
>> +	      evt->config & ~EVNTSEL_EN);
>> +	evt->count = rdmsr(evt->ctr);
>> +}
>> +
>> +static void measure(pmu_counter_t *evt, int count)
>> +{
>> +	int i;
>> +
>> +	for (i = 0; i < count; i++)
>> +		start_event(&evt[i]);
>> +
>> +	loop();
>> +
>> +	for (i = 0; i < count; i++)
>> +		stop_event(&evt[i]);
>> +}
>> +
>> +static void check_gp_counter(struct pmu_event *evt)
>> +{
>> +	int i;
>> +
>> +	pmu_counter_t cnt = {
>> +		.ctr = MSR_K7_PERFCTR0,
>> +		.config = EVNTSEL_OS | EVNTSEL_USR | evt->unit_sel,
>> +	};
>> +
>> +	for (i = 0; i < num_counters; i++, cnt.ctr++) {
>> +		cnt.count = 0;
>> +		measure(&cnt, 1);
>> +		report("%s-%d", verify_event(cnt.count, evt), evt->name, i);
>> +	}
>> +}
>> +
>> +static void check_gp_counters(void)
>> +{
>> +	int i;
>> +
>> +	for (i = 0; i < sizeof(gp_events)/sizeof(gp_events[0]); i++) {
>> +		check_gp_counter(&gp_events[i]);
>> +	}
>> +}
>> +
>> +static void check_counter_overflow(void)
>> +{
>> +	uint64_t count;
>> +	int i;
>> +	pmu_counter_t cnt = {
>> +		.ctr = MSR_K7_PERFCTR0,
>> +		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel,
>> +		.count = 0,
>> +	};
>> +
>> +	measure(&cnt, 1);
>> +	count = cnt.count;
>> +
>> +	report_prefix_push("overflow");
>> +
>> +	for (i = 0; i < num_counters; i++, cnt.ctr++) {
>> +		if (i % 2)
>> +			cnt.config |= EVNTSEL_INT;
>> +		else
>> +			cnt.config &= ~EVNTSEL_INT;
>> +		cnt.count = 1 - count;
>> +		measure(&cnt, 1);
>> +		report("cntr-%d", cnt.count < 20, i);
>> +		report("irq-%d", check_irq() == (i % 2), i);
>> +	}
>> +
>> +	report_prefix_pop();
>> +}
>> +
>> +static void check_gp_counter_cmask(void)
>> +{
>> +	pmu_counter_t cnt = {
>> +		.ctr = MSR_K7_PERFCTR0,
>> +		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel,
>> +		.count = 0,
>> +	};
>> +
>> +	cnt.config |= (0x2 << EVNTSEL_CMASK_SHIFT);
>> +	measure(&cnt, 1);
>> +	report("cmask", cnt.count < gp_events[1].min);
>> +}
>> +
>> +static void check_rdpmc(void)
>> +{
>> +	uint64_t val = 0x1f3456789ull;
>> +	int i;
>> +
>> +	report_prefix_push("rdpmc");
>> +
>> +	for (i = 0; i < num_counters; i++) {
>> +		uint64_t x = val;
>> +		wrmsr(MSR_K7_PERFCTR0 + i, val);
>> +		report("cntr-%d", rdpmc(i) == x, i);
>> +	}
>> +
>> +	report_prefix_pop();
>> +}
>> +
>> +static void check_counters_many(void)
>> +{
>> +	pmu_counter_t cnt[10];
>> +	int i, n;
>> +
>> +	for (n = 0; n < num_counters; n++) {
>> +		cnt[n].count = 0;
>> +		cnt[n].ctr = MSR_K7_PERFCTR0 + n;
>> +		cnt[n].config = EVNTSEL_OS | EVNTSEL_USR | gp_events[n].unit_sel;
>> +	}
>> +
>> +	measure(cnt, n);
>> +
>> +	for (i = 0; i < n; i++)
>> +		if (!verify_counter(&cnt[i]))
>> +			break;
> 
> {} for the for loop, might be nice here 
> 
>> +
>> +	report("all counters", i == n);
>> +}
>> +
>> +#define IS_AMD_CPU(cpuid)  ((cpuid).b == CPUID_VENDOR_AMD_1 && \
>> +			    (cpuid).d == CPUID_VENDOR_AMD_2 && \
>> +			    (cpuid).c == CPUID_VENDOR_AMD_3)
> 
> should add a blank line here
> 
>> +int main(int ac, char **av)
>> +{
>> +	struct cpuid id = cpuid(0);
>> +
>> +	setup_vm();
>> +	setup_idt();
>> +	handle_irq(PC_VECTOR, cnt_overflow);
>> +	buf = vmalloc(N * 64);
>> +
>> +	if (!IS_AMD_CPU(id)) {
>> +		printf("No AMD PMU detected!\n");
>> +		return report_summary();
>> +	}
>> +
>> +	num_counters = AMD64_NUM_COUNTERS;
>> +	if (num_counters > ARRAY_SIZE(gp_events))
>> +		num_counters = ARRAY_SIZE(gp_events);
>> +
>> +	apic_write(APIC_LVTPC, PC_VECTOR);
>> +
>> +	check_gp_counters();
>> +	check_rdpmc();
>> +	check_counters_many();
>> +	check_counter_overflow();
>> +	check_gp_counter_cmask();
>> +
>> +	return report_summary();
>> +}
>> diff --git a/x86/unittests.cfg b/x86/unittests.cfg
>> index 74156fa..98fb3e9 100644
>> --- a/x86/unittests.cfg
>> +++ b/x86/unittests.cfg
>> @@ -145,6 +145,10 @@ file = intel_pmu.flat
>>  extra_params = -cpu host
>>  check = /proc/sys/kernel/nmi_watchdog=0
>>  
>> +[amd_pmu]
>> +file = amd_pmu.flat
>> +extra_params = -cpu host
> 
> maybe add 'arch = x86_64', since AMD only supports 64 bit.
> 
>> +
>>  [port80]
>>  file = port80.flat
>>  
>> -- 
>> 2.7.5
>>
> 
> Thanks,
> drew 
>
diff mbox

Patch

diff --git a/x86/Makefile.common b/x86/Makefile.common
index d0f4ed1..36bcf90 100644
--- a/x86/Makefile.common
+++ b/x86/Makefile.common
@@ -45,7 +45,8 @@  tests-common = $(TEST_DIR)/vmexit.flat $(TEST_DIR)/tsc.flat \
                $(TEST_DIR)/realmode.flat $(TEST_DIR)/msr.flat \
                $(TEST_DIR)/hypercall.flat $(TEST_DIR)/sieve.flat \
                $(TEST_DIR)/kvmclock_test.flat  $(TEST_DIR)/eventinj.flat \
-               $(TEST_DIR)/s3.flat $(TEST_DIR)/intel_pmu.flat $(TEST_DIR)/setjmp.flat \
+               $(TEST_DIR)/s3.flat $(TEST_DIR)/intel_pmu.flat \
+	       $(TEST_DIR)/amd_pmu.flat $(TEST_DIR)/setjmp.flat \
                $(TEST_DIR)/tsc_adjust.flat $(TEST_DIR)/asyncpf.flat \
                $(TEST_DIR)/init.flat $(TEST_DIR)/smap.flat \
                $(TEST_DIR)/hyperv_synic.flat $(TEST_DIR)/hyperv_stimer.flat \
diff --git a/x86/amd_pmu.c b/x86/amd_pmu.c
new file mode 100644
index 0000000..006eccf
--- /dev/null
+++ b/x86/amd_pmu.c
@@ -0,0 +1,265 @@ 
+/*
+ * vPMU testing for AMD CPUs
+ *
+ * Copyright (c) 2017 Red Hat Inc
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+
+#include "x86/msr.h"
+#include "x86/processor.h"
+#include "x86/apic-defs.h"
+#include "x86/apic.h"
+#include "x86/desc.h"
+#include "x86/isr.h"
+#include "x86/vm.h"
+
+#include "libcflat.h"
+#include "pmu.h"
+#include <stdint.h>
+
+#define AMD64_NUM_COUNTERS		4
+
+#define CPUID_VENDOR_AMD_1   0x68747541 /* "Auth" */
+#define CPUID_VENDOR_AMD_2   0x69746e65 /* "enti" */
+#define CPUID_VENDOR_AMD_3   0x444d4163 /* "cAMD" */
+
+struct pmu_event gp_events[] = {
+	{"core cycles", 0x0076, 1*N, 50*N},
+	{"instructions", 0x00c0, 10*N, 10.2*N},
+	{"cache reference", 0x077d, 1, 2*N},
+	{"cache misses", 0x077e, 0, 1*N},
+	{"branches", 0x00c2, 1*N, 1.1*N},
+	{"branch misses", 0x00c3, 0, 0.1*N},
+	{"stalls frontend", 0x00d0, 0, 0.1*N},
+	{"stalls backend", 0x00d1, 0, 30*N},
+};
+
+char *buf;
+static int num_counters;
+volatile uint64_t irq_received;
+
+static bool check_irq(void)
+{
+	int i;
+	irq_received = 0;
+
+	irq_enable();
+	for (i = 0; i < 100000 && !irq_received; i++)
+		asm volatile("pause");
+	irq_disable();
+
+	return irq_received;
+}
+
+static inline void loop()
+{
+	unsigned long tmp, tmp2, tmp3;
+
+	asm volatile("1: mov (%1), %2\n\t"
+		     "   add $64, %1\n\t"
+		     "   nop\n\t"
+		     "   nop\n\t"
+		     "   nop\n\t"
+		     "   nop\n\t"
+		     "   nop\n\t"
+		     "   nop\n\t"
+		     "   nop\n\t"
+		     "   loop 1b"
+		     : "=c"(tmp), "=r"(tmp2), "=r"(tmp3)
+		     : "0"(N), "1"(buf));
+}
+
+static struct pmu_event* get_counter_event(pmu_counter_t *cnt)
+{
+	int i;
+
+	for (i = 0; i < sizeof(gp_events)/sizeof(gp_events[0]); i++)
+		if (gp_events[i].unit_sel == (cnt->config & 0xffff))
+			return &gp_events[i];
+
+	return (void*)0;
+}
+
+static void cnt_overflow(isr_regs_t *regs)
+{
+	irq_received++;
+	apic_write(APIC_EOI, 0);
+}
+
+static int event_to_global_idx(pmu_counter_t *cnt)
+{
+	return cnt->ctr - MSR_K7_PERFCTR0;
+}
+
+static bool verify_event(uint64_t count, struct pmu_event *e)
+{
+	return count >= e->min && count <= e->max;
+}
+
+static bool verify_counter(pmu_counter_t *cnt)
+{
+	return verify_event(cnt->count, get_counter_event(cnt));
+}
+
+static void start_event(pmu_counter_t *evt)
+{
+	wrmsr(evt->ctr, evt->count);
+	wrmsr(MSR_K7_EVNTSEL0 + event_to_global_idx(evt),
+	      evt->config | EVNTSEL_EN);
+}
+
+static void stop_event(pmu_counter_t *evt)
+{
+	wrmsr(MSR_K7_EVNTSEL0 + event_to_global_idx(evt),
+	      evt->config & ~EVNTSEL_EN);
+	evt->count = rdmsr(evt->ctr);
+}
+
+static void measure(pmu_counter_t *evt, int count)
+{
+	int i;
+
+	for (i = 0; i < count; i++)
+		start_event(&evt[i]);
+
+	loop();
+
+	for (i = 0; i < count; i++)
+		stop_event(&evt[i]);
+}
+
+static void check_gp_counter(struct pmu_event *evt)
+{
+	int i;
+
+	pmu_counter_t cnt = {
+		.ctr = MSR_K7_PERFCTR0,
+		.config = EVNTSEL_OS | EVNTSEL_USR | evt->unit_sel,
+	};
+
+	for (i = 0; i < num_counters; i++, cnt.ctr++) {
+		cnt.count = 0;
+		measure(&cnt, 1);
+		report("%s-%d", verify_event(cnt.count, evt), evt->name, i);
+	}
+}
+
+static void check_gp_counters(void)
+{
+	int i;
+
+	for (i = 0; i < sizeof(gp_events)/sizeof(gp_events[0]); i++) {
+		check_gp_counter(&gp_events[i]);
+	}
+}
+
+static void check_counter_overflow(void)
+{
+	uint64_t count;
+	int i;
+	pmu_counter_t cnt = {
+		.ctr = MSR_K7_PERFCTR0,
+		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel,
+		.count = 0,
+	};
+
+	measure(&cnt, 1);
+	count = cnt.count;
+
+	report_prefix_push("overflow");
+
+	for (i = 0; i < num_counters; i++, cnt.ctr++) {
+		if (i % 2)
+			cnt.config |= EVNTSEL_INT;
+		else
+			cnt.config &= ~EVNTSEL_INT;
+		cnt.count = 1 - count;
+		measure(&cnt, 1);
+		report("cntr-%d", cnt.count < 20, i);
+		report("irq-%d", check_irq() == (i % 2), i);
+	}
+
+	report_prefix_pop();
+}
+
+static void check_gp_counter_cmask(void)
+{
+	pmu_counter_t cnt = {
+		.ctr = MSR_K7_PERFCTR0,
+		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel,
+		.count = 0,
+	};
+
+	cnt.config |= (0x2 << EVNTSEL_CMASK_SHIFT);
+	measure(&cnt, 1);
+	report("cmask", cnt.count < gp_events[1].min);
+}
+
+static void check_rdpmc(void)
+{
+	uint64_t val = 0x1f3456789ull;
+	int i;
+
+	report_prefix_push("rdpmc");
+
+	for (i = 0; i < num_counters; i++) {
+		uint64_t x = val;
+		wrmsr(MSR_K7_PERFCTR0 + i, val);
+		report("cntr-%d", rdpmc(i) == x, i);
+	}
+
+	report_prefix_pop();
+}
+
+static void check_counters_many(void)
+{
+	pmu_counter_t cnt[10];
+	int i, n;
+
+	for (n = 0; n < num_counters; n++) {
+		cnt[n].count = 0;
+		cnt[n].ctr = MSR_K7_PERFCTR0 + n;
+		cnt[n].config = EVNTSEL_OS | EVNTSEL_USR | gp_events[n].unit_sel;
+	}
+
+	measure(cnt, n);
+
+	for (i = 0; i < n; i++)
+		if (!verify_counter(&cnt[i]))
+			break;
+
+	report("all counters", i == n);
+}
+
+#define IS_AMD_CPU(cpuid)  ((cpuid).b == CPUID_VENDOR_AMD_1 && \
+			    (cpuid).d == CPUID_VENDOR_AMD_2 && \
+			    (cpuid).c == CPUID_VENDOR_AMD_3)
+int main(int ac, char **av)
+{
+	struct cpuid id = cpuid(0);
+
+	setup_vm();
+	setup_idt();
+	handle_irq(PC_VECTOR, cnt_overflow);
+	buf = vmalloc(N * 64);
+
+	if (!IS_AMD_CPU(id)) {
+		printf("No AMD PMU detected!\n");
+		return report_summary();
+	}
+
+	num_counters = AMD64_NUM_COUNTERS;
+	if (num_counters > ARRAY_SIZE(gp_events))
+		num_counters = ARRAY_SIZE(gp_events);
+
+	apic_write(APIC_LVTPC, PC_VECTOR);
+
+	check_gp_counters();
+	check_rdpmc();
+	check_counters_many();
+	check_counter_overflow();
+	check_gp_counter_cmask();
+
+	return report_summary();
+}
diff --git a/x86/unittests.cfg b/x86/unittests.cfg
index 74156fa..98fb3e9 100644
--- a/x86/unittests.cfg
+++ b/x86/unittests.cfg
@@ -145,6 +145,10 @@  file = intel_pmu.flat
 extra_params = -cpu host
 check = /proc/sys/kernel/nmi_watchdog=0
 
+[amd_pmu]
+file = amd_pmu.flat
+extra_params = -cpu host
+
 [port80]
 file = port80.flat