@@ -14,6 +14,8 @@ extern bool cpu_has_hv;
extern bool cpu_has_power_mce;
extern bool cpu_has_siar;
extern bool cpu_has_heai;
+extern bool cpu_has_bhrb;
+extern bool cpu_has_p10_bhrb;
extern bool cpu_has_radix;
extern bool cpu_has_prefix;
extern bool cpu_has_sc_lev;
@@ -40,10 +40,19 @@
#define SPR_LPIDR 0x13f
#define SPR_HEIR 0x153
#define SPR_PTCR 0x1d0
+#define SPR_MMCRA 0x312
+#define MMCRA_BHRBRD UL(0x0000002000000000)
+#define MMCRA_IFM_MASK UL(0x00000000c0000000)
+#define SPR_PMC5 0x317
+#define SPR_PMC6 0x318
#define SPR_MMCR0 0x31b
#define MMCR0_FC UL(0x80000000)
+#define MMCR0_FCP UL(0x20000000)
#define MMCR0_PMAE UL(0x04000000)
+#define MMCR0_BHRBA UL(0x00200000)
+#define MMCR0_FCPC UL(0x00001000)
#define MMCR0_PMAO UL(0x00000080)
+#define MMCR0_FC56 UL(0x00000010)
#define SPR_SIAR 0x31c
/* Machine State Register definitions: */
@@ -10,6 +10,7 @@
#define NR_CPUS 8 /* arbitrarily set for now */
extern uint64_t tb_hz;
+extern uint64_t cpu_hz;
#define NR_MEM_REGIONS 8
#define MR_F_PRIMARY (1U << 0)
@@ -33,6 +33,7 @@ u32 initrd_size;
u32 cpu_to_hwid[NR_CPUS] = { [0 ... NR_CPUS-1] = (~0U) };
int nr_cpus_present;
uint64_t tb_hz;
+uint64_t cpu_hz;
struct mem_region mem_regions[NR_MEM_REGIONS];
phys_addr_t __physical_start, __physical_end;
@@ -42,6 +43,7 @@ struct cpu_set_params {
unsigned icache_bytes;
unsigned dcache_bytes;
uint64_t tb_hz;
+ uint64_t cpu_hz;
};
static void cpu_set(int fdtnode, u64 regval, void *info)
@@ -95,6 +97,22 @@ static void cpu_set(int fdtnode, u64 regval, void *info)
data = (u32 *)prop->data;
params->tb_hz = fdt32_to_cpu(*data);
+ prop = fdt_get_property(dt_fdt(), fdtnode,
+ "ibm,extended-clock-frequency", NULL);
+ if (prop) {
+ data = (u32 *)prop->data;
+ params->cpu_hz = fdt32_to_cpu(*data);
+ params->cpu_hz <<= 32;
+ data = (u32 *)prop->data + 1;
+ params->cpu_hz |= fdt32_to_cpu(*data);
+ } else {
+ prop = fdt_get_property(dt_fdt(), fdtnode,
+ "clock-frequency", NULL);
+ assert(prop != NULL);
+ data = (u32 *)prop->data;
+ params->cpu_hz = fdt32_to_cpu(*data);
+ }
+
read_common_info = true;
}
}
@@ -103,6 +121,8 @@ bool cpu_has_hv;
bool cpu_has_power_mce; /* POWER CPU machine checks */
bool cpu_has_siar;
bool cpu_has_heai;
+bool cpu_has_bhrb;
+bool cpu_has_p10_bhrb;
bool cpu_has_radix;
bool cpu_has_prefix;
bool cpu_has_sc_lev; /* sc interrupt has LEV field in SRR1 */
@@ -119,12 +139,14 @@ static void cpu_init_params(void)
__icache_bytes = params.icache_bytes;
__dcache_bytes = params.dcache_bytes;
tb_hz = params.tb_hz;
+ cpu_hz = params.cpu_hz;
switch (mfspr(SPR_PVR) & PVR_VERSION_MASK) {
case PVR_VER_POWER10:
cpu_has_prefix = true;
cpu_has_sc_lev = true;
cpu_has_pause_short = true;
+ cpu_has_p10_bhrb = true;
case PVR_VER_POWER9:
cpu_has_radix = true;
case PVR_VER_POWER8E:
@@ -133,6 +155,7 @@ static void cpu_init_params(void)
cpu_has_power_mce = true;
cpu_has_heai = true;
cpu_has_siar = true;
+ cpu_has_bhrb = true;
break;
default:
break;
@@ -17,7 +17,8 @@ tests-common = \
$(TEST_DIR)/smp.elf \
$(TEST_DIR)/sprs.elf \
$(TEST_DIR)/timebase.elf \
- $(TEST_DIR)/interrupts.elf
+ $(TEST_DIR)/interrupts.elf \
+ $(TEST_DIR)/pmu.elf
tests-all = $(tests-common) $(tests)
all: directories $(TEST_DIR)/boot_rom.bin $(tests-all)
new file mode 100644
@@ -0,0 +1,405 @@
+/* SPDX-License-Identifier: LGPL-2.0-only */
+/*
+ * Test PMU
+ *
+ * Copyright 2024 Nicholas Piggin, IBM Corp.
+ */
+#include <libcflat.h>
+#include <util.h>
+#include <migrate.h>
+#include <alloc.h>
+#include <asm/setup.h>
+#include <asm/handlers.h>
+#include <asm/hcall.h>
+#include <asm/processor.h>
+#include <asm/time.h>
+#include <asm/barrier.h>
+#include <asm/mmu.h>
+#include "alloc_phys.h"
+#include "vmalloc.h"
+
+static volatile bool got_interrupt;
+static volatile struct pt_regs recorded_regs;
+static volatile unsigned long recorded_mmcr0;
+
+static void illegal_handler(struct pt_regs *regs, void *data)
+{
+ got_interrupt = true;
+ regs_advance_insn(regs);
+}
+
+static void sc_handler(struct pt_regs *regs, void *data)
+{
+ got_interrupt = true;
+}
+
+static void reset_mmcr0(void)
+{
+ mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) | (MMCR0_FC | MMCR0_FC56));
+ mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) & ~(MMCR0_PMAE | MMCR0_PMAO));
+}
+
+static __attribute__((__noinline__)) unsigned long pmc5_count_nr_insns(unsigned long nr)
+{
+ reset_mmcr0();
+ mtspr(SPR_PMC5, 0);
+ mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) & ~(MMCR0_FC | MMCR0_FC56));
+ asm volatile("mtctr %0 ; 1: bdnz 1b" :: "r"(nr) : "ctr");
+ mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) | (MMCR0_FC | MMCR0_FC56));
+
+ return mfspr(SPR_PMC5);
+}
+
+static void test_pmc5_with_fault(void)
+{
+ unsigned long pmc5_1, pmc5_2;
+
+ handle_exception(0x700, &illegal_handler, NULL);
+ handle_exception(0xe40, &illegal_handler, NULL);
+
+ reset_mmcr0();
+ mtspr(SPR_PMC5, 0);
+ mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) & ~(MMCR0_FC | MMCR0_FC56));
+ asm volatile(".long 0x0" ::: "memory");
+ mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) | (MMCR0_FC | MMCR0_FC56));
+ assert(got_interrupt);
+ got_interrupt = false;
+ pmc5_1 = mfspr(SPR_PMC5);
+
+ reset_mmcr0();
+ mtspr(SPR_PMC5, 0);
+ mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) & ~(MMCR0_FC | MMCR0_FC56));
+ asm volatile(".rep 20 ; nop ; .endr ; .long 0x0" ::: "memory");
+ mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) | (MMCR0_FC | MMCR0_FC56));
+ assert(got_interrupt);
+ got_interrupt = false;
+ pmc5_2 = mfspr(SPR_PMC5);
+
+ report(pmc5_1 + 20 == pmc5_2, "PMC5 counts instructions with fault");
+
+ handle_exception(0x700, NULL, NULL);
+ handle_exception(0xe40, NULL, NULL);
+}
+
+static void test_pmc5_with_sc(void)
+{
+ unsigned long pmc5_1, pmc5_2;
+
+ handle_exception(0xc00, &sc_handler, NULL);
+
+ reset_mmcr0();
+ mtspr(SPR_PMC5, 0);
+ mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) & ~(MMCR0_FC | MMCR0_FC56));
+ asm volatile("sc 0" ::: "memory");
+ mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) | (MMCR0_FC | MMCR0_FC56));
+ assert(got_interrupt);
+ got_interrupt = false;
+ pmc5_1 = mfspr(SPR_PMC5);
+
+ reset_mmcr0();
+ mtspr(SPR_PMC5, 0);
+ mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) & ~(MMCR0_FC | MMCR0_FC56));
+ asm volatile(".rep 20 ; nop ; .endr ; sc 0" ::: "memory");
+ mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) | (MMCR0_FC | MMCR0_FC56));
+ assert(got_interrupt);
+ got_interrupt = false;
+ pmc5_2 = mfspr(SPR_PMC5);
+
+ report(pmc5_1 + 20 == pmc5_2, "PMC5 counts instructions with syscall");
+
+ handle_exception(0xc00, NULL, NULL);
+}
+
+static void test_pmc56(void)
+{
+ unsigned long tmp;
+
+ report_prefix_push("pmc56");
+
+ reset_mmcr0();
+ mtspr(SPR_PMC5, 0);
+ mtspr(SPR_PMC6, 0);
+ report(mfspr(SPR_PMC5) == 0, "PMC5 zeroed");
+ report(mfspr(SPR_PMC6) == 0, "PMC6 zeroed");
+ mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) & ~MMCR0_FC);
+ msleep(100);
+ report(mfspr(SPR_PMC5) == 0, "PMC5 frozen");
+ report(mfspr(SPR_PMC6) == 0, "PMC6 frozen");
+ mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) & ~MMCR0_FC56);
+ mdelay(100);
+ mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) | (MMCR0_FC | MMCR0_FC56));
+ report(mfspr(SPR_PMC5) != 0, "PMC5 counting");
+ report(mfspr(SPR_PMC6) != 0, "PMC6 counting");
+
+ /* Dynamic frequency scaling could cause to be out, so don't fail. */
+ tmp = mfspr(SPR_PMC6);
+ report(true, "PMC6 ratio to reported clock frequency is %ld%%", tmp * 1000 / cpu_hz);
+
+ tmp = pmc5_count_nr_insns(100);
+ tmp = pmc5_count_nr_insns(1000) - tmp;
+ report(tmp == 900, "PMC5 counts instructions precisely");
+
+ test_pmc5_with_fault();
+ test_pmc5_with_sc();
+
+ report_prefix_pop();
+}
+
+static void dec_ignore_handler(struct pt_regs *regs, void *data)
+{
+ mtspr(SPR_DEC, 0x7fffffff);
+}
+
+static void pmi_handler(struct pt_regs *regs, void *data)
+{
+ got_interrupt = true;
+ memcpy((void *)&recorded_regs, regs, sizeof(struct pt_regs));
+ recorded_mmcr0 = mfspr(SPR_MMCR0);
+ if (mfspr(SPR_MMCR0) & MMCR0_PMAO) {
+ /* This may cause infinite interrupts, so clear it. */
+ mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) & ~MMCR0_PMAO);
+ }
+}
+
+static void test_pmi(void)
+{
+ report_prefix_push("pmi");
+ handle_exception(0x900, &dec_ignore_handler, NULL);
+ handle_exception(0xf00, &pmi_handler, NULL);
+ reset_mmcr0();
+ mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) | MMCR0_PMAO);
+ mtmsr(mfmsr() | MSR_EE);
+ mtmsr(mfmsr() & ~MSR_EE);
+ report(got_interrupt, "PMAO caused interrupt");
+ got_interrupt = false;
+ handle_exception(0xf00, NULL, NULL);
+ handle_exception(0x900, NULL, NULL);
+ report_prefix_pop();
+}
+
+static void clrbhrb(void)
+{
+ asm volatile("clrbhrb" ::: "memory");
+}
+
+static inline unsigned long mfbhrbe(int nr)
+{
+ unsigned long e;
+
+ asm volatile("mfbhrbe %0,%1" : "=r"(e) : "i"(nr) : "memory");
+
+ return e;
+}
+
+extern unsigned char dummy_branch_1[];
+extern unsigned char dummy_branch_2[];
+
+static __attribute__((__noinline__)) void bhrb_dummy(int i)
+{
+ asm volatile(
+ " cmpdi %0,1 \n\t"
+ " beq 1f \n\t"
+ ".global dummy_branch_1 \n\t"
+ "dummy_branch_1: \n\t"
+ " b 2f \n\t"
+ "1: trap \n\t"
+ ".global dummy_branch_2 \n\t"
+ "dummy_branch_2: \n\t"
+ "2: bne 3f \n\t"
+ " trap \n\t"
+ "3: nop \n\t"
+ : : "r"(i));
+}
+
+#define NR_BHRBE 16
+static unsigned long bhrbe[NR_BHRBE];
+static int nr_bhrbe;
+
+static void run_and_load_bhrb(void)
+{
+ int i;
+
+ mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) & ~MMCR0_PMAE);
+ clrbhrb();
+ mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) | MMCR0_BHRBA);
+ mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) & ~(MMCR0_FC | MMCR0_FCP | MMCR0_FCPC));
+ mtspr(SPR_MMCRA, mfspr(SPR_MMCRA) & ~(MMCRA_BHRBRD | MMCRA_IFM_MASK));
+
+ if (cpu_has_p10_bhrb) {
+ mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) | MMCR0_PMAE);
+ asm volatile("isync" ::: "memory");
+ enter_usermode();
+ bhrb_dummy(0);
+ exit_usermode();
+ mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) & ~MMCR0_PMAE);
+ asm volatile("isync" ::: "memory");
+ } else {
+ mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) | MMCR0_PMAE);
+ asm volatile("isync" ::: "memory");
+ mtmsr(mfmsr());
+ asm volatile(".rept 100 ; nop ; .endr");
+ bhrb_dummy(0);
+ mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) & ~MMCR0_PMAE);
+ asm volatile("isync" ::: "memory");
+ }
+
+ bhrbe[0] = mfbhrbe(0);
+ bhrbe[1] = mfbhrbe(1);
+ bhrbe[2] = mfbhrbe(2);
+ bhrbe[3] = mfbhrbe(3);
+ bhrbe[4] = mfbhrbe(4);
+ bhrbe[5] = mfbhrbe(5);
+ bhrbe[6] = mfbhrbe(6);
+ bhrbe[7] = mfbhrbe(7);
+ bhrbe[8] = mfbhrbe(8);
+ bhrbe[9] = mfbhrbe(9);
+ bhrbe[10] = mfbhrbe(10);
+ bhrbe[11] = mfbhrbe(11);
+ bhrbe[12] = mfbhrbe(12);
+ bhrbe[13] = mfbhrbe(13);
+ bhrbe[14] = mfbhrbe(14);
+ bhrbe[15] = mfbhrbe(15);
+
+ for (i = 0; i < NR_BHRBE; i++) {
+ bhrbe[i] &= ~0x1UL; /* remove prediction bit */
+ if (!bhrbe[i])
+ break;
+ }
+ nr_bhrbe = i;
+}
+
+static void test_bhrb(void)
+{
+ int i;
+
+ if (cpu_has_p10_bhrb && !vm_available())
+ return;
+
+ report_prefix_push("bhrb");
+
+ /* TCG doesn't impelment BHRB yet */
+ handle_exception(0x700, &illegal_handler, NULL);
+ handle_exception(0xe40, &illegal_handler, NULL);
+ clrbhrb();
+ handle_exception(0x700, NULL, NULL);
+ handle_exception(0xe40, NULL, NULL);
+ if (got_interrupt) {
+ got_interrupt = false;
+ report_skip("BHRB support missing");
+ report_prefix_pop();
+ return;
+ }
+
+ handle_exception(0x900, &illegal_handler, NULL);
+
+ if (vm_available()) {
+ handle_exception(0x900, &dec_ignore_handler, NULL);
+ setup_vm();
+ }
+ reset_mmcr0();
+ clrbhrb();
+ if (cpu_has_p10_bhrb) {
+ enter_usermode();
+ bhrb_dummy(0);
+ exit_usermode();
+ } else {
+ bhrb_dummy(0);
+ }
+ report(mfbhrbe(0) == 0, "BHRB is frozen");
+
+ /*
+ * BHRB may be cleared at any time (e.g., by OS or hypervisor)
+ * so this test could be occasionally incorrect. Try several
+ * times before giving up...
+ */
+
+ if (cpu_has_p10_bhrb) {
+ /*
+ * BHRB should have 8 entries:
+ * 1. enter_usermode blr
+ * 2. enter_usermode blr target
+ * 3. bl dummy
+ * 4. dummy unconditional
+ * 5. dummy conditional
+ * 6. dummy blr
+ * 7. dummy blr target
+ * 8. exit_usermode bl
+ *
+ * POWER10 often gives 4 entries, if other threads are
+ * running on the core, it seems to struggle.
+ */
+ for (i = 0; i < 200; i++) {
+ run_and_load_bhrb();
+ if (nr_bhrbe == 8)
+ break;
+ if (i > 100 && nr_bhrbe == 4)
+ break;
+ }
+ if (nr_bhrbe != 8)
+ printf("nr_bhrbe=%d\n", nr_bhrbe);
+ report(nr_bhrbe, "BHRB has been written");
+ if (nr_bhrbe == 8) {
+ report(nr_bhrbe == 8, "BHRB has written 8 entries");
+ report(bhrbe[4] == (unsigned long)dummy_branch_1,
+ "correct unconditional branch address");
+ report(bhrbe[3] == (unsigned long)dummy_branch_2,
+ "correct conditional branch address");
+ } else if (nr_bhrbe == 4) {
+ /* POWER10 workaround */
+ report(nr_bhrbe == 4, "BHRB has written 4 entries");
+ report(bhrbe[3] == (unsigned long)dummy_branch_2,
+ "correct conditional branch address");
+ }
+ } else {
+ /*
+ * BHRB should have 6 entries:
+ * 1. bl dummy
+ * 2. dummy unconditional
+ * 3. dummy conditional
+ * 4. dummy blr
+ * 5. dummy blr target
+ * 6. Final b loop before disabled.
+ *
+ * POWER9 often gives 4 entries, if other threads are
+ * running on the core, it seems to struggle.
+ */
+ for (i = 0; i < 200; i++) {
+ run_and_load_bhrb();
+ if (nr_bhrbe == 6)
+ break;
+ if (i > 100 && nr_bhrbe == 4)
+ break;
+ }
+ report(nr_bhrbe, "BHRB has been written");
+ report(nr_bhrbe == 6, "BHRB has written 6 entries");
+ if (nr_bhrbe == 6) {
+ report(bhrbe[4] == (unsigned long)dummy_branch_1,
+ "correct unconditional branch address");
+ report(bhrbe[3] == (unsigned long)dummy_branch_2,
+ "correct conditional branch address");
+ } else if (nr_bhrbe == 4) {
+ /* POWER9 workaround */
+ report(nr_bhrbe == 4, "BHRB has written 4 entries");
+ report(bhrbe[3] == (unsigned long)dummy_branch_2,
+ "correct conditional branch address");
+ }
+ }
+
+ handle_exception(0x900, NULL, NULL);
+
+ report_prefix_pop();
+}
+
+int main(int argc, char **argv)
+{
+ report_prefix_push("pmu");
+
+ test_pmc56();
+ test_pmi();
+ if (cpu_has_bhrb)
+ test_bhrb();
+
+ report_prefix_pop();
+
+ return report_summary();
+}
@@ -74,6 +74,9 @@ file = emulator.elf
[interrupts]
file = interrupts.elf
+[pmu]
+file = pmu.elf
+
[smp]
file = smp.elf
smp = 2
Add some initial PMU testing. - PMC5/6 tests - PMAE / PMI test - BHRB basic tests Signed-off-by: Nicholas Piggin <npiggin@gmail.com> --- lib/powerpc/asm/processor.h | 2 + lib/powerpc/asm/reg.h | 9 + lib/powerpc/asm/setup.h | 1 + lib/powerpc/setup.c | 23 ++ powerpc/Makefile.common | 3 +- powerpc/pmu.c | 405 ++++++++++++++++++++++++++++++++++++ powerpc/unittests.cfg | 3 + 7 files changed, 445 insertions(+), 1 deletion(-) create mode 100644 powerpc/pmu.c