@@ -299,6 +299,7 @@
#define SYS_PMBLIMITR_EL1_FM_SHIFT 1
#define SYS_PMBLIMITR_EL1_FM_MASK 0x3UL
#define SYS_PMBLIMITR_EL1_FM_STOP_IRQ (0 << SYS_PMBLIMITR_EL1_FM_SHIFT)
+#define SYS_PMBLIMITR_EL1_RES0 0xfffffffffffff007UL
#define SYS_PMBPTR_EL1 sys_reg(3, 0, 9, 10, 1)
@@ -323,6 +324,8 @@
#define SYS_PMBSR_EL1_BUF_BSC_FULL (0x1UL << SYS_PMBSR_EL1_BUF_BSC_SHIFT)
+#define SYS_PMBSR_EL1_RES0 0x00000000fc0fffffUL
+
/*** End of Statistical Profiling Extension ***/
#define SYS_PMINTENSET_EL1 sys_reg(3, 0, 9, 14, 1)
@@ -775,6 +775,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
*/
kvm_vgic_sync_hwstate(vcpu);
+ if (vcpu_has_spe(vcpu))
+ kvm_arm_spe_sync_hwstate(vcpu);
+
/*
* Sync the timer hardware state before enabling interrupts as
* we don't want vtimer interrupts to race with syncing the
@@ -47,6 +47,14 @@ void __sysreg_save_spe_host_state_nvhe(struct kvm_cpu_context *ctxt)
void __sysreg_restore_spe_guest_state_nvhe(struct kvm_vcpu *vcpu)
{
struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt;
+ struct kvm_spe_cpu *spe_cpu = &vcpu->arch.spe_cpu;
+ u64 pmblimitr;
+
+ /* Disable guest profiling when the interrupt is asserted. */
+ if (spe_cpu->irq_level)
+ pmblimitr = 0;
+ else
+ pmblimitr = ctxt_sys_reg(guest_ctxt, PMBLIMITR_EL1);
__sysreg_restore_spe_state_common(guest_ctxt);
@@ -54,16 +62,24 @@ void __sysreg_restore_spe_guest_state_nvhe(struct kvm_vcpu *vcpu)
isb();
write_sysreg_s(ctxt_sys_reg(guest_ctxt, PMBPTR_EL1), SYS_PMBPTR_EL1);
- /* The guest buffer management event interrupt is always virtual. */
+ /* The guest buffer management interrupt is always virtual. */
write_sysreg_s(0, SYS_PMBSR_EL1);
- write_sysreg_s(ctxt_sys_reg(guest_ctxt, PMBLIMITR_EL1), SYS_PMBLIMITR_EL1);
+ write_sysreg_s(pmblimitr, SYS_PMBLIMITR_EL1);
write_sysreg_s(ctxt_sys_reg(guest_ctxt, PMSCR_EL1), SYS_PMSCR_EL1);
}
void __sysreg_save_spe_guest_state_nvhe(struct kvm_vcpu *vcpu)
{
struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt;
+ struct kvm_spe_cpu *spe_cpu = &vcpu->arch.spe_cpu;
u64 pmblimitr = read_sysreg_s(SYS_PMBLIMITR_EL1);
+ u64 pmbsr = read_sysreg_s(SYS_PMBSR_EL1);
+
+ /* Update guest PMBSR_EL1 only when SPE asserts an interrupt. */
+ if (pmbsr & BIT(SYS_PMBSR_EL1_S_SHIFT)) {
+ ctxt_sys_reg(guest_ctxt, PMBSR_EL1) = pmbsr;
+ spe_cpu->pmbirq_asserted = true;
+ }
if (pmblimitr & BIT(SYS_PMBLIMITR_EL1_E_SHIFT)) {
psb_csync();
@@ -44,6 +44,8 @@ NOKPROBE_SYMBOL(sysreg_save_spe_host_state_vhe);
void sysreg_restore_spe_guest_state_vhe(struct kvm_vcpu *vcpu)
{
struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt;
+ struct kvm_spe_cpu *spe_cpu = &vcpu->arch.spe_cpu;
+ u64 pmblimitr;
/*
* Make sure the write to MDCR_EL2 which changes the buffer owning
@@ -51,6 +53,12 @@ void sysreg_restore_spe_guest_state_vhe(struct kvm_vcpu *vcpu)
*/
isb();
+ /* Disable guest profiling when the interrupt is asserted. */
+ if (spe_cpu->irq_level)
+ pmblimitr = 0;
+ else
+ pmblimitr = ctxt_sys_reg(guest_ctxt, PMBLIMITR_EL1);
+
/*
* Order doesn't matter because sampling is disabled at EL2. However,
* restore guest registers in the same program order as the host for
@@ -59,9 +67,9 @@ void sysreg_restore_spe_guest_state_vhe(struct kvm_vcpu *vcpu)
__sysreg_restore_spe_state_common(guest_ctxt);
write_sysreg_s(ctxt_sys_reg(guest_ctxt, PMBPTR_EL1), SYS_PMBPTR_EL1);
- /* The guest buffer management event interrupt is always virtual. */
+ /* The guest buffer management interrupt is always virtual. */
write_sysreg_s(0, SYS_PMBSR_EL1);
- write_sysreg_s(ctxt_sys_reg(guest_ctxt, PMBLIMITR_EL1), SYS_PMBLIMITR_EL1);
+ write_sysreg_s(pmblimitr, SYS_PMBLIMITR_EL1);
write_sysreg_el1(ctxt_sys_reg(guest_ctxt, PMSCR_EL1), SYS_PMSCR);
}
NOKPROBE_SYMBOL(sysreg_restore_spe_guest_state_vhe);
@@ -69,8 +77,15 @@ NOKPROBE_SYMBOL(sysreg_restore_spe_guest_state_vhe);
void sysreg_save_spe_guest_state_vhe(struct kvm_vcpu *vcpu)
{
struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt;
+ struct kvm_spe_cpu *spe_cpu = &vcpu->arch.spe_cpu;
u64 pmblimitr = read_sysreg_s(SYS_PMBLIMITR_EL1);
+ u64 pmbsr = read_sysreg_s(SYS_PMBSR_EL1);
+ /* Update guest PMBSR_EL1 only when SPE asserts an interrupt. */
+ if (pmbsr & BIT(SYS_PMBSR_EL1_S_SHIFT)) {
+ ctxt_sys_reg(guest_ctxt, PMBSR_EL1) = pmbsr;
+ spe_cpu->pmbirq_asserted = true;
+ }
/*
* We're going to switch buffer owning Exception level when we restore
* the host MDCR_EL2 value, make sure the buffer is disabled until we
@@ -12,11 +12,112 @@
#include <kvm/arm_vgic.h>
#include <asm/kvm_mmu.h>
+#include <asm/kvm_host.h>
+
+static void kvm_arm_spe_update_irq(struct kvm_vcpu *vcpu, bool new_level)
+{
+ struct kvm_spe_cpu *spe_cpu = &vcpu->arch.spe_cpu;
+ int ret;
+
+ spe_cpu->irq_level = new_level;
+ ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, spe_cpu->irq_num,
+ new_level, spe_cpu);
+ WARN_ON(ret);
+}
+
+void kvm_arm_spe_sync_hwstate(struct kvm_vcpu *vcpu)
+{
+ struct kvm_spe_cpu *spe_cpu = &vcpu->arch.spe_cpu;
+ u64 pmbsr, pmbsr_ec;
+
+ if (!spe_cpu->pmbirq_asserted)
+ return;
+ spe_cpu->pmbirq_asserted = false;
+
+ pmbsr = __vcpu_sys_reg(vcpu, PMBSR_EL1);
+ pmbsr_ec = pmbsr & (SYS_PMBSR_EL1_EC_MASK << SYS_PMBSR_EL1_EC_SHIFT);
+
+ if (pmbsr & BIT(SYS_PMBSR_EL1_EA_SHIFT)) {
+ /*
+ * The guest managed to trigger an external abort, something is
+ * very definitely broken and there is no way for us to recover.
+ * Treat it like we would if the external abort generated an
+ * SError and panic now.
+ */
+ panic("KVM SPE External Abort\n");
+ cpu_park_loop();
+ unreachable();
+ }
+
+ switch (pmbsr_ec) {
+ case SYS_PMBSR_EL1_EC_FAULT_S2:
+ /*
+ * If we see this then either the guest memory isn't pinned
+ * correctly (KVM bug or userspace got it wrong), or the guest
+ * programmed the buffer pointer with a bogus address.
+ * PMBPTR_EL1 will point to the guest VA that triggered the
+ * DABT, print it as it might be useful for debugging.
+ */
+ pr_warn_ratelimited("KVM SPE Stage 2 Data Abort, pmbsr=0x%llx, pmbptr=0x%llx\n",
+ pmbsr, __vcpu_sys_reg(vcpu, PMBPTR_EL1));
+ /*
+ * Convert the stage 2 DABT into a guest SPE buffer synchronous
+ * external abort.
+ */
+ __vcpu_sys_reg(vcpu, PMBSR_EL1) = BIT(SYS_PMBSR_EL1_S_SHIFT) |
+ BIT(SYS_PMBSR_EL1_EA_SHIFT);
+ break;
+ case SYS_PMBSR_EL1_EC_FAULT_S1:
+ case SYS_PMBSR_EL1_EC_BUF:
+ break;
+ default:
+ pr_warn_ratelimited("KVM SPE Unknown buffer syndrome, pmbsr=0x%llx, pmbptr=0x%llx\n",
+ pmbsr, __vcpu_sys_reg(vcpu, PMBPTR_EL1));
+ __vcpu_sys_reg(vcpu, PMBSR_EL1) = BIT(SYS_PMBSR_EL1_S_SHIFT) |
+ BIT(SYS_PMBSR_EL1_EA_SHIFT);
+ }
+
+ if (spe_cpu->irq_level)
+ return;
+
+ kvm_arm_spe_update_irq(vcpu, true);
+}
+
+static u64 kvm_arm_spe_get_reg_mask(int reg)
+{
+ switch (reg) {
+ case PMBLIMITR_EL1:
+ return SYS_PMBLIMITR_EL1_RES0;
+ case PMBPTR_EL1:
+ return ~0UL;
+ case PMBSR_EL1:
+ return SYS_PMBSR_EL1_RES0;
+ default:
+ return 0UL;
+ }
+}
void kvm_arm_spe_write_sysreg(struct kvm_vcpu *vcpu, int reg, u64 val)
{
+ struct kvm_spe_cpu *spe_cpu = &vcpu->arch.spe_cpu;
+ bool irq_level;
+
WARN(reg < PMBLIMITR_EL1, "Unexpected trap to SPE register\n");
+
+ val &= kvm_arm_spe_get_reg_mask(reg);
__vcpu_sys_reg(vcpu, reg) = val;
+
+ if (reg != PMBSR_EL1)
+ return;
+
+ irq_level = val & BIT(SYS_PMBSR_EL1_S_SHIFT);
+ /*
+ * The VGIC configures PPIs as level-sensitive, so we need to update
+ * the interrupt state when it changes.
+ */
+ if (spe_cpu->irq_level == irq_level)
+ return;
+
+ kvm_arm_spe_update_irq(vcpu, irq_level);
}
u64 kvm_arm_spe_read_sysreg(struct kvm_vcpu *vcpu, int reg)
@@ -16,6 +16,7 @@ struct kvm_spe_cpu {
int irq_num; /* Guest visible INTID */
bool irq_level; /* 'true' if interrupt is asserted to the VGIC */
bool initialized; /* Feature is initialized on VCPU */
+ bool pmbirq_asserted; /* Hardware asserted PMBIRQ */
};
struct kvm_spe {
@@ -41,6 +42,7 @@ void kvm_arm_spe_notify_vcpu_init(struct kvm_vcpu *vcpu);
void kvm_arm_spe_write_sysreg(struct kvm_vcpu *vcpu, int reg, u64 val);
u64 kvm_arm_spe_read_sysreg(struct kvm_vcpu *vcpu, int reg);
+void kvm_arm_spe_sync_hwstate(struct kvm_vcpu *vcpu);
#else
#define kvm_arm_supports_spe() false
@@ -96,5 +98,7 @@ static inline u64 kvm_arm_spe_read_sysreg(struct kvm_vcpu *vcpu, int reg)
return 0;
}
+static inline void kvm_arm_spe_sync_hwstate(struct kvm_vcpu *vcpu) {}
+
#endif /* CONFIG_KVM_ARM_SPE */
#endif /* __ASM_ARM_KVM_SPE_H */
A profiling buffer management interrupt is asserted when the buffer fills,
on a fault, or on an external abort. The service bit, PMBSR_EL1.S, is set
as long as SPE asserts this interrupt. SPE can also assert the interrupt
following a direct write to PMBSR_EL1 that sets the bit. The SPE hardware
stops asserting the interrupt only when the service bit is cleared.

KVM emulates the interrupt by reading the value of the service bit on each
guest exit to determine whether the SPE hardware asserted the interrupt
(for example, because the buffer was full). Writes to the buffer registers
are trapped to determine when the interrupt should be cleared or when the
guest wants to explicitly assert the interrupt by setting the service bit.

Signed-off-by: Alexandru Elisei <alexandru.elisei@arm.com>
---
 arch/arm64/include/asm/sysreg.h  |   3 +
 arch/arm64/kvm/arm.c             |   3 +
 arch/arm64/kvm/hyp/nvhe/spe-sr.c |  20 +++-
 arch/arm64/kvm/hyp/vhe/spe-sr.c  |  19 +++-
 arch/arm64/kvm/spe.c             | 101 +++++++++++++++++++++++++++++++
 include/kvm/arm_spe.h            |   4 ++
 6 files changed, 146 insertions(+), 4 deletions(-)
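As a quick reference for the emulation logic described in the commit
message, below is a minimal, standalone model of the level-sensitive
interrupt state machine. This is an illustrative sketch, not kernel code:
only the position of the PMBSR_EL1.S bit (bit 17) is taken from the
architecture, and every name in the model (struct vcpu_model,
sync_hwstate(), write_pmbsr()) is made up for the example. The real logic
lives in kvm_arm_spe_sync_hwstate() and kvm_arm_spe_write_sysreg() in the
hunks above.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PMBSR_S		(1ULL << 17)	/* PMBSR_EL1.S, the service bit */

struct vcpu_model {
	uint64_t pmbsr;		/* guest's view of PMBSR_EL1 */
	bool irq_level;		/* level presented to the emulated GIC */
};

/* Guest exit: if the hardware asserted PMBIRQ, latch it into the guest. */
static void sync_hwstate(struct vcpu_model *v, uint64_t hw_pmbsr)
{
	if (hw_pmbsr & PMBSR_S) {
		v->pmbsr = hw_pmbsr;
		v->irq_level = true;	/* assert the virtual interrupt */
	}
}

/* Trapped guest write: the S bit directly drives the interrupt level. */
static void write_pmbsr(struct vcpu_model *v, uint64_t val)
{
	v->pmbsr = val;
	v->irq_level = !!(val & PMBSR_S);
}

int main(void)
{
	struct vcpu_model v = { 0 };

	sync_hwstate(&v, PMBSR_S);	/* buffer fills: hardware sets S */
	printf("after fill:  irq=%d\n", v.irq_level);	/* irq=1 */

	write_pmbsr(&v, 0);		/* guest handler clears S */
	printf("after clear: irq=%d\n", v.irq_level);	/* irq=0 */

	return 0;
}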