diff mbox

[v6,12/13] KVM: arm64: Handle RAS SErrors from EL2 on guest exit

Message ID 20180115193906.30053-13-james.morse@arm.com (mailing list archive)
State New, archived
Headers show

Commit Message

James Morse Jan. 15, 2018, 7:39 p.m. UTC
We expect to have firmware-first handling of RAS SErrors, with errors
notified via an APEI method. For systems without firmware-first, add
some minimal handling to KVM.

There are two ways KVM can take an SError due to a guest, either of which
may be a RAS error: we exit the guest due to an SError routed to EL2 by
HCR_EL2.AMO, or we take an SError from EL2 when we unmask PSTATE.A from
__guest_exit.

The current SError from EL2 code unmasks SError and tries to fence any
pending SError into a single instruction window. It then leaves SError
unmasked.

With the v8.2 RAS Extensions we may take an SError for a 'corrected'
error, but KVM is only able to handle SErrors from EL2 if they occur
during this single instruction window...

The RAS Extensions give us a new instruction to synchronise and
consume SErrors. The RAS Extensions document (ARM DDI0587),
'2.4.1 ESB and Unrecoverable errors' describes ESB as synchronising
SError interrupts generated by 'instructions, translation table walks,
hardware updates to the translation tables, and instruction fetches on
the same PE'. This makes ESB equivalent to KVM's existing
'dsb, msr-daifclr, isb' sequence.

Use the alternatives to synchronise and consume any SError using ESB
instead of unmasking and taking the SError. Set ARM_EXIT_WITH_SERROR_BIT
in the exit_code so that we can restart the vcpu if it turns out this
SError has no impact on the vcpu.

Signed-off-by: James Morse <james.morse@arm.com>
---
Changes since v4:
 * Moved the SError handling into handle_exit_early()
 * Dropped Marc & Christoffer's Reviewed-by due to handle_exit_early().

Changes since v3:
 * Moved that nop out of the firing line

 arch/arm64/include/asm/kvm_emulate.h |  5 +++++
 arch/arm64/include/asm/kvm_host.h    |  1 +
 arch/arm64/kernel/asm-offsets.c      |  1 +
 arch/arm64/kvm/handle_exit.c         | 14 +++++++++++++-
 arch/arm64/kvm/hyp/entry.S           | 13 +++++++++++++
 5 files changed, 33 insertions(+), 1 deletion(-)

Comments

Marc Zyngier Jan. 16, 2018, 9:36 a.m. UTC | #1
On 15/01/18 19:39, James Morse wrote:
> We expect to have firmware-first handling of RAS SErrors, with errors
> notified via an APEI method. For systems without firmware-first, add
> some minimal handling to KVM.
> 
> There are two ways KVM can take an SError due to a guest, either may be a
> RAS error: we exit the guest due to an SError routed to EL2 by HCR_EL2.AMO,
> or we take an SError from EL2 when we unmask PSTATE.A from __guest_exit.
> 
> The current SError from EL2 code unmasks SError and tries to fence any
> pending SError into a single instruction window. It then leaves SError
> unmasked.
> 
> With the v8.2 RAS Extensions we may take an SError for a 'corrected'
> error, but KVM is only able to handle SError from EL2 if they occur
> during this single instruction window...
> 
> The RAS Extensions give us a new instruction to synchronise and
> consume SErrors. The RAS Extensions document (ARM DDI0587),
> '2.4.1 ESB and Unrecoverable errors' describes ESB as synchronising
> SError interrupts generated by 'instructions, translation table walks,
> hardware updates to the translation tables, and instruction fetches on
> the same PE'. This makes ESB equivalent to KVMs existing
> 'dsb, mrs-daifclr, isb' sequence.
> 
> Use the alternatives to synchronise and consume any SError using ESB
> instead of unmasking and taking the SError. Set ARM_EXIT_WITH_SERROR_BIT
> in the exit_code so that we can restart the vcpu if it turns out this
> SError has no impact on the vcpu.
> 
> Signed-off-by: James Morse <james.morse@arm.com>
> ---
> Changes since v4:
>  * Moved the SError handling into handle_exit_early()
>  * Dropped Marc & Christoffer's Reviewed-by due to handle_exit_early().
> 
> Changes since v3:
>  * Moved that nop out of the firing line
> 
>  arch/arm64/include/asm/kvm_emulate.h |  5 +++++
>  arch/arm64/include/asm/kvm_host.h    |  1 +
>  arch/arm64/kernel/asm-offsets.c      |  1 +
>  arch/arm64/kvm/handle_exit.c         | 14 +++++++++++++-
>  arch/arm64/kvm/hyp/entry.S           | 13 +++++++++++++
>  5 files changed, 33 insertions(+), 1 deletion(-)

Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>

	M.
Christoffer Dall Jan. 19, 2018, 7:54 p.m. UTC | #2
On Mon, Jan 15, 2018 at 07:39:05PM +0000, James Morse wrote:
> We expect to have firmware-first handling of RAS SErrors, with errors
> notified via an APEI method. For systems without firmware-first, add
> some minimal handling to KVM.
> 
> There are two ways KVM can take an SError due to a guest, either may be a
> RAS error: we exit the guest due to an SError routed to EL2 by HCR_EL2.AMO,
> or we take an SError from EL2 when we unmask PSTATE.A from __guest_exit.
> 
> The current SError from EL2 code unmasks SError and tries to fence any
> pending SError into a single instruction window. It then leaves SError
> unmasked.
> 
> With the v8.2 RAS Extensions we may take an SError for a 'corrected'
> error, but KVM is only able to handle SError from EL2 if they occur
> during this single instruction window...
> 
> The RAS Extensions give us a new instruction to synchronise and
> consume SErrors. The RAS Extensions document (ARM DDI0587),
> '2.4.1 ESB and Unrecoverable errors' describes ESB as synchronising
> SError interrupts generated by 'instructions, translation table walks,
> hardware updates to the translation tables, and instruction fetches on
> the same PE'. This makes ESB equivalent to KVMs existing
> 'dsb, mrs-daifclr, isb' sequence.
> 
> Use the alternatives to synchronise and consume any SError using ESB
> instead of unmasking and taking the SError. Set ARM_EXIT_WITH_SERROR_BIT
> in the exit_code so that we can restart the vcpu if it turns out this
> SError has no impact on the vcpu.
> 
> Signed-off-by: James Morse <james.morse@arm.com>
> ---
> Changes since v4:
>  * Moved the SError handling into handle_exit_early()
>  * Dropped Marc & Christoffer's Reviewed-by due to handle_exit_early().
> 

I realize this is queued, but for good measure, I'm still happy with
this change after handle_exit_early().

Thanks,
-Christoffer
diff mbox

Patch

diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 6d3614795197..e002ab7f919a 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -176,6 +176,11 @@  static inline phys_addr_t kvm_vcpu_get_fault_ipa(const struct kvm_vcpu *vcpu)
 	return ((phys_addr_t)vcpu->arch.fault.hpfar_el2 & HPFAR_MASK) << 8;
 }
 
+static inline u64 kvm_vcpu_get_disr(const struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.fault.disr_el1;
+}
+
 static inline u32 kvm_vcpu_hvc_get_imm(const struct kvm_vcpu *vcpu)
 {
 	return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_xVC_IMM_MASK;
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index abcfd164e690..4485ae8e98de 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -90,6 +90,7 @@  struct kvm_vcpu_fault_info {
 	u32 esr_el2;		/* Hyp Syndrom Register */
 	u64 far_el2;		/* Hyp Fault Address Register */
 	u64 hpfar_el2;		/* Hyp IPA Fault Address Register */
+	u64 disr_el1;		/* Deferred [SError] Status Register */
 };
 
 /*
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 1dcc493f5765..1303e04110cd 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -132,6 +132,7 @@  int main(void)
   BLANK();
 #ifdef CONFIG_KVM_ARM_HOST
   DEFINE(VCPU_CONTEXT,		offsetof(struct kvm_vcpu, arch.ctxt));
+  DEFINE(VCPU_FAULT_DISR,	offsetof(struct kvm_vcpu, arch.fault.disr_el1));
   DEFINE(CPU_GP_REGS,		offsetof(struct kvm_cpu_context, gp_regs));
   DEFINE(CPU_USER_PT_REGS,	offsetof(struct kvm_regs, regs));
   DEFINE(CPU_FP_REGS,		offsetof(struct kvm_regs, fp_regs));
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 6a5a5db4292f..c09fc5a576c7 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -23,6 +23,7 @@ 
 #include <linux/kvm_host.h>
 
 #include <asm/esr.h>
+#include <asm/exception.h>
 #include <asm/kvm_asm.h>
 #include <asm/kvm_coproc.h>
 #include <asm/kvm_emulate.h>
@@ -249,7 +250,6 @@  int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
 			*vcpu_pc(vcpu) -= adj;
 		}
 
-		kvm_inject_vabt(vcpu);
 		return 1;
 	}
 
@@ -286,6 +286,18 @@  int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
 void handle_exit_early(struct kvm_vcpu *vcpu, struct kvm_run *run,
 		       int exception_index)
 {
+	if (ARM_SERROR_PENDING(exception_index)) {
+		if (this_cpu_has_cap(ARM64_HAS_RAS_EXTN)) {
+			u64 disr = kvm_vcpu_get_disr(vcpu);
+
+			kvm_handle_guest_serror(vcpu, disr_to_esr(disr));
+		} else {
+			kvm_inject_vabt(vcpu);
+		}
+
+		return;
+	}
+
 	exception_index = ARM_EXCEPTION_CODE(exception_index);
 
 	if (exception_index == ARM_EXCEPTION_EL1_SERROR)
diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
index fe4678f20a85..fdd1068ee3a5 100644
--- a/arch/arm64/kvm/hyp/entry.S
+++ b/arch/arm64/kvm/hyp/entry.S
@@ -124,6 +124,17 @@  ENTRY(__guest_exit)
 	// Now restore the host regs
 	restore_callee_saved_regs x2
 
+alternative_if ARM64_HAS_RAS_EXTN
+	// If we have the RAS extensions we can consume a pending error
+	// without an unmask-SError and isb.
+	esb
+	mrs_s	x2, SYS_DISR_EL1
+	str	x2, [x1, #(VCPU_FAULT_DISR - VCPU_CONTEXT)]
+	cbz	x2, 1f
+	msr_s	SYS_DISR_EL1, xzr
+	orr	x0, x0, #(1<<ARM_EXIT_WITH_SERROR_BIT)
+1:	ret
+alternative_else
 	// If we have a pending asynchronous abort, now is the
 	// time to find out. From your VAXorcist book, page 666:
 	// "Threaten me not, oh Evil one!  For I speak with
@@ -134,7 +145,9 @@  ENTRY(__guest_exit)
 	mov	x5, x0
 
 	dsb	sy		// Synchronize against in-flight ld/st
+	nop
 	msr	daifclr, #4	// Unmask aborts
+alternative_endif
 
 	// This is our single instruction exception window. A pending
 	// SError is guaranteed to occur at the earliest when we unmask