@@ -2117,9 +2117,30 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
memslot = gfn_to_memslot(vcpu->kvm, gfn);
hva = gfn_to_hva_memslot_prot(memslot, gfn, &writable);
write_fault = kvm_is_write_fault(vcpu);
- if (kvm_is_error_hva(hva) || (write_fault && !writable)) {
+
+ /*
+ * The IPA is reported as [MAX:12], so we need to complement it with
+ * the bottom 12 bits from the faulting VA. This is always 12 bits,
+ * irrespective of the page size.
+ */
+ fault_ipa |= kvm_vcpu_get_hfar(vcpu) & ((1 << 12) - 1);
+
+ /*
+ * We can perform some early fault triage based purely on the memslot
+ * information:
+ *
+ * Faults on IPAs falling outside of any memslot are re-injected
+ * into the guest as external aborts if they were either signalled as
+ * instruction aborts or as a stage-2 fault on a translation table walk.
+ * If the instruction was a cache maintenance instruction then it is
+ * quietly skipped, otherwise we exit to userspace for MMIO emulation.
+ *
+ * Write faults on IPAs falling within a read-only memslot are reported
+ * to userspace as MMIO exits. This includes cache maintenance and
+ * stage-2 faults on translation table walks.
+ */
+ if (kvm_is_error_hva(hva)) {
if (kvm_vcpu_trap_is_iabt(vcpu)) {
- /* Prefetch Abort on I/O address */
ret = -ENOEXEC;
goto out;
}
@@ -2129,30 +2150,12 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
goto out;
}
- /*
- * Check for a cache maintenance operation. Since we
- * ended-up here, we know it is outside of any memory
- * slot. But we can't find out if that is for a device,
- * or if the guest is just being stupid. The only thing
- * we know for sure is that this range cannot be cached.
- *
- * So let's assume that the guest is just being
- * cautious, and skip the instruction.
- */
if (kvm_vcpu_dabt_is_cm(vcpu)) {
kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
ret = 1;
goto out_unlock;
}
- /*
- * The IPA is reported as [MAX:12], so we need to
- * complement it with the bottom 12 bits from the
- * faulting VA. This is always 12 bits, irrespective
- * of the page size.
- */
- fault_ipa |= kvm_vcpu_get_hfar(vcpu) & ((1 << 12) - 1);
-
/*
* No valid syndrome? Ask userspace for help if it has
* volunteered to do so, and bail out otherwise.
@@ -2161,7 +2164,9 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
ret = handle_error_invalid_dabt(vcpu, run, fault_ipa);
goto out_unlock;
}
+ }
+ if (kvm_is_error_hva(hva) || (write_fault && !writable)) {
ret = io_mem_abort(vcpu, run, fault_ipa);
goto out_unlock;
}
Although taking a write-fault on a read-only memslot triggers an MMIO exit back to userspace, lumping the handling together in kvm_handle_guest_abort() causes some of the early triage to have weird effects on userspace. For example, if a guest generates a stage-2 fault on a stage-1 translation fault when trying to fetch an instruction from a read-only memslot, it will be mistakenly treated as an attempt to execute from MMIO and a prefetch abort will be re-injected into the guest. Separate the MMIO handling from the read-only memslot handling, so that the latter is left entirely up to userspace. Note that this _will_ result in more exits than before for read-only memslots, since userspace will now see some cache maintenance and instruction-side aborts. Cc: Marc Zyngier <maz@kernel.org> Cc: Quentin Perret <qperret@google.com> Signed-off-by: Will Deacon <will@kernel.org> --- arch/arm64/kvm/mmu.c | 45 ++++++++++++++++++++++++-------------------- 1 file changed, 25 insertions(+), 20 deletions(-)