@@ -4490,6 +4490,93 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
return fault_handled;
}
+/*
+ * Attempt to handle a page fault without calling get_user_pages or
+ * acquiring the MMU lock. This function can handle page faults caused by
+ * permissions that KVM cleared from a PTE for dirty logging or access
+ * tracking.
+ *
+ * Return value:
+ * - true: The page fault may have been fixed by this function. Let the vCPU
+ * retry the access at the same address.
+ * - false: This function cannot handle the page fault. Let the full page fault
+ * path fix it.
+ */
+static bool fast_direct_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, int level,
+ u32 error_code)
+{
+ struct direct_walk_iterator iter;
+ bool fault_handled = false;
+ bool remove_write_prot;
+ bool remove_acc_track;
+ u64 new_pte;
+
+ if (!VALID_PAGE(vcpu->arch.mmu->root_hpa))
+ return false;
+
+ if (!page_fault_can_be_fast(error_code))
+ return false;
+
+ direct_walk_iterator_setup_walk(&iter, vcpu->kvm,
+ kvm_arch_vcpu_memslots_id(vcpu), gpa >> PAGE_SHIFT,
+ (gpa >> PAGE_SHIFT) + 1, MMU_NO_LOCK);
+ while (direct_walk_iterator_next_present_leaf_pte(&iter)) {
+ remove_write_prot = (error_code & PFERR_WRITE_MASK);
+ remove_write_prot &= !(iter.old_pte & PT_WRITABLE_MASK);
+ remove_write_prot &= spte_can_locklessly_be_made_writable(
+ iter.old_pte);
+
+ remove_acc_track = is_access_track_spte(iter.old_pte);
+
+ /* Verify that the fault can be handled in the fast path */
+ if (!remove_acc_track && !remove_write_prot)
+ break;
+
+ /*
+ * If dirty logging is enabled:
+ *
+ * Do not fix write permission on a large spte: the fast path only
+ * marks the faulting page in the dirty bitmap, which means the other
+ * pages covered by the spte are missed if its slot is dirty-logged.
+ *
+ * Instead, we let the slow page fault path create a normal spte
+ * to fix the access.
+ *
+ * See the comments in kvm_arch_commit_memory_region().
+ */
+ if (remove_write_prot &&
+ iter.level > PT_PAGE_TABLE_LEVEL)
+ break;
+
+ new_pte = iter.old_pte;
+ if (remove_acc_track)
+ new_pte = restore_acc_track_spte(iter.old_pte);
+ if (remove_write_prot)
+ new_pte |= PT_WRITABLE_MASK;
+
+ if (new_pte == iter.old_pte) {
+ fault_handled = true;
+ break;
+ }
+
+ if (!direct_walk_iterator_set_pte(&iter, new_pte))
+ continue;
+
+ if (remove_write_prot)
+ kvm_vcpu_mark_page_dirty(vcpu, iter.pte_gfn_start);
+
+ fault_handled = true;
+ break;
+ }
+ direct_walk_iterator_end_traversal(&iter);
+
+ trace_fast_page_fault(vcpu, gpa, error_code, iter.ptep,
+ iter.old_pte, fault_handled);
+
+ return fault_handled;
+}
+
static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
gva_t gva, kvm_pfn_t *pfn, bool write, bool *writable);
static int make_mmu_pages_available(struct kvm_vcpu *vcpu);
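
The fast path above appears to rely on direct_walk_iterator_set_pte() installing
the new PTE atomically and failing if the PTE changed under it, analogous to the
cmpxchg64 in fast_pf_fix_direct_spte() on the existing fast path. A minimal,
self-contained sketch of that compare-and-exchange contract (illustrative names,
not the KVM API):

/* Standalone model of a lockless SPTE fixup; not kernel code. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Same bit position as the x86 PTE writable bit. */
#define PT_WRITABLE_MASK (1ULL << 1)

/* Try to make a write-protected leaf PTE writable without taking a lock. */
static bool fast_fix_write_prot(_Atomic uint64_t *sptep)
{
	uint64_t old = atomic_load(sptep);
	uint64_t new = old | PT_WRITABLE_MASK;

	/*
	 * The update only succeeds if the PTE still holds the value we read;
	 * a concurrent writer (another vCPU's fault, zapping, an mmu_notifier)
	 * makes it fail, and the fault is simply retried or sent down the
	 * slow path.
	 */
	return atomic_compare_exchange_strong(sptep, &old, new);
}

int main(void)
{
	/* Example PTE value with the writable bit clear. */
	_Atomic uint64_t spte = 0x1001;

	printf("fixed: %d, pte: %#llx\n", fast_fix_write_prot(&spte),
	       (unsigned long long)atomic_load(&spte));
	return 0;
}
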
@@ -5182,9 +5269,13 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1);
}
- if (!vcpu->kvm->arch.direct_mmu_enabled)
+ if (vcpu->kvm->arch.direct_mmu_enabled) {
+ if (fast_direct_page_fault(vcpu, gpa, level, error_code))
+ return RET_PF_RETRY;
+ } else {
if (fast_page_fault(vcpu, gpa, level, error_code))
return RET_PF_RETRY;
+ }
mmu_seq = vcpu->kvm->mmu_notifier_seq;
smp_rmb();

While the direct MMU can handle page faults much faster than the existing
implementation, it cannot handle faults caused by write protection or access
tracking as quickly. Add a fast path similar to the existing fast path to
handle these cases without the MMU read lock or calls to get_user_pages.

Signed-off-by: Ben Gardon <bgardon@google.com>
---
 arch/x86/kvm/mmu.c | 93 +++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 92 insertions(+), 1 deletion(-)
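
To make the huge-page restriction in the new fast path concrete: the dirty
bitmap is maintained at 4KiB granularity, while a single 2MiB spte maps 512 of
those pages, so fixing write permission on it in the fast path would log only
the faulting gfn. A rough standalone illustration of that arithmetic (constants
mirror x86's 4KiB base and 2MiB huge pages; not kernel code):

#include <stdio.h>

#define PAGE_SHIFT	12	/* 4KiB base pages */
#define PMD_SHIFT	21	/* 2MiB huge pages on x86 */

int main(void)
{
	unsigned long pages_per_hpage = 1UL << (PMD_SHIFT - PAGE_SHIFT);

	printf("gfns mapped by one 2MiB spte:        %lu\n", pages_per_hpage);
	printf("gfns the fast path would mark dirty: 1\n");
	printf("writes that could go unlogged:       %lu\n",
	       pages_per_hpage - 1);
	return 0;
}

This is why the fast path bails out on levels above PT_PAGE_TABLE_LEVEL and
lets the slow path create a normal spte for dirty-logged slots.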