| Message ID | 20230315021738.1151386-11-amoorthy@google.com (mailing list archive) |
| --- | --- |
| State | New, archived |
| Series | Avoiding slow get-user-pages via memory fault exit |
On Wed, Mar 15, 2023 at 02:17:34AM +0000, Anish Moorthy <amoorthy@google.com> wrote:
> When a memslot has the KVM_MEM_MEMORY_FAULT_EXIT flag set, exit to
> userspace upon encountering a page fault for which the userspace
> page tables do not contain a present mapping.
> ---
>  arch/x86/kvm/mmu/mmu.c | 33 +++++++++++++++++++++++++--------
>  arch/x86/kvm/x86.c     |  1 +
>  2 files changed, 26 insertions(+), 8 deletions(-)
>
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index 5e0140db384f6..68bc4ab2bd942 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -3214,7 +3214,9 @@ static void kvm_send_hwpoison_signal(struct kvm_memory_slot *slot, gfn_t gfn)
>  	send_sig_mceerr(BUS_MCEERR_AR, (void __user *)hva, PAGE_SHIFT, current);
>  }
>
> -static int kvm_handle_error_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
> +static int kvm_handle_error_pfn(
> +	struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
> +	bool faulted_on_absent_mapping)
>  {
>  	if (is_sigpending_pfn(fault->pfn)) {
>  		kvm_handle_signal_exit(vcpu);
> @@ -3234,7 +3236,11 @@ static int kvm_handle_error_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fa
>  		return RET_PF_RETRY;
>  	}
>
> -	return -EFAULT;
> +	return kvm_memfault_exit_or_efault(
> +		vcpu, fault->gfn * PAGE_SIZE, PAGE_SIZE,
> +		faulted_on_absent_mapping
> +			? KVM_MEMFAULT_REASON_ABSENT_MAPPING
> +			: KVM_MEMFAULT_REASON_UNKNOWN);
>  }
>
>  static int kvm_handle_noslot_fault(struct kvm_vcpu *vcpu,
> @@ -4209,7 +4215,9 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
>  	kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, 0, true);
>  }
>
> -static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
> +static int __kvm_faultin_pfn(
> +	struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
> +	bool fault_on_absent_mapping)
>  {
>  	struct kvm_memory_slot *slot = fault->slot;
>  	bool async;
> @@ -4242,9 +4250,15 @@ static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
>  	}
>
>  	async = false;
> -	fault->pfn = __gfn_to_pfn_memslot(slot, fault->gfn, false, false, &async,
> -					  fault->write, &fault->map_writable,
> -					  &fault->hva);
> +
> +	fault->pfn = __gfn_to_pfn_memslot(
> +		slot, fault->gfn,
> +		fault_on_absent_mapping,
> +		false,
> +		fault_on_absent_mapping ? NULL : &async,
> +		fault->write, &fault->map_writable,
> +		&fault->hva);
> +
>  	if (!async)
>  		return RET_PF_CONTINUE; /* *pfn has correct page already */
>
> @@ -4274,16 +4288,19 @@ static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
>  			   unsigned int access)
>  {
>  	int ret;
> +	bool fault_on_absent_mapping
> +		= likely(fault->slot) && kvm_slot_fault_on_absent_mapping(fault->slot);

nit: Instead of passing the value around, we could add a new member,
struct kvm_page_fault::fault_on_absent_mapping:

	fault->fault_on_absent_mapping = likely(fault->slot) &&
		kvm_slot_fault_on_absent_mapping(fault->slot);

Thanks,
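A rough sketch of what that suggestion might look like (a sketch only: the elided members of struct kvm_page_fault, which lives in arch/x86/kvm/mmu/mmu_internal.h, and the exact field placement are assumptions for illustration):

```c
/*
 * Sketch of the reviewer's suggestion: carry the flag in the fault
 * itself instead of threading a bool through the call chain. Elided
 * members and field placement are illustrative, not from the patch.
 */
struct kvm_page_fault {
	/* ... existing members: addr, error_code, slot, pfn, hva, ... */

	/* Derived once from the memslot when the fault is first handled. */
	bool fault_on_absent_mapping;
};

static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
			   unsigned int access)
{
	fault->fault_on_absent_mapping = likely(fault->slot) &&
		kvm_slot_fault_on_absent_mapping(fault->slot);

	/*
	 * ... __kvm_faultin_pfn() and kvm_handle_error_pfn() would then
	 * read fault->fault_on_absent_mapping directly rather than take
	 * an extra parameter ...
	 */
}
```

This would keep the __kvm_faultin_pfn() and kvm_handle_error_pfn() signatures unchanged, at the cost of one more field in the per-fault state.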
```diff
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 5e0140db384f6..68bc4ab2bd942 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3214,7 +3214,9 @@ static void kvm_send_hwpoison_signal(struct kvm_memory_slot *slot, gfn_t gfn)
 	send_sig_mceerr(BUS_MCEERR_AR, (void __user *)hva, PAGE_SHIFT, current);
 }
 
-static int kvm_handle_error_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
+static int kvm_handle_error_pfn(
+	struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
+	bool faulted_on_absent_mapping)
 {
 	if (is_sigpending_pfn(fault->pfn)) {
 		kvm_handle_signal_exit(vcpu);
@@ -3234,7 +3236,11 @@ static int kvm_handle_error_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fa
 		return RET_PF_RETRY;
 	}
 
-	return -EFAULT;
+	return kvm_memfault_exit_or_efault(
+		vcpu, fault->gfn * PAGE_SIZE, PAGE_SIZE,
+		faulted_on_absent_mapping
+			? KVM_MEMFAULT_REASON_ABSENT_MAPPING
+			: KVM_MEMFAULT_REASON_UNKNOWN);
 }
 
 static int kvm_handle_noslot_fault(struct kvm_vcpu *vcpu,
@@ -4209,7 +4215,9 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
 	kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, 0, true);
 }
 
-static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
+static int __kvm_faultin_pfn(
+	struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
+	bool fault_on_absent_mapping)
 {
 	struct kvm_memory_slot *slot = fault->slot;
 	bool async;
@@ -4242,9 +4250,15 @@ static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
 	}
 
 	async = false;
-	fault->pfn = __gfn_to_pfn_memslot(slot, fault->gfn, false, false, &async,
-					  fault->write, &fault->map_writable,
-					  &fault->hva);
+
+	fault->pfn = __gfn_to_pfn_memslot(
+		slot, fault->gfn,
+		fault_on_absent_mapping,
+		false,
+		fault_on_absent_mapping ? NULL : &async,
+		fault->write, &fault->map_writable,
+		&fault->hva);
+
 	if (!async)
 		return RET_PF_CONTINUE; /* *pfn has correct page already */
 
@@ -4274,16 +4288,19 @@ static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
 			   unsigned int access)
 {
 	int ret;
+	bool fault_on_absent_mapping
+		= likely(fault->slot) && kvm_slot_fault_on_absent_mapping(fault->slot);
 
 	fault->mmu_seq = vcpu->kvm->mmu_invalidate_seq;
 	smp_rmb();
 
-	ret = __kvm_faultin_pfn(vcpu, fault);
+	ret = __kvm_faultin_pfn(
+		vcpu, fault, fault_on_absent_mapping);
 	if (ret != RET_PF_CONTINUE)
 		return ret;
 
 	if (unlikely(is_error_pfn(fault->pfn)))
-		return kvm_handle_error_pfn(vcpu, fault);
+		return kvm_handle_error_pfn(vcpu, fault, fault_on_absent_mapping);
 
 	if (unlikely(!fault->slot))
 		return kvm_handle_noslot_fault(vcpu, fault, access);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b3c1b2f57e680..41435324b41d7 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4426,6 +4426,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_ENABLE_CAP:
 	case KVM_CAP_VM_DISABLE_NX_HUGE_PAGES:
 	case KVM_CAP_X86_MEMORY_FAULT_EXIT:
+	case KVM_CAP_MEMORY_FAULT_NOWAIT:
 		r = 1;
 		break;
 	case KVM_CAP_EXIT_HYPERCALL:
```
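For context, a hypothetical userspace sketch of how the capability advertised in the x86.c hunk might be consumed. KVM_CAP_MEMORY_FAULT_NOWAIT and KVM_MEM_MEMORY_FAULT_EXIT are proposed by this series (they are assumptions here, not established uAPI); the surrounding ioctls and struct are standard KVM API.

```c
/*
 * Hypothetical userspace sketch (not part of the patch). The flag and
 * capability names come from this series and will not compile against
 * mainline headers; everything else is standard KVM uAPI.
 */
#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <stdint.h>

static int register_fault_exit_slot(int vm_fd, uint64_t gpa, uint64_t size,
				    void *hva)
{
	struct kvm_userspace_memory_region region = {
		.slot = 0,
		.flags = KVM_MEM_MEMORY_FAULT_EXIT,	/* proposed by this series */
		.guest_phys_addr = gpa,
		.memory_size = size,
		.userspace_addr = (uint64_t)hva,
	};

	/* Probe for the capability advertised in the x86.c hunk above. */
	if (ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_MEMORY_FAULT_NOWAIT) <= 0)
		return -1;	/* kernel lacks this series */

	return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
}
```

When a vCPU then faults on a mapping that is absent from the userspace page tables, KVM_RUN would return a memory-fault exit carrying the guest-physical range and a reason such as KVM_MEMFAULT_REASON_ABSENT_MAPPING (per kvm_memfault_exit_or_efault() in the mmu.c hunk); userspace would make the mapping present (e.g. via UFFDIO_COPY or a plain write to the backing memory) and re-enter the vCPU, rather than paying for a slow get-user-pages inside the kernel.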