[WIP,v2,10/14] KVM: x86: Implement KVM_CAP_MEMORY_FAULT_NOWAIT

Message ID: 20230315021738.1151386-11-amoorthy@google.com
State: New, archived
Series: Avoiding slow get-user-pages via memory fault exit

Commit Message

Anish Moorthy March 15, 2023, 2:17 a.m. UTC
When a memslot has the KVM_MEM_MEMORY_FAULT_EXIT flag set, exit to
userspace upon encountering a page fault for which the userspace
page tables do not contain a present mapping.
---
 arch/x86/kvm/mmu/mmu.c | 33 +++++++++++++++++++++++++--------
 arch/x86/kvm/x86.c     |  1 +
 2 files changed, 26 insertions(+), 8 deletions(-)
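
For context, a rough userspace sketch of how a VMM might consume this (vm_fd/vcpu_fd/run set up as usual, with run the mmap'd struct kvm_run; KVM_EXIT_MEMORY_FAULT and the kvm_run::memory_fault fields come from earlier patches in this WIP series and may change; resolve_fault() is a hypothetical helper that populates the mapping, e.g. via UFFDIO_COPY):

  struct kvm_userspace_memory_region region = {
          .slot = 0,
          .flags = KVM_MEM_MEMORY_FAULT_EXIT,   /* opt this slot in */
          .guest_phys_addr = 0,
          .memory_size = mem_size,
          .userspace_addr = (__u64)backing_mem,
  };
  ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);

  for (;;) {
          ioctl(vcpu_fd, KVM_RUN, 0);
          if (run->exit_reason == KVM_EXIT_MEMORY_FAULT) {
                  /* The exit names the absent range; fill it and re-enter. */
                  resolve_fault(run->memory_fault.gpa, run->memory_fault.len);
                  continue;
          }
          break;  /* other exit reasons: usual dispatch */
  }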

Comments

Isaku Yamahata March 17, 2023, 12:32 a.m. UTC | #1
On Wed, Mar 15, 2023 at 02:17:34AM +0000,
Anish Moorthy <amoorthy@google.com> wrote:

> When a memslot has the KVM_MEM_MEMORY_FAULT_EXIT flag set, exit to
> userspace upon encountering a page fault for which the userspace
> page tables do not contain a present mapping.
> [...]
>
> @@ -4274,16 +4288,19 @@ static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
>  			   unsigned int access)
>  {
>  	int ret;
> +	bool fault_on_absent_mapping
> +		= likely(fault->slot) && kvm_slot_fault_on_absent_mapping(fault->slot);

nit: Instead of passing the value around, we could add a new member,
struct kvm_page_fault::fault_on_absent_mapping:

  fault->fault_on_absent_mapping = likely(fault->slot) && kvm_slot_fault_on_absent_mapping(fault->slot);

Thanks,
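
For what it's worth, a sketch of that suggestion (struct kvm_page_fault lives in arch/x86/kvm/mmu/mmu_internal.h; placement and comment are illustrative):

  struct kvm_page_fault {
          /* ... existing members ... */

          /* Exit to userspace instead of waiting when the mapping is absent. */
          bool fault_on_absent_mapping;
  };

kvm_faultin_pfn() would then set it once, and the callees would read it from fault rather than taking an extra parameter:

  fault->fault_on_absent_mapping = likely(fault->slot) &&
          kvm_slot_fault_on_absent_mapping(fault->slot);
  ret = __kvm_faultin_pfn(vcpu, fault);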

Patch

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 5e0140db384f6..68bc4ab2bd942 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3214,7 +3214,9 @@  static void kvm_send_hwpoison_signal(struct kvm_memory_slot *slot, gfn_t gfn)
 	send_sig_mceerr(BUS_MCEERR_AR, (void __user *)hva, PAGE_SHIFT, current);
 }
 
-static int kvm_handle_error_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
+static int kvm_handle_error_pfn(
+	struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
+	bool faulted_on_absent_mapping)
 {
 	if (is_sigpending_pfn(fault->pfn)) {
 		kvm_handle_signal_exit(vcpu);
@@ -3234,7 +3236,11 @@  static int kvm_handle_error_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fa
 		return RET_PF_RETRY;
 	}
 
-	return -EFAULT;
+	return kvm_memfault_exit_or_efault(
+		vcpu, fault->gfn * PAGE_SIZE, PAGE_SIZE,
+		faulted_on_absent_mapping
+			? KVM_MEMFAULT_REASON_ABSENT_MAPPING
+			: KVM_MEMFAULT_REASON_UNKNOWN);
 }
 
 static int kvm_handle_noslot_fault(struct kvm_vcpu *vcpu,
@@ -4209,7 +4215,9 @@  void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
 	kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, 0, true);
 }
 
-static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
+static int __kvm_faultin_pfn(
+	struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
+	bool fault_on_absent_mapping)
 {
 	struct kvm_memory_slot *slot = fault->slot;
 	bool async;
@@ -4242,9 +4250,15 @@  static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
 	}
 
 	async = false;
-	fault->pfn = __gfn_to_pfn_memslot(slot, fault->gfn, false, false, &async,
-					  fault->write, &fault->map_writable,
-					  &fault->hva);
+
+	fault->pfn = __gfn_to_pfn_memslot(
+		slot, fault->gfn,
+		fault_on_absent_mapping,
+		false,
+		fault_on_absent_mapping ? NULL : &async,
+		fault->write, &fault->map_writable,
+		&fault->hva);
+
 	if (!async)
 		return RET_PF_CONTINUE; /* *pfn has correct page already */
 
@@ -4274,16 +4288,19 @@  static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
 			   unsigned int access)
 {
 	int ret;
+	bool fault_on_absent_mapping
+		= likely(fault->slot) && kvm_slot_fault_on_absent_mapping(fault->slot);
 
 	fault->mmu_seq = vcpu->kvm->mmu_invalidate_seq;
 	smp_rmb();
 
-	ret = __kvm_faultin_pfn(vcpu, fault);
+	ret = __kvm_faultin_pfn(
+		vcpu, fault, fault_on_absent_mapping);
 	if (ret != RET_PF_CONTINUE)
 		return ret;
 
 	if (unlikely(is_error_pfn(fault->pfn)))
-		return kvm_handle_error_pfn(vcpu, fault);
+		return kvm_handle_error_pfn(vcpu, fault, fault_on_absent_mapping);
 
 	if (unlikely(!fault->slot))
 		return kvm_handle_noslot_fault(vcpu, fault, access);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b3c1b2f57e680..41435324b41d7 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4426,6 +4426,7 @@  int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_ENABLE_CAP:
 	case KVM_CAP_VM_DISABLE_NX_HUGE_PAGES:
 	case KVM_CAP_X86_MEMORY_FAULT_EXIT:
+	case KVM_CAP_MEMORY_FAULT_NOWAIT:
 		r = 1;
 		break;
 	case KVM_CAP_EXIT_HYPERCALL:
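
With the x86.c hunk above, userspace can probe for the capability before setting the memslot flag. A minimal check might look like the following (KVM_CAP_MEMORY_FAULT_NOWAIT is defined by this series' uapi patch and is not yet upstream):

  #include <fcntl.h>
  #include <unistd.h>
  #include <sys/ioctl.h>
  #include <linux/kvm.h>

  static int supports_memory_fault_nowait(void)
  {
          int kvm_fd = open("/dev/kvm", O_RDWR | O_CLOEXEC);
          int r = kvm_fd >= 0 &&
                  ioctl(kvm_fd, KVM_CHECK_EXTENSION,
                        KVM_CAP_MEMORY_FAULT_NOWAIT) > 0;

          if (kvm_fd >= 0)
                  close(kvm_fd);
          return r;       /* 0: fall back to in-kernel waiting */
  }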