
[v4,10/16] KVM: x86: Implement KVM_CAP_NOWAIT_ON_FAULT

Message ID 20230602161921.208564-11-amoorthy@google.com
State New, archived
Series Improve scalability of KVM + userfaultfd live migration via annotated memory faults.

Commit Message

Anish Moorthy June 2, 2023, 4:19 p.m. UTC
When the memslot flag is enabled, fail guest memory accesses for which
fast-gup fails (i.e., where resolving the page fault would require putting
the faulting thread to sleep).

Suggested-by: James Houghton <jthoughton@google.com>
Signed-off-by: Anish Moorthy <amoorthy@google.com>
---
 Documentation/virt/kvm/api.rst |  2 +-
 arch/x86/kvm/mmu/mmu.c         | 17 ++++++++++++-----
 arch/x86/kvm/x86.c             |  1 +
 3 files changed, 14 insertions(+), 6 deletions(-)
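
For illustration, here is a minimal userspace sketch of opting a memslot into
this behavior. The flag name KVM_MEM_NOWAIT_ON_FAULT is assumed from earlier
in this series, and vm_fd/backing_mem/mem_size are placeholders:

	/*
	 * Sketch (assumed flag name): register a memslot with the
	 * nowait-on-fault flag set, so that vCPU faults on this slot are
	 * resolved via fast-gup only and never put the thread to sleep.
	 */
	struct kvm_userspace_memory_region region = {
		.slot            = 0,
		.flags           = KVM_MEM_NOWAIT_ON_FAULT, /* assumed name */
		.guest_phys_addr = 0,
		.memory_size     = mem_size,           /* placeholder */
		.userspace_addr  = (__u64)backing_mem, /* placeholder */
	};

	if (ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region))
		err(1, "KVM_SET_USER_MEMORY_REGION");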

Comments

Sean Christopherson June 14, 2023, 8:25 p.m. UTC | #1
On Fri, Jun 02, 2023, Anish Moorthy wrote:
> When the memslot flag is enabled, fail guest memory accesses for which
> fast-gup fails (i.e., where resolving the page fault would require putting
> the faulting thread to sleep).
> 
> Suggested-by: James Houghton <jthoughton@google.com>
> Signed-off-by: Anish Moorthy <amoorthy@google.com>
> ---
>  Documentation/virt/kvm/api.rst |  2 +-
>  arch/x86/kvm/mmu/mmu.c         | 17 ++++++++++++-----
>  arch/x86/kvm/x86.c             |  1 +
>  3 files changed, 14 insertions(+), 6 deletions(-)
> 
> diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
> index 9daadbe2c7ed..aa7b4024fd41 100644
> --- a/Documentation/virt/kvm/api.rst
> +++ b/Documentation/virt/kvm/api.rst
> @@ -7783,7 +7783,7 @@ bugs and reported to the maintainers so that annotations can be added.
>  7.35 KVM_CAP_NOWAIT_ON_FAULT
>  ----------------------------
>  
> -:Architectures: None
> +:Architectures: x86
>  :Returns: -EINVAL.
>  
>  The presence of this capability indicates that userspace may pass the
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index cb71aae9aaec..288008a64e5c 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -4299,7 +4299,9 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
>  	kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, 0, true, NULL);
>  }
>  
> -static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
> +static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu,
> +			     struct kvm_page_fault *fault,
> +			     bool nowait)

More booleans!?  Just say no!  And in this case, there's no reason to pass in a
flag, just handle this entirely in __kvm_faultin_pfn().

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 7b6eab6f84e8..ebf21f1f43ce 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4302,6 +4302,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
 static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 {
        struct kvm_memory_slot *slot = fault->slot;
+       bool nowait = kvm_is_slot_nowait_on_fault(slot);
        bool async;
 
        /*
@@ -4332,9 +4333,12 @@ static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
        }
 
        async = false;
-       fault->pfn = __gfn_to_pfn_memslot(slot, fault->gfn, false, false, &async,
-                                         fault->write, &fault->map_writable,
-                                         &fault->hva);
+
+       fault->pfn = __gfn_to_pfn_memslot(slot, fault->gfn,
+                                         nowait, false,
+                                         nowait ? NULL : &async,
+                                         fault->write, &fault->map_writable, &fault->hva);
+
        if (!async)
                return RET_PF_CONTINUE; /* *pfn has correct page already */


On a related topic, I would *love* for someone to overhaul gfn_to_pfn() to replace
the "booleans for everything" approach and instead have KVM pass FOLL_* flags
internally.  Rough sketch here: https://lkml.kernel.org/r/ZGvUsf7lMkrNDHuE%40google.com
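
(Illustrative only, not the sketch from that link: with FOLL_* flags, the
helper and its caller might take roughly the following shape, using the
existing FOLL_WRITE/FOLL_NOWAIT gup flags in place of the booleans. Names
and the signature are hypothetical.)

	/* Hypothetical flag-based signature; illustration only. */
	kvm_pfn_t __gfn_to_pfn_memslot(const struct kvm_memory_slot *slot,
				       gfn_t gfn, unsigned int foll,
				       bool *async, bool *writable, hva_t *hva);

	/* Caller builds a flag mask instead of threading booleans through: */
	unsigned int foll = FOLL_GET;

	if (fault->write)
		foll |= FOLL_WRITE;
	if (kvm_is_slot_nowait_on_fault(slot))
		foll |= FOLL_NOWAIT;

	fault->pfn = __gfn_to_pfn_memslot(slot, fault->gfn, foll, &async,
					  &fault->map_writable, &fault->hva);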
Anish Moorthy July 7, 2023, 5:41 p.m. UTC | #2
On Wed, Jun 14, 2023 at 1:25 PM Sean Christopherson <seanjc@google.com> wrote:
> > -static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
> > +static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu,
> > +                          struct kvm_page_fault *fault,
> > +                          bool nowait)
>
> More booleans!?  Just say no!  And in this case, there's no reason to pass in a
> flag, just handle this entirely in __kvm_faultin_pfn().

Ah, thanks: that extra parameter is a holdover from forever ago, when
"nowait" was a special thing that was read by handle_error_pfn(). Done.

Patch

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 9daadbe2c7ed..aa7b4024fd41 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -7783,7 +7783,7 @@ bugs and reported to the maintainers so that annotations can be added.
 7.35 KVM_CAP_NOWAIT_ON_FAULT
 ----------------------------
 
-:Architectures: None
+:Architectures: x86
 :Returns: -EINVAL.
 
 The presence of this capability indicates that userspace may pass the
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index cb71aae9aaec..288008a64e5c 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4299,7 +4299,9 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
 	kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, 0, true, NULL);
 }
 
-static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
+static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu,
+			     struct kvm_page_fault *fault,
+			     bool nowait)
 {
 	struct kvm_memory_slot *slot = fault->slot;
 	bool async;
@@ -4332,9 +4334,12 @@ static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
 	}
 
 	async = false;
-	fault->pfn = __gfn_to_pfn_memslot(slot, fault->gfn, false, false, &async,
-					  fault->write, &fault->map_writable,
-					  &fault->hva);
+
+	fault->pfn = __gfn_to_pfn_memslot(slot, fault->gfn,
+					  nowait, false,
+					  nowait ? NULL : &async,
+					  fault->write, &fault->map_writable, &fault->hva);
+
 	if (!async)
 		return RET_PF_CONTINUE; /* *pfn has correct page already */
 
@@ -4368,7 +4373,9 @@ static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
 	fault->mmu_seq = vcpu->kvm->mmu_invalidate_seq;
 	smp_rmb();
 
-	ret = __kvm_faultin_pfn(vcpu, fault);
+	ret = __kvm_faultin_pfn(vcpu, fault,
+				likely(fault->slot)
+					&& kvm_slot_nowait_on_fault(fault->slot));
 	if (ret != RET_PF_CONTINUE)
 		return ret;
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index d15bacb3f634..4fbe9c811cc7 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4498,6 +4498,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_VM_DISABLE_NX_HUGE_PAGES:
 	case KVM_CAP_IRQFD_RESAMPLE:
 	case KVM_CAP_MEMORY_FAULT_INFO:
+	case KVM_CAP_NOWAIT_ON_FAULT:
 		r = 1;
 		break;
 	case KVM_CAP_EXIT_HYPERCALL:
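
For reference, userspace can probe for the new capability with the standard
KVM_CHECK_EXTENSION ioctl. A minimal sketch, assuming a uapi header that
defines KVM_CAP_NOWAIT_ON_FAULT from this series:

	#include <err.h>
	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	int main(void)
	{
		int kvm_fd = open("/dev/kvm", O_RDWR);

		if (kvm_fd < 0)
			err(1, "open /dev/kvm");

		/* A return > 0 means the kernel advertises the capability. */
		if (ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_NOWAIT_ON_FAULT) > 0)
			puts("KVM_CAP_NOWAIT_ON_FAULT supported");

		return 0;
	}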