[3/4] KVM: Get reference to VM's address space in the async #PF worker

Message ID 20240110011533.503302-4-seanjc@google.com (mailing list archive)
State New, archived
Series KVM: Async #PF fixes and cleanups

Commit Message

Sean Christopherson Jan. 10, 2024, 1:15 a.m. UTC
Get a reference to the target VM's address space in async_pf_execute()
instead of gifting a reference from kvm_setup_async_pf().  Keeping the
address space alive just to service an async #PF is counter-productive,
i.e. if the process is exiting and all vCPUs are dead, then NOT doing
get_user_pages_remote() and freeing the address space asap is desirable.

Handling the mm reference entirely within async_pf_execute() also
simplifies the async #PF flows as a whole, e.g. it's not immediately
obvious when the worker task vs. the vCPU task is responsible for putting
the gifted mm reference.

Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 include/linux/kvm_host.h |  1 -
 virt/kvm/async_pf.c      | 32 ++++++++++++++++++--------------
 2 files changed, 18 insertions(+), 15 deletions(-)
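
For readers less familiar with the two mm_struct refcounts in play here, below
is a minimal sketch (illustration only, not code from the patch; the function
name is made up) of the pattern async_pf_execute() moves to.  KVM's mmgrab()
of kvm->mm at VM creation keeps the mm_struct allocated (mm_count), so the
pointer is always safe to dereference, while mmget_not_zero()/mmput() pin the
address space itself (mm_users) and fail once the owning process has exited.

    #include <linux/sched/mm.h>

    /* Illustrative helper, not part of the patch. */
    static void example_use_vm_mm(struct mm_struct *mm)
    {
    	/* Pin the address space; this fails iff the owning process is exiting. */
    	if (!mmget_not_zero(mm))
    		return;

    	/* mmap_read_lock(mm) + get_user_pages_remote() would be safe here. */

    	mmput(mm);
    }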

Comments

Xu Yilun Jan. 20, 2024, 3:16 p.m. UTC | #1
On Tue, Jan 09, 2024 at 05:15:32PM -0800, Sean Christopherson wrote:
> Get a reference to the target VM's address space in async_pf_execute()
> instead of gifting a reference from kvm_setup_async_pf().  Keeping the
> address space alive just to service an async #PF is counter-productive,
> i.e. if the process is exiting and all vCPUs are dead, then NOT doing
> get_user_pages_remote() and freeing the address space asap is desirable.
> 
> Handling the mm reference entirely within async_pf_execute() also
> simplifies the async #PF flows as a whole, e.g. it's not immediately
> obvious when the worker task vs. the vCPU task is responsible for putting
> the gifted mm reference.
> 
> Signed-off-by: Sean Christopherson <seanjc@google.com>
> ---
>  include/linux/kvm_host.h |  1 -
>  virt/kvm/async_pf.c      | 32 ++++++++++++++++++--------------
>  2 files changed, 18 insertions(+), 15 deletions(-)
> 
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 7e7fd25b09b3..bbfefd7e612f 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -238,7 +238,6 @@ struct kvm_async_pf {
>  	struct list_head link;
>  	struct list_head queue;
>  	struct kvm_vcpu *vcpu;
> -	struct mm_struct *mm;
>  	gpa_t cr2_or_gpa;
>  	unsigned long addr;
>  	struct kvm_arch_async_pf arch;
> diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
> index d5dc50318aa6..c3f4f351a2ae 100644
> --- a/virt/kvm/async_pf.c
> +++ b/virt/kvm/async_pf.c
> @@ -46,8 +46,8 @@ static void async_pf_execute(struct work_struct *work)
>  {
>  	struct kvm_async_pf *apf =
>  		container_of(work, struct kvm_async_pf, work);
> -	struct mm_struct *mm = apf->mm;
>  	struct kvm_vcpu *vcpu = apf->vcpu;
> +	struct mm_struct *mm = vcpu->kvm->mm;
>  	unsigned long addr = apf->addr;
>  	gpa_t cr2_or_gpa = apf->cr2_or_gpa;
>  	int locked = 1;
> @@ -56,16 +56,24 @@ static void async_pf_execute(struct work_struct *work)
>  	might_sleep();
>  
>  	/*
> -	 * This work is run asynchronously to the task which owns
> -	 * mm and might be done in another context, so we must
> -	 * access remotely.
> +	 * Attempt to pin the VM's host address space, and simply skip gup() if
> +	 * acquiring a pin fails, i.e. if the process is exiting.  Note, KVM
> +	 * holds a reference to its associated mm_struct until the very end of
> +	 * kvm_destroy_vm(), i.e. the struct itself won't be freed before this
> +	 * work item is fully processed.
>  	 */
> -	mmap_read_lock(mm);
> -	get_user_pages_remote(mm, addr, 1, FOLL_WRITE, NULL, &locked);
> -	if (locked)
> -		mmap_read_unlock(mm);
> -	mmput(mm);
> +	if (mmget_not_zero(mm)) {
> +		mmap_read_lock(mm);
> +		get_user_pages_remote(mm, addr, 1, FOLL_WRITE, NULL, &locked);
> +		if (locked)
> +			mmap_read_unlock(mm);
> +		mmput(mm);
> +	}
>  
> +	/*
> +	 * Notify and kick the vCPU even if faulting in the page failed, e.g.

How about when the process is exiting? Could we just skip the following?

Thanks,
Yilun

> +	 * so that the vCPU can retry the fault synchronously.
> +	 */
>  	if (IS_ENABLED(CONFIG_KVM_ASYNC_PF_SYNC))
>  		kvm_arch_async_page_present(vcpu, apf);
>  
> @@ -129,10 +137,8 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
>  #ifdef CONFIG_KVM_ASYNC_PF_SYNC
>  		flush_work(&work->work);
>  #else
> -		if (cancel_work_sync(&work->work)) {
> -			mmput(work->mm);
> +		if (cancel_work_sync(&work->work))
>  			kmem_cache_free(async_pf_cache, work);
> -		}
>  #endif
>  		spin_lock(&vcpu->async_pf.lock);
>  	}
> @@ -211,8 +217,6 @@ bool kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
>  	work->cr2_or_gpa = cr2_or_gpa;
>  	work->addr = hva;
>  	work->arch = *arch;
> -	work->mm = current->mm;
> -	mmget(work->mm);
>  
>  	INIT_WORK(&work->work, async_pf_execute);
>  
> -- 
> 2.43.0.472.g3155946c3a-goog
>
Sean Christopherson Jan. 24, 2024, 6:52 p.m. UTC | #2
On Sat, Jan 20, 2024, Xu Yilun wrote:
> On Tue, Jan 09, 2024 at 05:15:32PM -0800, Sean Christopherson wrote:
> > Get a reference to the target VM's address space in async_pf_execute()
> > instead of gifting a reference from kvm_setup_async_pf().  Keeping the
> > address space alive just to service an async #PF is counter-productive,
> > i.e. if the process is exiting and all vCPUs are dead, then NOT doing
> > get_user_pages_remote() and freeing the address space asap is desirable.
> > 
> > Handling the mm reference entirely within async_pf_execute() also
> > simplifies the async #PF flows as a whole, e.g. it's not immediately
> > obvious when the worker task vs. the vCPU task is responsible for putting
> > the gifted mm reference.
> > 
> > Signed-off-by: Sean Christopherson <seanjc@google.com>
> > ---
> >  include/linux/kvm_host.h |  1 -
> >  virt/kvm/async_pf.c      | 32 ++++++++++++++++++--------------
> >  2 files changed, 18 insertions(+), 15 deletions(-)
> > 
> > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> > index 7e7fd25b09b3..bbfefd7e612f 100644
> > --- a/include/linux/kvm_host.h
> > +++ b/include/linux/kvm_host.h
> > @@ -238,7 +238,6 @@ struct kvm_async_pf {
> >  	struct list_head link;
> >  	struct list_head queue;
> >  	struct kvm_vcpu *vcpu;
> > -	struct mm_struct *mm;
> >  	gpa_t cr2_or_gpa;
> >  	unsigned long addr;
> >  	struct kvm_arch_async_pf arch;
> > diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
> > index d5dc50318aa6..c3f4f351a2ae 100644
> > --- a/virt/kvm/async_pf.c
> > +++ b/virt/kvm/async_pf.c
> > @@ -46,8 +46,8 @@ static void async_pf_execute(struct work_struct *work)
> >  {
> >  	struct kvm_async_pf *apf =
> >  		container_of(work, struct kvm_async_pf, work);
> > -	struct mm_struct *mm = apf->mm;
> >  	struct kvm_vcpu *vcpu = apf->vcpu;
> > +	struct mm_struct *mm = vcpu->kvm->mm;
> >  	unsigned long addr = apf->addr;
> >  	gpa_t cr2_or_gpa = apf->cr2_or_gpa;
> >  	int locked = 1;
> > @@ -56,16 +56,24 @@ static void async_pf_execute(struct work_struct *work)
> >  	might_sleep();
> >  
> >  	/*
> > -	 * This work is run asynchronously to the task which owns
> > -	 * mm and might be done in another context, so we must
> > -	 * access remotely.
> > +	 * Attempt to pin the VM's host address space, and simply skip gup() if
> > +	 * acquiring a pin fails, i.e. if the process is exiting.  Note, KVM
> > +	 * holds a reference to its associated mm_struct until the very end of
> > +	 * kvm_destroy_vm(), i.e. the struct itself won't be freed before this
> > +	 * work item is fully processed.
> >  	 */
> > -	mmap_read_lock(mm);
> > -	get_user_pages_remote(mm, addr, 1, FOLL_WRITE, NULL, &locked);
> > -	if (locked)
> > -		mmap_read_unlock(mm);
> > -	mmput(mm);
> > +	if (mmget_not_zero(mm)) {
> > +		mmap_read_lock(mm);
> > +		get_user_pages_remote(mm, addr, 1, FOLL_WRITE, NULL, &locked);
> > +		if (locked)
> > +			mmap_read_unlock(mm);
> > +		mmput(mm);
> > +	}
> >  
> > +	/*
> > +	 * Notify and kick the vCPU even if faulting in the page failed, e.g.
> 
> How about when the process is exiting? Could we just skip the following?

Maybe?  I'm not opposed to trimming this down even more, but I doubt it will make
much of a difference.  The vCPU can't be running so async_pf.lock shouldn't be
contended, no IPIs will be issued for kicks, etc.  So for this patch at least,
I want to take the most conservative approach while still cleaning up the mm_struct
usage.
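
For concreteness, Yilun's suggestion would roughly amount to the following
hypothetical variant of the hunk above (a sketch only, not part of this
series; the completion-queue bookkeeping later in async_pf_execute() would
still need to run so the work item gets freed):

	bool pinned = mmget_not_zero(mm);

	if (pinned) {
		mmap_read_lock(mm);
		get_user_pages_remote(mm, addr, 1, FOLL_WRITE, NULL, &locked);
		if (locked)
			mmap_read_unlock(mm);
		mmput(mm);
	}

	/* Hypothetical: skip the notify/kick entirely if the pin failed. */
	if (pinned && IS_ENABLED(CONFIG_KVM_ASYNC_PF_SYNC))
		kvm_arch_async_page_present(vcpu, apf);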

> > +	 * so that the vCPU can retry the fault synchronously.
> > +	 */
> >  	if (IS_ENABLED(CONFIG_KVM_ASYNC_PF_SYNC))
> >  		kvm_arch_async_page_present(vcpu, apf);
> >  
> > @@ -129,10 +137,8 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
> >  #ifdef CONFIG_KVM_ASYNC_PF_SYNC
> >  		flush_work(&work->work);
> >  #else
> > -		if (cancel_work_sync(&work->work)) {
> > -			mmput(work->mm);
> > +		if (cancel_work_sync(&work->work))
> >  			kmem_cache_free(async_pf_cache, work);
> > -		}
> >  #endif
> >  		spin_lock(&vcpu->async_pf.lock);
> >  	}
> > @@ -211,8 +217,6 @@ bool kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
> >  	work->cr2_or_gpa = cr2_or_gpa;
> >  	work->addr = hva;
> >  	work->arch = *arch;
> > -	work->mm = current->mm;
> > -	mmget(work->mm);
> >  
> >  	INIT_WORK(&work->work, async_pf_execute);
> >  
> > -- 
> > 2.43.0.472.g3155946c3a-goog
> >
Xu Yilun Jan. 26, 2024, 8:06 a.m. UTC | #3
On Wed, Jan 24, 2024 at 10:52:12AM -0800, Sean Christopherson wrote:
> On Sat, Jan 20, 2024, Xu Yilun wrote:
> > On Tue, Jan 09, 2024 at 05:15:32PM -0800, Sean Christopherson wrote:
> > > Get a reference to the target VM's address space in async_pf_execute()
> > > instead of gifting a reference from kvm_setup_async_pf().  Keeping the
> > > address space alive just to service an async #PF is counter-productive,
> > > i.e. if the process is exiting and all vCPUs are dead, then NOT doing
> > > get_user_pages_remote() and freeing the address space asap is desirable.
> > > 
> > > Handling the mm reference entirely within async_pf_execute() also
> > > simplifies the async #PF flows as a whole, e.g. it's not immediately
> > > obvious when the worker task vs. the vCPU task is responsible for putting
> > > the gifted mm reference.
> > > 
> > > Signed-off-by: Sean Christopherson <seanjc@google.com>
> > > ---
> > >  include/linux/kvm_host.h |  1 -
> > >  virt/kvm/async_pf.c      | 32 ++++++++++++++++++--------------
> > >  2 files changed, 18 insertions(+), 15 deletions(-)
> > > 
> > > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> > > index 7e7fd25b09b3..bbfefd7e612f 100644
> > > --- a/include/linux/kvm_host.h
> > > +++ b/include/linux/kvm_host.h
> > > @@ -238,7 +238,6 @@ struct kvm_async_pf {
> > >  	struct list_head link;
> > >  	struct list_head queue;
> > >  	struct kvm_vcpu *vcpu;
> > > -	struct mm_struct *mm;
> > >  	gpa_t cr2_or_gpa;
> > >  	unsigned long addr;
> > >  	struct kvm_arch_async_pf arch;
> > > diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
> > > index d5dc50318aa6..c3f4f351a2ae 100644
> > > --- a/virt/kvm/async_pf.c
> > > +++ b/virt/kvm/async_pf.c
> > > @@ -46,8 +46,8 @@ static void async_pf_execute(struct work_struct *work)
> > >  {
> > >  	struct kvm_async_pf *apf =
> > >  		container_of(work, struct kvm_async_pf, work);
> > > -	struct mm_struct *mm = apf->mm;
> > >  	struct kvm_vcpu *vcpu = apf->vcpu;
> > > +	struct mm_struct *mm = vcpu->kvm->mm;
> > >  	unsigned long addr = apf->addr;
> > >  	gpa_t cr2_or_gpa = apf->cr2_or_gpa;
> > >  	int locked = 1;
> > > @@ -56,16 +56,24 @@ static void async_pf_execute(struct work_struct *work)
> > >  	might_sleep();
> > >  
> > >  	/*
> > > -	 * This work is run asynchronously to the task which owns
> > > -	 * mm and might be done in another context, so we must
> > > -	 * access remotely.
> > > +	 * Attempt to pin the VM's host address space, and simply skip gup() if
> > > +	 * acquiring a pin fails, i.e. if the process is exiting.  Note, KVM
> > > +	 * holds a reference to its associated mm_struct until the very end of
> > > +	 * kvm_destroy_vm(), i.e. the struct itself won't be freed before this
> > > +	 * work item is fully processed.
> > >  	 */
> > > -	mmap_read_lock(mm);
> > > -	get_user_pages_remote(mm, addr, 1, FOLL_WRITE, NULL, &locked);
> > > -	if (locked)
> > > -		mmap_read_unlock(mm);
> > > -	mmput(mm);
> > > +	if (mmget_not_zero(mm)) {
> > > +		mmap_read_lock(mm);
> > > +		get_user_pages_remote(mm, addr, 1, FOLL_WRITE, NULL, &locked);
> > > +		if (locked)
> > > +			mmap_read_unlock(mm);
> > > +		mmput(mm);
> > > +	}
> > >  
> > > +	/*
> > > +	 * Notify and kick the vCPU even if faulting in the page failed, e.g.
> > 
> > How about when the process is exiting? Could we just skip the following?
> 
> Maybe?  I'm not opposed to trimming this down even more, but I doubt it will make
> much of a difference.  The vCPU can't be running so async_pf.lock shouldn't be
> contended, no IPIs will be issued for kicks, etc.  So for this patch at least,
> I want to take the most conservative approach while still cleaning up the mm_struct
> usage.

Looks good to me.

Reviewed-by: Xu Yilun <yilun.xu@intel.com>
Vitaly Kuznetsov Jan. 26, 2024, 4:21 p.m. UTC | #4
Sean Christopherson <seanjc@google.com> writes:

> Get a reference to the target VM's address space in async_pf_execute()
> instead of gifting a reference from kvm_setup_async_pf().  Keeping the
> address space alive just to service an async #PF is counter-productive,
> i.e. if the process is exiting and all vCPUs are dead, then NOT doing
> get_user_pages_remote() and freeing the address space asap is
> desirable.

It took me a while to realize why all vCPU fds are guaranteed to be managed
by the same mm that did KVM_CREATE_VM, since (AFAIU) fds can be passed
around. Turns out, we explicitly forbid this in kvm_vcpu_ioctl():

        if (vcpu->kvm->mm != current->mm || vcpu->kvm->vm_dead)
                return -EIO;

so this indeed means that grabbing current->mm in kvm_setup_async_pf()
can be avoided. I'm not sure whether it's just me, or whether an "all vCPUs
are required to be managed by the same mm" comment somewhere would be helpful.

>
> Handling the mm reference entirely within async_pf_execute() also
> simplifies the async #PF flows as a whole, e.g. it's not immediately
> obvious when the worker task vs. the vCPU task is responsible for putting
> the gifted mm reference.
>
> Signed-off-by: Sean Christopherson <seanjc@google.com>
> ---
>  include/linux/kvm_host.h |  1 -
>  virt/kvm/async_pf.c      | 32 ++++++++++++++++++--------------
>  2 files changed, 18 insertions(+), 15 deletions(-)
>
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 7e7fd25b09b3..bbfefd7e612f 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -238,7 +238,6 @@ struct kvm_async_pf {
>  	struct list_head link;
>  	struct list_head queue;
>  	struct kvm_vcpu *vcpu;
> -	struct mm_struct *mm;
>  	gpa_t cr2_or_gpa;
>  	unsigned long addr;
>  	struct kvm_arch_async_pf arch;
> diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
> index d5dc50318aa6..c3f4f351a2ae 100644
> --- a/virt/kvm/async_pf.c
> +++ b/virt/kvm/async_pf.c
> @@ -46,8 +46,8 @@ static void async_pf_execute(struct work_struct *work)
>  {
>  	struct kvm_async_pf *apf =
>  		container_of(work, struct kvm_async_pf, work);
> -	struct mm_struct *mm = apf->mm;
>  	struct kvm_vcpu *vcpu = apf->vcpu;
> +	struct mm_struct *mm = vcpu->kvm->mm;
>  	unsigned long addr = apf->addr;
>  	gpa_t cr2_or_gpa = apf->cr2_or_gpa;
>  	int locked = 1;
> @@ -56,16 +56,24 @@ static void async_pf_execute(struct work_struct *work)
>  	might_sleep();
>  
>  	/*
> -	 * This work is run asynchronously to the task which owns
> -	 * mm and might be done in another context, so we must
> -	 * access remotely.
> +	 * Attempt to pin the VM's host address space, and simply skip gup() if
> +	 * acquiring a pin fails, i.e. if the process is exiting.  Note, KVM
> +	 * holds a reference to its associated mm_struct until the very end of
> +	 * kvm_destroy_vm(), i.e. the struct itself won't be freed before this
> +	 * work item is fully processed.
>  	 */
> -	mmap_read_lock(mm);
> -	get_user_pages_remote(mm, addr, 1, FOLL_WRITE, NULL, &locked);
> -	if (locked)
> -		mmap_read_unlock(mm);
> -	mmput(mm);
> +	if (mmget_not_zero(mm)) {
> +		mmap_read_lock(mm);
> +		get_user_pages_remote(mm, addr, 1, FOLL_WRITE, NULL, &locked);
> +		if (locked)
> +			mmap_read_unlock(mm);
> +		mmput(mm);
> +	}
>  
> +	/*
> +	 * Notify and kick the vCPU even if faulting in the page failed, e.g.
> +	 * so that the vCPU can retry the fault synchronously.
> +	 */
>  	if (IS_ENABLED(CONFIG_KVM_ASYNC_PF_SYNC))
>  		kvm_arch_async_page_present(vcpu, apf);
>  
> @@ -129,10 +137,8 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
>  #ifdef CONFIG_KVM_ASYNC_PF_SYNC
>  		flush_work(&work->work);
>  #else
> -		if (cancel_work_sync(&work->work)) {
> -			mmput(work->mm);
> +		if (cancel_work_sync(&work->work))
>  			kmem_cache_free(async_pf_cache, work);
> -		}
>  #endif
>  		spin_lock(&vcpu->async_pf.lock);
>  	}
> @@ -211,8 +217,6 @@ bool kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
>  	work->cr2_or_gpa = cr2_or_gpa;
>  	work->addr = hva;
>  	work->arch = *arch;
> -	work->mm = current->mm;
> -	mmget(work->mm);
>  
>  	INIT_WORK(&work->work, async_pf_execute);

Reviewed-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Sean Christopherson Jan. 26, 2024, 4:39 p.m. UTC | #5
On Fri, Jan 26, 2024, Vitaly Kuznetsov wrote:
> Sean Christopherson <seanjc@google.com> writes:
> 
> > Get a reference to the target VM's address space in async_pf_execute()
> > instead of gifting a reference from kvm_setup_async_pf().  Keeping the
> > address space alive just to service an async #PF is counter-productive,
> > i.e. if the process is exiting and all vCPUs are dead, then NOT doing
> > get_user_pages_remote() and freeing the address space asap is
> > desirable.
> 
> It took me a while to realize why all vCPU fds are guaranteed to be managed
> by the same mm that did KVM_CREATE_VM, since (AFAIU) fds can be passed
> around. Turns out, we explicitly forbid this in kvm_vcpu_ioctl():
> 
>         if (vcpu->kvm->mm != current->mm || vcpu->kvm->vm_dead)
>                 return -EIO;
> 
> so this indeed means that grabbing current->mm in kvm_setup_async_pf()
> can be avoided. I'm not sure whether it's just me, or whether an "all vCPUs
> are required to be managed by the same mm" comment somewhere would be helpful.

It's definitely not just you.  Documentation/virt/kvm/* could use thorough
documentation of everything in KVM that relies on vCPUs, and all meaningful
ioctls(), being executed in the same mm_struct (address space).  Because that requirement
is pervasive throughout KVM.  E.g. sharing KVM page tables across vCPUs is safe
iff all vCPUs are in the same address space, otherwise the hva=>pfn translations
through the memslot would diverge, mmu_notifiers would be all kinds of broken, etc.
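
As a strawman for the comment Vitaly asked about, one place the invariant
could be spelled out is right where it is already enforced (hypothetical
sketch, not an actual patch):

	/*
	 * All vCPU ioctls must be issued from the mm that created the VM.
	 * KVM pervasively assumes a single address space per VM, e.g. for
	 * memslot hva=>pfn translations, mmu_notifier registration, and
	 * async #PF.
	 */
	if (vcpu->kvm->mm != current->mm || vcpu->kvm->vm_dead)
		return -EIO;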

Patch

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 7e7fd25b09b3..bbfefd7e612f 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -238,7 +238,6 @@  struct kvm_async_pf {
 	struct list_head link;
 	struct list_head queue;
 	struct kvm_vcpu *vcpu;
-	struct mm_struct *mm;
 	gpa_t cr2_or_gpa;
 	unsigned long addr;
 	struct kvm_arch_async_pf arch;
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index d5dc50318aa6..c3f4f351a2ae 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -46,8 +46,8 @@  static void async_pf_execute(struct work_struct *work)
 {
 	struct kvm_async_pf *apf =
 		container_of(work, struct kvm_async_pf, work);
-	struct mm_struct *mm = apf->mm;
 	struct kvm_vcpu *vcpu = apf->vcpu;
+	struct mm_struct *mm = vcpu->kvm->mm;
 	unsigned long addr = apf->addr;
 	gpa_t cr2_or_gpa = apf->cr2_or_gpa;
 	int locked = 1;
@@ -56,16 +56,24 @@  static void async_pf_execute(struct work_struct *work)
 	might_sleep();
 
 	/*
-	 * This work is run asynchronously to the task which owns
-	 * mm and might be done in another context, so we must
-	 * access remotely.
+	 * Attempt to pin the VM's host address space, and simply skip gup() if
+	 * acquiring a pin fails, i.e. if the process is exiting.  Note, KVM
+	 * holds a reference to its associated mm_struct until the very end of
+	 * kvm_destroy_vm(), i.e. the struct itself won't be freed before this
+	 * work item is fully processed.
 	 */
-	mmap_read_lock(mm);
-	get_user_pages_remote(mm, addr, 1, FOLL_WRITE, NULL, &locked);
-	if (locked)
-		mmap_read_unlock(mm);
-	mmput(mm);
+	if (mmget_not_zero(mm)) {
+		mmap_read_lock(mm);
+		get_user_pages_remote(mm, addr, 1, FOLL_WRITE, NULL, &locked);
+		if (locked)
+			mmap_read_unlock(mm);
+		mmput(mm);
+	}
 
+	/*
+	 * Notify and kick the vCPU even if faulting in the page failed, e.g.
+	 * so that the vCPU can retry the fault synchronously.
+	 */
 	if (IS_ENABLED(CONFIG_KVM_ASYNC_PF_SYNC))
 		kvm_arch_async_page_present(vcpu, apf);
 
@@ -129,10 +137,8 @@  void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
 #ifdef CONFIG_KVM_ASYNC_PF_SYNC
 		flush_work(&work->work);
 #else
-		if (cancel_work_sync(&work->work)) {
-			mmput(work->mm);
+		if (cancel_work_sync(&work->work))
 			kmem_cache_free(async_pf_cache, work);
-		}
 #endif
 		spin_lock(&vcpu->async_pf.lock);
 	}
@@ -211,8 +217,6 @@  bool kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
 	work->cr2_or_gpa = cr2_or_gpa;
 	work->addr = hva;
 	work->arch = *arch;
-	work->mm = current->mm;
-	mmget(work->mm);
 
 	INIT_WORK(&work->work, async_pf_execute);