
[2/3] mm: drop VMA lock before waiting for migration

Message ID: 20230501175025.36233-2-surenb@google.com (mailing list archive)
State: New
Series: [1/3] mm: handle swap page faults under VMA lock if page is uncontended

Commit Message

Suren Baghdasaryan May 1, 2023, 5:50 p.m. UTC
migration_entry_wait does not need VMA lock, therefore it can be dropped
before waiting. Introduce VM_FAULT_VMA_UNLOCKED to indicate that VMA
lock was dropped while in handle_mm_fault().
Note that once VMA lock is dropped, the VMA reference can't be used as
there are no guarantees it was not freed.

Signed-off-by: Suren Baghdasaryan <surenb@google.com>
---
 arch/arm64/mm/fault.c    |  3 ++-
 arch/powerpc/mm/fault.c  |  3 ++-
 arch/s390/mm/fault.c     |  3 ++-
 arch/x86/mm/fault.c      |  3 ++-
 include/linux/mm_types.h |  6 +++++-
 mm/memory.c              | 12 ++++++++++--
 6 files changed, 23 insertions(+), 7 deletions(-)
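
In short, a condensed sketch of the pattern the hunks below implement (lifted
from this patch, not a new API): do_swap_page() saves vma->vm_mm, drops the
per-VMA lock itself before waiting on the migration entry and reports that via
VM_FAULT_VMA_UNLOCKED; the arch fault handlers then skip their own
vma_end_read() when the flag is set.

	/* mm/memory.c: do_swap_page(), migration-entry case */
	struct mm_struct *mm = vma->vm_mm;	/* save before the lock is dropped */

	if (vmf->flags & FAULT_FLAG_VMA_LOCK) {
		vma_end_read(vma);		/* vma must not be touched after this */
		ret |= VM_FAULT_VMA_UNLOCKED;
	}
	migration_entry_wait(mm, vmf->pmd, vmf->address);

	/* arch fault handler, e.g. do_user_addr_fault() on x86 */
	fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs);
	if (!(fault & VM_FAULT_VMA_UNLOCKED))
		vma_end_read(vma);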

Comments

Alistair Popple May 2, 2023, 1:21 p.m. UTC | #1
Suren Baghdasaryan <surenb@google.com> writes:

[...]

> diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> index 306a3d1a0fa6..b3b57c6da0e1 100644
> --- a/include/linux/mm_types.h
> +++ b/include/linux/mm_types.h
> @@ -1030,6 +1030,7 @@ typedef __bitwise unsigned int vm_fault_t;
>   *				fsync() to complete (for synchronous page faults
>   *				in DAX)
>   * @VM_FAULT_COMPLETED:		->fault completed, meanwhile mmap lock released
> + * @VM_FAULT_VMA_UNLOCKED:	VMA lock was released

A note here saying vmf->vma should no longer be accessed would be nice.

>   * @VM_FAULT_HINDEX_MASK:	mask HINDEX value
>   *
>   */
> @@ -1047,6 +1048,7 @@ enum vm_fault_reason {
>  	VM_FAULT_DONE_COW       = (__force vm_fault_t)0x001000,
>  	VM_FAULT_NEEDDSYNC      = (__force vm_fault_t)0x002000,
>  	VM_FAULT_COMPLETED      = (__force vm_fault_t)0x004000,
> +	VM_FAULT_VMA_UNLOCKED   = (__force vm_fault_t)0x008000,
>  	VM_FAULT_HINDEX_MASK    = (__force vm_fault_t)0x0f0000,
>  };
>  
> @@ -1070,7 +1072,9 @@ enum vm_fault_reason {
>  	{ VM_FAULT_RETRY,               "RETRY" },	\
>  	{ VM_FAULT_FALLBACK,            "FALLBACK" },	\
>  	{ VM_FAULT_DONE_COW,            "DONE_COW" },	\
> -	{ VM_FAULT_NEEDDSYNC,           "NEEDDSYNC" }
> +	{ VM_FAULT_NEEDDSYNC,           "NEEDDSYNC" },	\
> +	{ VM_FAULT_COMPLETED,           "COMPLETED" },	\

VM_FAULT_COMPLETED isn't used in this patch, guessing that's snuck in
from one of the other patches in the series?

> +	{ VM_FAULT_VMA_UNLOCKED,        "VMA_UNLOCKED" }
>  
>  struct vm_special_mapping {
>  	const char *name;	/* The name, e.g. "[vdso]". */
> diff --git a/mm/memory.c b/mm/memory.c
> index 41f45819a923..8222acf74fd3 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -3714,8 +3714,16 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
>  	entry = pte_to_swp_entry(vmf->orig_pte);
>  	if (unlikely(non_swap_entry(entry))) {
>  		if (is_migration_entry(entry)) {
> -			migration_entry_wait(vma->vm_mm, vmf->pmd,
> -					     vmf->address);
> +			/* Save mm in case VMA lock is dropped */
> +			struct mm_struct *mm = vma->vm_mm;
> +
> +			if (vmf->flags & FAULT_FLAG_VMA_LOCK) {
> +				/* No need to hold VMA lock for migration */
> +				vma_end_read(vma);
> +				/* CAUTION! VMA can't be used after this */
> +				ret |= VM_FAULT_VMA_UNLOCKED;
> +			}
> +			migration_entry_wait(mm, vmf->pmd, vmf->address);
>  		} else if (is_device_exclusive_entry(entry)) {
>  			vmf->page = pfn_swap_entry_to_page(entry);
>  			ret = remove_device_exclusive_entry(vmf);
Matthew Wilcox May 2, 2023, 2:28 p.m. UTC | #2
On Mon, May 01, 2023 at 10:50:24AM -0700, Suren Baghdasaryan wrote:
> migration_entry_wait does not need VMA lock, therefore it can be dropped
> before waiting. Introduce VM_FAULT_VMA_UNLOCKED to indicate that VMA
> lock was dropped while in handle_mm_fault().
> Note that once VMA lock is dropped, the VMA reference can't be used as
> there are no guarantees it was not freed.

How about we introduce:

void vmf_end_read(struct vm_fault *vmf)
{
	if (!vmf->vma)
		return;
	vma_end_read(vmf->vma);
	vmf->vma = NULL;
}

Now we don't need a new flag, and calling vmf_end_read() is idempotent.

Oh, argh, we create the vmf too late.  We really need to hoist the
creation of vm_fault to the callers of handle_mm_fault().
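
For illustration only (not code from the series), the migration-entry path in
do_swap_page() with such a helper might look like the below, assuming
vmf_end_read() as defined above and callers testing vmf->vma instead of a
result flag:

	if (is_migration_entry(entry)) {
		/* Save mm: vmf->vma may be cleared and the VMA freed. */
		struct mm_struct *mm = vmf->vma->vm_mm;

		if (vmf->flags & FAULT_FLAG_VMA_LOCK)
			vmf_end_read(vmf);	/* the wait does not need the VMA lock */
		migration_entry_wait(mm, vmf->pmd, vmf->address);
	}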
Suren Baghdasaryan May 2, 2023, 4:39 p.m. UTC | #3
On Tue, May 2, 2023 at 6:26 AM 'Alistair Popple' via kernel-team
<kernel-team@android.com> wrote:
>
>
> Suren Baghdasaryan <surenb@google.com> writes:
>
> [...]
>
> > diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> > index 306a3d1a0fa6..b3b57c6da0e1 100644
> > --- a/include/linux/mm_types.h
> > +++ b/include/linux/mm_types.h
> > @@ -1030,6 +1030,7 @@ typedef __bitwise unsigned int vm_fault_t;
> >   *                           fsync() to complete (for synchronous page faults
> >   *                           in DAX)
> >   * @VM_FAULT_COMPLETED:              ->fault completed, meanwhile mmap lock released
> > + * @VM_FAULT_VMA_UNLOCKED:   VMA lock was released
>
> A note here saying vmf->vma should no longer be accessed would be nice.

Good idea. Will add in the next version. Thanks!

>
> >   * @VM_FAULT_HINDEX_MASK:    mask HINDEX value
> >   *
> >   */
> > @@ -1047,6 +1048,7 @@ enum vm_fault_reason {
> >       VM_FAULT_DONE_COW       = (__force vm_fault_t)0x001000,
> >       VM_FAULT_NEEDDSYNC      = (__force vm_fault_t)0x002000,
> >       VM_FAULT_COMPLETED      = (__force vm_fault_t)0x004000,
> > +     VM_FAULT_VMA_UNLOCKED   = (__force vm_fault_t)0x008000,
> >       VM_FAULT_HINDEX_MASK    = (__force vm_fault_t)0x0f0000,
> >  };
> >
> > @@ -1070,7 +1072,9 @@ enum vm_fault_reason {
> >       { VM_FAULT_RETRY,               "RETRY" },      \
> >       { VM_FAULT_FALLBACK,            "FALLBACK" },   \
> >       { VM_FAULT_DONE_COW,            "DONE_COW" },   \
> > -     { VM_FAULT_NEEDDSYNC,           "NEEDDSYNC" }
> > +     { VM_FAULT_NEEDDSYNC,           "NEEDDSYNC" },  \
> > +     { VM_FAULT_COMPLETED,           "COMPLETED" },  \
>
> VM_FAULT_COMPLETED isn't used in this patch, guessing that's snuck in
> from one of the other patches in the series?

I noticed that an entry for VM_FAULT_COMPLETED was missing and wanted
to fix that... Should I drop that?

>
> > +     { VM_FAULT_VMA_UNLOCKED,        "VMA_UNLOCKED" }
> >
> >  struct vm_special_mapping {
> >       const char *name;       /* The name, e.g. "[vdso]". */
> > diff --git a/mm/memory.c b/mm/memory.c
> > index 41f45819a923..8222acf74fd3 100644
> > --- a/mm/memory.c
> > +++ b/mm/memory.c
> > @@ -3714,8 +3714,16 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
> >       entry = pte_to_swp_entry(vmf->orig_pte);
> >       if (unlikely(non_swap_entry(entry))) {
> >               if (is_migration_entry(entry)) {
> > -                     migration_entry_wait(vma->vm_mm, vmf->pmd,
> > -                                          vmf->address);
> > +                     /* Save mm in case VMA lock is dropped */
> > +                     struct mm_struct *mm = vma->vm_mm;
> > +
> > +                     if (vmf->flags & FAULT_FLAG_VMA_LOCK) {
> > +                             /* No need to hold VMA lock for migration */
> > +                             vma_end_read(vma);
> > +                             /* CAUTION! VMA can't be used after this */
> > +                             ret |= VM_FAULT_VMA_UNLOCKED;
> > +                     }
> > +                     migration_entry_wait(mm, vmf->pmd, vmf->address);
> >               } else if (is_device_exclusive_entry(entry)) {
> >                       vmf->page = pfn_swap_entry_to_page(entry);
> >                       ret = remove_device_exclusive_entry(vmf);
>
Suren Baghdasaryan May 2, 2023, 4:41 p.m. UTC | #4
On Tue, May 2, 2023 at 7:28 AM Matthew Wilcox <willy@infradead.org> wrote:
>
> On Mon, May 01, 2023 at 10:50:24AM -0700, Suren Baghdasaryan wrote:
> > migration_entry_wait does not need VMA lock, therefore it can be dropped
> > before waiting. Introduce VM_FAULT_VMA_UNLOCKED to indicate that VMA
> > lock was dropped while in handle_mm_fault().
> > Note that once VMA lock is dropped, the VMA reference can't be used as
> > there are no guarantees it was not freed.
>
> How about we introduce:
>
> void vmf_end_read(struct vm_fault *vmf)
> {
>         if (!vmf->vma)
>                 return;
>         vma_end_read(vmf->vma);
>         vmf->vma = NULL;
> }
>
> Now we don't need a new flag, and calling vmf_end_read() is idempotent.
>
> Oh, argh, we create the vmf too late.  We really need to hoist the
> creation of vm_fault to the callers of handle_mm_fault().

Yeah, unfortunately vmf does not propagate all the way up to
do_user_addr_fault which needs to know that we dropped the lock.

>
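
For illustration only (neither Matthew's nor Suren's actual code), a rough
sketch of the hoisting idea: the arch handler would own the struct vm_fault,
so after the fault it could simply test vmf.vma rather than a result flag.
The changed handle_mm_fault() signature here is an assumption, not an
existing interface:

	/* hypothetical caller-side shape in do_user_addr_fault() */
	struct vm_fault vmf = {
		.vma		= vma,
		.address	= address & PAGE_MASK,
		.flags		= flags | FAULT_FLAG_VMA_LOCK,
	};

	fault = handle_mm_fault(&vmf, regs);	/* assumed new signature */
	if (vmf.vma)	/* core code never dropped the per-VMA lock */
		vma_end_read(vmf.vma);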
Alistair Popple May 3, 2023, 1:03 p.m. UTC | #5
Suren Baghdasaryan <surenb@google.com> writes:

> On Tue, May 2, 2023 at 6:26 AM 'Alistair Popple' via kernel-team
> <kernel-team@android.com> wrote:
>>
>>
>> Suren Baghdasaryan <surenb@google.com> writes:
>>
>> [...]
>>
>> > diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
>> > index 306a3d1a0fa6..b3b57c6da0e1 100644
>> > --- a/include/linux/mm_types.h
>> > +++ b/include/linux/mm_types.h
>> > @@ -1030,6 +1030,7 @@ typedef __bitwise unsigned int vm_fault_t;
>> >   *                           fsync() to complete (for synchronous page faults
>> >   *                           in DAX)
>> >   * @VM_FAULT_COMPLETED:              ->fault completed, meanwhile mmap lock released
>> > + * @VM_FAULT_VMA_UNLOCKED:   VMA lock was released
>>
>> A note here saying vmf->vma should no longer be accessed would be nice.
>
> Good idea. Will add in the next version. Thanks!
>
>>
>> >   * @VM_FAULT_HINDEX_MASK:    mask HINDEX value
>> >   *
>> >   */
>> > @@ -1047,6 +1048,7 @@ enum vm_fault_reason {
>> >       VM_FAULT_DONE_COW       = (__force vm_fault_t)0x001000,
>> >       VM_FAULT_NEEDDSYNC      = (__force vm_fault_t)0x002000,
>> >       VM_FAULT_COMPLETED      = (__force vm_fault_t)0x004000,
>> > +     VM_FAULT_VMA_UNLOCKED   = (__force vm_fault_t)0x008000,
>> >       VM_FAULT_HINDEX_MASK    = (__force vm_fault_t)0x0f0000,
>> >  };
>> >
>> > @@ -1070,7 +1072,9 @@ enum vm_fault_reason {
>> >       { VM_FAULT_RETRY,               "RETRY" },      \
>> >       { VM_FAULT_FALLBACK,            "FALLBACK" },   \
>> >       { VM_FAULT_DONE_COW,            "DONE_COW" },   \
>> > -     { VM_FAULT_NEEDDSYNC,           "NEEDDSYNC" }
>> > +     { VM_FAULT_NEEDDSYNC,           "NEEDDSYNC" },  \
>> > +     { VM_FAULT_COMPLETED,           "COMPLETED" },  \
>>
>> VM_FAULT_COMPLETED isn't used in this patch, guessing that's snuck in
>> from one of the other patches in the series?
>
> I noticed that an entry for VM_FAULT_COMPLETED was missing and wanted
> to fix that... Should I drop that?

Oh ok. It would certainly be good to add but really it should be its
own patch.

>>
>> > +     { VM_FAULT_VMA_UNLOCKED,        "VMA_UNLOCKED" }
>> >
>> >  struct vm_special_mapping {
>> >       const char *name;       /* The name, e.g. "[vdso]". */
>> > diff --git a/mm/memory.c b/mm/memory.c
>> > index 41f45819a923..8222acf74fd3 100644
>> > --- a/mm/memory.c
>> > +++ b/mm/memory.c
>> > @@ -3714,8 +3714,16 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
>> >       entry = pte_to_swp_entry(vmf->orig_pte);
>> >       if (unlikely(non_swap_entry(entry))) {
>> >               if (is_migration_entry(entry)) {
>> > -                     migration_entry_wait(vma->vm_mm, vmf->pmd,
>> > -                                          vmf->address);
>> > +                     /* Save mm in case VMA lock is dropped */
>> > +                     struct mm_struct *mm = vma->vm_mm;
>> > +
>> > +                     if (vmf->flags & FAULT_FLAG_VMA_LOCK) {
>> > +                             /* No need to hold VMA lock for migration */
>> > +                             vma_end_read(vma);
>> > +                             /* CAUTION! VMA can't be used after this */
>> > +                             ret |= VM_FAULT_VMA_UNLOCKED;
>> > +                     }
>> > +                     migration_entry_wait(mm, vmf->pmd, vmf->address);
>> >               } else if (is_device_exclusive_entry(entry)) {
>> >                       vmf->page = pfn_swap_entry_to_page(entry);
>> >                       ret = remove_device_exclusive_entry(vmf);
>>
Suren Baghdasaryan May 3, 2023, 7:42 p.m. UTC | #6
On Wed, May 3, 2023 at 6:05 AM Alistair Popple <apopple@nvidia.com> wrote:
>
>
> Suren Baghdasaryan <surenb@google.com> writes:
>
> > On Tue, May 2, 2023 at 6:26 AM 'Alistair Popple' via kernel-team
> > <kernel-team@android.com> wrote:
> >>
> >>
> >> Suren Baghdasaryan <surenb@google.com> writes:
> >>
> >> [...]
> >>
> >> > diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> >> > index 306a3d1a0fa6..b3b57c6da0e1 100644
> >> > --- a/include/linux/mm_types.h
> >> > +++ b/include/linux/mm_types.h
> >> > @@ -1030,6 +1030,7 @@ typedef __bitwise unsigned int vm_fault_t;
> >> >   *                           fsync() to complete (for synchronous page faults
> >> >   *                           in DAX)
> >> >   * @VM_FAULT_COMPLETED:              ->fault completed, meanwhile mmap lock released
> >> > + * @VM_FAULT_VMA_UNLOCKED:   VMA lock was released
> >>
> >> A note here saying vmf->vma should no longer be accessed would be nice.
> >
> > Good idea. Will add in the next version. Thanks!
> >
> >>
> >> >   * @VM_FAULT_HINDEX_MASK:    mask HINDEX value
> >> >   *
> >> >   */
> >> > @@ -1047,6 +1048,7 @@ enum vm_fault_reason {
> >> >       VM_FAULT_DONE_COW       = (__force vm_fault_t)0x001000,
> >> >       VM_FAULT_NEEDDSYNC      = (__force vm_fault_t)0x002000,
> >> >       VM_FAULT_COMPLETED      = (__force vm_fault_t)0x004000,
> >> > +     VM_FAULT_VMA_UNLOCKED   = (__force vm_fault_t)0x008000,
> >> >       VM_FAULT_HINDEX_MASK    = (__force vm_fault_t)0x0f0000,
> >> >  };
> >> >
> >> > @@ -1070,7 +1072,9 @@ enum vm_fault_reason {
> >> >       { VM_FAULT_RETRY,               "RETRY" },      \
> >> >       { VM_FAULT_FALLBACK,            "FALLBACK" },   \
> >> >       { VM_FAULT_DONE_COW,            "DONE_COW" },   \
> >> > -     { VM_FAULT_NEEDDSYNC,           "NEEDDSYNC" }
> >> > +     { VM_FAULT_NEEDDSYNC,           "NEEDDSYNC" },  \
> >> > +     { VM_FAULT_COMPLETED,           "COMPLETED" },  \
> >>
> >> VM_FAULT_COMPLETED isn't used in this patch, guessing that's snuck in
> >> from one of the other patches in the series?
> >
> > I noticed that an entry for VM_FAULT_COMPLETED was missing and wanted
> > to fix that... Should I drop that?
>
> Oh ok. It would certainly be good to add but really it should be its
> own patch.

Ack. Will split in the next version. Thanks!

>
> >>
> >> > +     { VM_FAULT_VMA_UNLOCKED,        "VMA_UNLOCKED" }
> >> >
> >> >  struct vm_special_mapping {
> >> >       const char *name;       /* The name, e.g. "[vdso]". */
> >> > diff --git a/mm/memory.c b/mm/memory.c
> >> > index 41f45819a923..8222acf74fd3 100644
> >> > --- a/mm/memory.c
> >> > +++ b/mm/memory.c
> >> > @@ -3714,8 +3714,16 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
> >> >       entry = pte_to_swp_entry(vmf->orig_pte);
> >> >       if (unlikely(non_swap_entry(entry))) {
> >> >               if (is_migration_entry(entry)) {
> >> > -                     migration_entry_wait(vma->vm_mm, vmf->pmd,
> >> > -                                          vmf->address);
> >> > +                     /* Save mm in case VMA lock is dropped */
> >> > +                     struct mm_struct *mm = vma->vm_mm;
> >> > +
> >> > +                     if (vmf->flags & FAULT_FLAG_VMA_LOCK) {
> >> > +                             /* No need to hold VMA lock for migration */
> >> > +                             vma_end_read(vma);
> >> > +                             /* CAUTION! VMA can't be used after this */
> >> > +                             ret |= VM_FAULT_VMA_UNLOCKED;
> >> > +                     }
> >> > +                     migration_entry_wait(mm, vmf->pmd, vmf->address);
> >> >               } else if (is_device_exclusive_entry(entry)) {
> >> >                       vmf->page = pfn_swap_entry_to_page(entry);
> >> >                       ret = remove_device_exclusive_entry(vmf);
> >>
>

Patch

diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 9e0db5c387e3..8fa281f49d61 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -602,7 +602,8 @@  static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
 	}
 	fault = handle_mm_fault(vma, addr & PAGE_MASK,
 				mm_flags | FAULT_FLAG_VMA_LOCK, regs);
-	vma_end_read(vma);
+	if (!(fault & VM_FAULT_VMA_UNLOCKED))
+		vma_end_read(vma);
 
 	if (!(fault & VM_FAULT_RETRY)) {
 		count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 531177a4ee08..b27730f07141 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -494,7 +494,8 @@  static int ___do_page_fault(struct pt_regs *regs, unsigned long address,
 	}
 
 	fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs);
-	vma_end_read(vma);
+	if (!(fault & VM_FAULT_VMA_UNLOCKED))
+		vma_end_read(vma);
 
 	if (!(fault & VM_FAULT_RETRY)) {
 		count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index b65144c392b0..cc923dbb0821 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -418,7 +418,8 @@  static inline vm_fault_t do_exception(struct pt_regs *regs, int access)
 		goto lock_mmap;
 	}
 	fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs);
-	vma_end_read(vma);
+	if (!(fault & VM_FAULT_VMA_UNLOCKED))
+		vma_end_read(vma);
 	if (!(fault & VM_FAULT_RETRY)) {
 		count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
 		goto out;
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index e4399983c50c..ef62ab2fd211 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1347,7 +1347,8 @@  void do_user_addr_fault(struct pt_regs *regs,
 		goto lock_mmap;
 	}
 	fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs);
-	vma_end_read(vma);
+	if (!(fault & VM_FAULT_VMA_UNLOCKED))
+		vma_end_read(vma);
 
 	if (!(fault & VM_FAULT_RETRY)) {
 		count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 306a3d1a0fa6..b3b57c6da0e1 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -1030,6 +1030,7 @@  typedef __bitwise unsigned int vm_fault_t;
  *				fsync() to complete (for synchronous page faults
  *				in DAX)
  * @VM_FAULT_COMPLETED:		->fault completed, meanwhile mmap lock released
+ * @VM_FAULT_VMA_UNLOCKED:	VMA lock was released
  * @VM_FAULT_HINDEX_MASK:	mask HINDEX value
  *
  */
@@ -1047,6 +1048,7 @@  enum vm_fault_reason {
 	VM_FAULT_DONE_COW       = (__force vm_fault_t)0x001000,
 	VM_FAULT_NEEDDSYNC      = (__force vm_fault_t)0x002000,
 	VM_FAULT_COMPLETED      = (__force vm_fault_t)0x004000,
+	VM_FAULT_VMA_UNLOCKED   = (__force vm_fault_t)0x008000,
 	VM_FAULT_HINDEX_MASK    = (__force vm_fault_t)0x0f0000,
 };
 
@@ -1070,7 +1072,9 @@  enum vm_fault_reason {
 	{ VM_FAULT_RETRY,               "RETRY" },	\
 	{ VM_FAULT_FALLBACK,            "FALLBACK" },	\
 	{ VM_FAULT_DONE_COW,            "DONE_COW" },	\
-	{ VM_FAULT_NEEDDSYNC,           "NEEDDSYNC" }
+	{ VM_FAULT_NEEDDSYNC,           "NEEDDSYNC" },	\
+	{ VM_FAULT_COMPLETED,           "COMPLETED" },	\
+	{ VM_FAULT_VMA_UNLOCKED,        "VMA_UNLOCKED" }
 
 struct vm_special_mapping {
 	const char *name;	/* The name, e.g. "[vdso]". */
diff --git a/mm/memory.c b/mm/memory.c
index 41f45819a923..8222acf74fd3 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3714,8 +3714,16 @@  vm_fault_t do_swap_page(struct vm_fault *vmf)
 	entry = pte_to_swp_entry(vmf->orig_pte);
 	if (unlikely(non_swap_entry(entry))) {
 		if (is_migration_entry(entry)) {
-			migration_entry_wait(vma->vm_mm, vmf->pmd,
-					     vmf->address);
+			/* Save mm in case VMA lock is dropped */
+			struct mm_struct *mm = vma->vm_mm;
+
+			if (vmf->flags & FAULT_FLAG_VMA_LOCK) {
+				/* No need to hold VMA lock for migration */
+				vma_end_read(vma);
+				/* CAUTION! VMA can't be used after this */
+				ret |= VM_FAULT_VMA_UNLOCKED;
+			}
+			migration_entry_wait(mm, vmf->pmd, vmf->address);
 		} else if (is_device_exclusive_entry(entry)) {
 			vmf->page = pfn_swap_entry_to_page(entry);
 			ret = remove_device_exclusive_entry(vmf);