diff mbox series

[v2,5/6] mm: implement folio wait under VMA lock

Message ID 20230609005158.2421285-6-surenb@google.com (mailing list archive)
State New
Headers show
Series Per-vma lock support for swap and userfaults | expand

Commit Message

Suren Baghdasaryan June 9, 2023, 12:51 a.m. UTC
Follow the same pattern as mmap_lock when waiting for a folio by dropping
the VMA lock before the wait and retrying once the folio is available.

Signed-off-by: Suren Baghdasaryan <surenb@google.com>
---
 include/linux/pagemap.h | 14 ++++++++++----
 mm/filemap.c            | 43 ++++++++++++++++++++++-------------------
 mm/memory.c             | 13 ++++++++-----
 3 files changed, 41 insertions(+), 29 deletions(-)

Comments

Matthew Wilcox June 9, 2023, 3:03 p.m. UTC | #1
On Thu, Jun 08, 2023 at 05:51:57PM -0700, Suren Baghdasaryan wrote:
>  static inline bool folio_lock_or_retry(struct folio *folio,
> -		struct mm_struct *mm, unsigned int flags)
> +		struct vm_area_struct *vma, unsigned int flags,
> +		bool *lock_dropped)

I hate these double-return-value functions.

How about this for an API:

vm_fault_t folio_lock_fault(struct folio *folio, struct vm_fault *vmf)
{
	might_sleep();
	if (folio_trylock(folio))
		return 0;
	return __folio_lock_fault(folio, vmf);
}

Then the users look like ...

> @@ -3580,8 +3581,10 @@ static vm_fault_t remove_device_exclusive_entry(struct vm_fault *vmf)
>  	if (!folio_try_get(folio))
>  		return 0;
>  
> -	if (!folio_lock_or_retry(folio, vma->vm_mm, vmf->flags)) {
> +	if (!folio_lock_or_retry(folio, vma, vmf->flags, &lock_dropped)) {
>  		folio_put(folio);
> +		if (lock_dropped && vmf->flags & FAULT_FLAG_VMA_LOCK)
> +			return VM_FAULT_VMA_UNLOCKED | VM_FAULT_RETRY;
>  		return VM_FAULT_RETRY;
>  	}

	ret = folio_lock_fault(folio, vmf);
	if (ret)
		return ret;

> @@ -3837,9 +3840,9 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
>  		goto out_release;
>  	}
>  
> -	locked = folio_lock_or_retry(folio, vma->vm_mm, vmf->flags);
> -
> -	if (!locked) {
> +	if (!folio_lock_or_retry(folio, vma, vmf->flags, &lock_dropped)) {
> +		if (lock_dropped && vmf->flags & FAULT_FLAG_VMA_LOCK)
> +			ret |= VM_FAULT_VMA_UNLOCKED;
>  		ret |= VM_FAULT_RETRY;
>  		goto out_release;
>  	}

	ret |= folio_lock_fault(folio, vmf);
	if (ret & VM_FAULT_RETRY)
		goto out_release;

ie instead of trying to reconstruct what __folio_lock_fault() did from
its outputs, we just let folio_lock_fault() tell us what it did.
Suren Baghdasaryan June 9, 2023, 6:49 p.m. UTC | #2
On Fri, Jun 9, 2023 at 8:03 AM Matthew Wilcox <willy@infradead.org> wrote:
>
> On Thu, Jun 08, 2023 at 05:51:57PM -0700, Suren Baghdasaryan wrote:
> >  static inline bool folio_lock_or_retry(struct folio *folio,
> > -             struct mm_struct *mm, unsigned int flags)
> > +             struct vm_area_struct *vma, unsigned int flags,
> > +             bool *lock_dropped)
>
> I hate these double-return-value functions.
>
> How about this for an API:
>
> vm_fault_t folio_lock_fault(struct folio *folio, struct vm_fault *vmf)
> {
>         might_sleep();
>         if (folio_trylock(folio))
>                 return 0;
>         return __folio_lock_fault(folio, vmf);
> }
>
> Then the users look like ...
>
> > @@ -3580,8 +3581,10 @@ static vm_fault_t remove_device_exclusive_entry(struct vm_fault *vmf)
> >       if (!folio_try_get(folio))
> >               return 0;
> >
> > -     if (!folio_lock_or_retry(folio, vma->vm_mm, vmf->flags)) {
> > +     if (!folio_lock_or_retry(folio, vma, vmf->flags, &lock_dropped)) {
> >               folio_put(folio);
> > +             if (lock_dropped && vmf->flags & FAULT_FLAG_VMA_LOCK)
> > +                     return VM_FAULT_VMA_UNLOCKED | VM_FAULT_RETRY;
> >               return VM_FAULT_RETRY;
> >       }
>
>         ret = folio_lock_fault(folio, vmf);
>         if (ret)
>                 return ret;
>
> > @@ -3837,9 +3840,9 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
> >               goto out_release;
> >       }
> >
> > -     locked = folio_lock_or_retry(folio, vma->vm_mm, vmf->flags);
> > -
> > -     if (!locked) {
> > +     if (!folio_lock_or_retry(folio, vma, vmf->flags, &lock_dropped)) {
> > +             if (lock_dropped && vmf->flags & FAULT_FLAG_VMA_LOCK)
> > +                     ret |= VM_FAULT_VMA_UNLOCKED;
> >               ret |= VM_FAULT_RETRY;
> >               goto out_release;
> >       }
>
>         ret |= folio_lock_fault(folio, vmf);
>         if (ret & VM_FAULT_RETRY)
>                 goto out_release;
>
> ie instead of trying to reconstruct what __folio_lock_fault() did from
> its outputs, we just let folio_lock_fault() tell us what it did.

Thanks for taking a look!
Ok, I think what you are suggesting is to have a new set of
folio_lock_fault()/__folio_lock_fault() functions which return
vm_fault_t directly, __folio_lock_fault() will use
__folio_lock_or_retry() internally and will adjust its return value
based on __folio_lock_or_retry()'s return and the lock releasing rules
described in the comments for __folio_lock_or_retry(). Is my
understanding correct?
Suren Baghdasaryan June 9, 2023, 6:55 p.m. UTC | #3
On Fri, Jun 9, 2023 at 11:49 AM Suren Baghdasaryan <surenb@google.com> wrote:
>
> On Fri, Jun 9, 2023 at 8:03 AM Matthew Wilcox <willy@infradead.org> wrote:
> >
> > On Thu, Jun 08, 2023 at 05:51:57PM -0700, Suren Baghdasaryan wrote:
> > >  static inline bool folio_lock_or_retry(struct folio *folio,
> > > -             struct mm_struct *mm, unsigned int flags)
> > > +             struct vm_area_struct *vma, unsigned int flags,
> > > +             bool *lock_dropped)
> >
> > I hate these double-return-value functions.
> >
> > How about this for an API:
> >
> > vm_fault_t folio_lock_fault(struct folio *folio, struct vm_fault *vmf)
> > {
> >         might_sleep();
> >         if (folio_trylock(folio))
> >                 return 0;
> >         return __folio_lock_fault(folio, vmf);
> > }
> >
> > Then the users look like ...
> >
> > > @@ -3580,8 +3581,10 @@ static vm_fault_t remove_device_exclusive_entry(struct vm_fault *vmf)
> > >       if (!folio_try_get(folio))
> > >               return 0;
> > >
> > > -     if (!folio_lock_or_retry(folio, vma->vm_mm, vmf->flags)) {
> > > +     if (!folio_lock_or_retry(folio, vma, vmf->flags, &lock_dropped)) {
> > >               folio_put(folio);
> > > +             if (lock_dropped && vmf->flags & FAULT_FLAG_VMA_LOCK)
> > > +                     return VM_FAULT_VMA_UNLOCKED | VM_FAULT_RETRY;
> > >               return VM_FAULT_RETRY;
> > >       }
> >
> >         ret = folio_lock_fault(folio, vmf);
> >         if (ret)
> >                 return ret;
> >
> > > @@ -3837,9 +3840,9 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
> > >               goto out_release;
> > >       }
> > >
> > > -     locked = folio_lock_or_retry(folio, vma->vm_mm, vmf->flags);
> > > -
> > > -     if (!locked) {
> > > +     if (!folio_lock_or_retry(folio, vma, vmf->flags, &lock_dropped)) {
> > > +             if (lock_dropped && vmf->flags & FAULT_FLAG_VMA_LOCK)
> > > +                     ret |= VM_FAULT_VMA_UNLOCKED;
> > >               ret |= VM_FAULT_RETRY;
> > >               goto out_release;
> > >       }
> >
> >         ret |= folio_lock_fault(folio, vmf);
> >         if (ret & VM_FAULT_RETRY)
> >                 goto out_release;
> >
> > ie instead of trying to reconstruct what __folio_lock_fault() did from
> > its outputs, we just let folio_lock_fault() tell us what it did.
>
> Thanks for taking a look!
> Ok, I think what you are suggesting is to have a new set of
> folio_lock_fault()/__folio_lock_fault() functions which return
> vm_fault_t directly, __folio_lock_fault() will use
> __folio_lock_or_retry() internally and will adjust its return value
> based on __folio_lock_or_retry()'s return and the lock releasing rules
> described in the comments for __folio_lock_or_retry(). Is my
> understanding correct?

Oh, after rereading I think you are suggesting to replace
folio_lock_or_retry()/__folio_lock_or_retry() with
folio_lock_fault()/__folio_lock_fault(), not to add them. Is that
right?
Matthew Wilcox June 9, 2023, 7:36 p.m. UTC | #4
On Fri, Jun 09, 2023 at 11:55:29AM -0700, Suren Baghdasaryan wrote:
> Oh, after rereading I think you are suggesting to replace
> folio_lock_or_retry()/__folio_lock_or_retry() with
> folio_lock_fault()/__folio_lock_fault(), not to add them. Is that
> right?

Right.  It only has two callers.  And I'd do that before adding the
FAULT_VMA_LOCK handling to it.
Peter Xu June 9, 2023, 8:54 p.m. UTC | #5
On Thu, Jun 08, 2023 at 05:51:57PM -0700, Suren Baghdasaryan wrote:
> Follow the same pattern as mmap_lock when waiting for a folio by dropping
> the VMA lock before the wait and retrying once the folio is available.
> 
> Signed-off-by: Suren Baghdasaryan <surenb@google.com>
> ---
>  include/linux/pagemap.h | 14 ++++++++++----
>  mm/filemap.c            | 43 ++++++++++++++++++++++-------------------
>  mm/memory.c             | 13 ++++++++-----
>  3 files changed, 41 insertions(+), 29 deletions(-)
> 
> diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
> index a56308a9d1a4..6c9493314c21 100644
> --- a/include/linux/pagemap.h
> +++ b/include/linux/pagemap.h
> @@ -896,8 +896,8 @@ static inline bool wake_page_match(struct wait_page_queue *wait_page,
>  
>  void __folio_lock(struct folio *folio);
>  int __folio_lock_killable(struct folio *folio);
> -bool __folio_lock_or_retry(struct folio *folio, struct mm_struct *mm,
> -				unsigned int flags);
> +bool __folio_lock_or_retry(struct folio *folio, struct vm_area_struct *vma,
> +			   unsigned int flags, bool *lock_dropped);
>  void unlock_page(struct page *page);
>  void folio_unlock(struct folio *folio);
>  
> @@ -1002,10 +1002,16 @@ static inline int folio_lock_killable(struct folio *folio)
>   * __folio_lock_or_retry().
>   */
>  static inline bool folio_lock_or_retry(struct folio *folio,
> -		struct mm_struct *mm, unsigned int flags)
> +		struct vm_area_struct *vma, unsigned int flags,
> +		bool *lock_dropped)
>  {
>  	might_sleep();
> -	return folio_trylock(folio) || __folio_lock_or_retry(folio, mm, flags);
> +	if (folio_trylock(folio)) {
> +		*lock_dropped = false;
> +		return true;
> +	}
> +
> +	return __folio_lock_or_retry(folio, vma, flags, lock_dropped);
>  }
>  
>  /*
> diff --git a/mm/filemap.c b/mm/filemap.c
> index 7cb0a3776a07..838955635fbc 100644
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -1701,37 +1701,35 @@ static int __folio_lock_async(struct folio *folio, struct wait_page_queue *wait)
>  
>  /*
>   * Return values:
> - * true - folio is locked; mmap_lock is still held.
> + * true - folio is locked.
>   * false - folio is not locked.
> - *     mmap_lock has been released (mmap_read_unlock(), unless flags had both
> - *     FAULT_FLAG_ALLOW_RETRY and FAULT_FLAG_RETRY_NOWAIT set, in
> - *     which case mmap_lock is still held.
> - *     If flags had FAULT_FLAG_VMA_LOCK set, meaning the operation is performed
> - *     with VMA lock only, the VMA lock is still held.
> + *
> + * lock_dropped indicates whether mmap_lock/VMA lock got dropped.
> + *     mmap_lock/VMA lock is dropped when function fails to lock the folio,
> + *     unless flags had both FAULT_FLAG_ALLOW_RETRY and FAULT_FLAG_RETRY_NOWAIT
> + *     set, in which case mmap_lock/VMA lock is still held.

This seems to be a separate change to have "lock_dropped"; would it be worth a
separate patch for it if needed?

I do agree it's confusing and it might be the reason for this change, but I
think it may or may not help much, as long as VM_FAULT_RETRY semantics are
kept unchanged, IIUC (it doesn't always imply the mmap lock was released —
only if !NOWAIT — which can be confusing too).

Especially that doesn't seem like a must for the vma change.  IIUC to
support vma lock here we can simply keep everything as before, but only
release proper lock based on the fault flag should work.  But maybe I just
missed something, so that relies on the answer to previous patch...

>   *
>   * If neither ALLOW_RETRY nor KILLABLE are set, will always return true
> - * with the folio locked and the mmap_lock unperturbed.
> + * with the folio locked and the mmap_lock/VMA lock unperturbed.
>   */
> -bool __folio_lock_or_retry(struct folio *folio, struct mm_struct *mm,
> -			 unsigned int flags)
> +bool __folio_lock_or_retry(struct folio *folio, struct vm_area_struct *vma,
> +			 unsigned int flags, bool *lock_dropped)
>  {
> -	/* Can't do this if not holding mmap_lock */
> -	if (flags & FAULT_FLAG_VMA_LOCK)
> -		return false;
> -
>  	if (fault_flag_allow_retry_first(flags)) {
> -		/*
> -		 * CAUTION! In this case, mmap_lock is not released
> -		 * even though return 0.
> -		 */
> -		if (flags & FAULT_FLAG_RETRY_NOWAIT)
> +		if (flags & FAULT_FLAG_RETRY_NOWAIT) {
> +			*lock_dropped = false;
>  			return false;
> +		}
>  
> -		mmap_read_unlock(mm);
> +		if (flags & FAULT_FLAG_VMA_LOCK)
> +			vma_end_read(vma);
> +		else
> +			mmap_read_unlock(vma->vm_mm);
>  		if (flags & FAULT_FLAG_KILLABLE)
>  			folio_wait_locked_killable(folio);
>  		else
>  			folio_wait_locked(folio);
> +		*lock_dropped = true;
>  		return false;
>  	}
>  	if (flags & FAULT_FLAG_KILLABLE) {
> @@ -1739,13 +1737,18 @@ bool __folio_lock_or_retry(struct folio *folio, struct mm_struct *mm,
>  
>  		ret = __folio_lock_killable(folio);
>  		if (ret) {
> -			mmap_read_unlock(mm);
> +			if (flags & FAULT_FLAG_VMA_LOCK)
> +				vma_end_read(vma);
> +			else
> +				mmap_read_unlock(vma->vm_mm);
> +			*lock_dropped = true;
>  			return false;
>  		}
>  	} else {
>  		__folio_lock(folio);
>  	}
>  
> +	*lock_dropped = false;
>  	return true;
>  }
>  
> diff --git a/mm/memory.c b/mm/memory.c
> index c234f8085f1e..acb09a3aad53 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -3568,6 +3568,7 @@ static vm_fault_t remove_device_exclusive_entry(struct vm_fault *vmf)
>  	struct folio *folio = page_folio(vmf->page);
>  	struct vm_area_struct *vma = vmf->vma;
>  	struct mmu_notifier_range range;
> +	bool lock_dropped;
>  
>  	/*
>  	 * We need a reference to lock the folio because we don't hold
> @@ -3580,8 +3581,10 @@ static vm_fault_t remove_device_exclusive_entry(struct vm_fault *vmf)
>  	if (!folio_try_get(folio))
>  		return 0;
>  
> -	if (!folio_lock_or_retry(folio, vma->vm_mm, vmf->flags)) {
> +	if (!folio_lock_or_retry(folio, vma, vmf->flags, &lock_dropped)) {
>  		folio_put(folio);
> +		if (lock_dropped && vmf->flags & FAULT_FLAG_VMA_LOCK)
> +			return VM_FAULT_VMA_UNLOCKED | VM_FAULT_RETRY;
>  		return VM_FAULT_RETRY;
>  	}
>  	mmu_notifier_range_init_owner(&range, MMU_NOTIFY_EXCLUSIVE, 0,
> @@ -3704,7 +3707,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
>  	bool exclusive = false;
>  	swp_entry_t entry;
>  	pte_t pte;
> -	int locked;
> +	bool lock_dropped;
>  	vm_fault_t ret = 0;
>  	void *shadow = NULL;
>  
> @@ -3837,9 +3840,9 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
>  		goto out_release;
>  	}
>  
> -	locked = folio_lock_or_retry(folio, vma->vm_mm, vmf->flags);
> -
> -	if (!locked) {
> +	if (!folio_lock_or_retry(folio, vma, vmf->flags, &lock_dropped)) {
> +		if (lock_dropped && vmf->flags & FAULT_FLAG_VMA_LOCK)
> +			ret |= VM_FAULT_VMA_UNLOCKED;
>  		ret |= VM_FAULT_RETRY;
>  		goto out_release;
>  	}
> -- 
> 2.41.0.162.gfafddb0af9-goog
>
Suren Baghdasaryan June 9, 2023, 10:48 p.m. UTC | #6
On Fri, Jun 9, 2023 at 1:54 PM Peter Xu <peterx@redhat.com> wrote:
>
> On Thu, Jun 08, 2023 at 05:51:57PM -0700, Suren Baghdasaryan wrote:
> > Follow the same pattern as mmap_lock when waiting for a folio by dropping
> > the VMA lock before the wait and retrying once the folio is available.
> >
> > Signed-off-by: Suren Baghdasaryan <surenb@google.com>
> > ---
> >  include/linux/pagemap.h | 14 ++++++++++----
> >  mm/filemap.c            | 43 ++++++++++++++++++++++-------------------
> >  mm/memory.c             | 13 ++++++++-----
> >  3 files changed, 41 insertions(+), 29 deletions(-)
> >
> > diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
> > index a56308a9d1a4..6c9493314c21 100644
> > --- a/include/linux/pagemap.h
> > +++ b/include/linux/pagemap.h
> > @@ -896,8 +896,8 @@ static inline bool wake_page_match(struct wait_page_queue *wait_page,
> >
> >  void __folio_lock(struct folio *folio);
> >  int __folio_lock_killable(struct folio *folio);
> > -bool __folio_lock_or_retry(struct folio *folio, struct mm_struct *mm,
> > -                             unsigned int flags);
> > +bool __folio_lock_or_retry(struct folio *folio, struct vm_area_struct *vma,
> > +                        unsigned int flags, bool *lock_dropped);
> >  void unlock_page(struct page *page);
> >  void folio_unlock(struct folio *folio);
> >
> > @@ -1002,10 +1002,16 @@ static inline int folio_lock_killable(struct folio *folio)
> >   * __folio_lock_or_retry().
> >   */
> >  static inline bool folio_lock_or_retry(struct folio *folio,
> > -             struct mm_struct *mm, unsigned int flags)
> > +             struct vm_area_struct *vma, unsigned int flags,
> > +             bool *lock_dropped)
> >  {
> >       might_sleep();
> > -     return folio_trylock(folio) || __folio_lock_or_retry(folio, mm, flags);
> > +     if (folio_trylock(folio)) {
> > +             *lock_dropped = false;
> > +             return true;
> > +     }
> > +
> > +     return __folio_lock_or_retry(folio, vma, flags, lock_dropped);
> >  }
> >
> >  /*
> > diff --git a/mm/filemap.c b/mm/filemap.c
> > index 7cb0a3776a07..838955635fbc 100644
> > --- a/mm/filemap.c
> > +++ b/mm/filemap.c
> > @@ -1701,37 +1701,35 @@ static int __folio_lock_async(struct folio *folio, struct wait_page_queue *wait)
> >
> >  /*
> >   * Return values:
> > - * true - folio is locked; mmap_lock is still held.
> > + * true - folio is locked.
> >   * false - folio is not locked.
> > - *     mmap_lock has been released (mmap_read_unlock(), unless flags had both
> > - *     FAULT_FLAG_ALLOW_RETRY and FAULT_FLAG_RETRY_NOWAIT set, in
> > - *     which case mmap_lock is still held.
> > - *     If flags had FAULT_FLAG_VMA_LOCK set, meaning the operation is performed
> > - *     with VMA lock only, the VMA lock is still held.
> > + *
> > + * lock_dropped indicates whether mmap_lock/VMA lock got dropped.
> > + *     mmap_lock/VMA lock is dropped when function fails to lock the folio,
> > + *     unless flags had both FAULT_FLAG_ALLOW_RETRY and FAULT_FLAG_RETRY_NOWAIT
> > + *     set, in which case mmap_lock/VMA lock is still held.
>
> This seems to be a separate change to have "lock_dropped"; would it be worth a
> separate patch for it if needed?

Yes, Matthew asked for the same and also to change the function to
return the flags directly which should make it cleaner. IOW when this
function drops the lock it will include VM_FAULT_VMA_UNLOCKED flag in
its return value. And that will be done in a separate patch.

>
> I do agree it's confusing and it might be the reason for this change, but I
> think it may or may not help much, as long as VM_FAULT_RETRY semantics are
> kept unchanged, IIUC (it doesn't always imply the mmap lock was released —
> only if !NOWAIT — which can be confusing too).
>
> Especially that doesn't seem like a must for the vma change.  IIUC to
> support vma lock here we can simply keep everything as before, but only
> release proper lock based on the fault flag should work.  But maybe I just
> missed something, so that relies on the answer to previous patch...

That was my intention here, IOW I'm making the following replacement:

-             mmap_read_unlock(mm);
+             if (flags & FAULT_FLAG_VMA_LOCK)
+                     vma_end_read(vma);
+             else
+                     mmap_read_unlock(vma->vm_mm);

Did I miss something which makes the function work differently between
mmap_lock vs per-vma one?

>
> >   *
> >   * If neither ALLOW_RETRY nor KILLABLE are set, will always return true
> > - * with the folio locked and the mmap_lock unperturbed.
> > + * with the folio locked and the mmap_lock/VMA lock unperturbed.
> >   */
> > -bool __folio_lock_or_retry(struct folio *folio, struct mm_struct *mm,
> > -                      unsigned int flags)
> > +bool __folio_lock_or_retry(struct folio *folio, struct vm_area_struct *vma,
> > +                      unsigned int flags, bool *lock_dropped)
> >  {
> > -     /* Can't do this if not holding mmap_lock */
> > -     if (flags & FAULT_FLAG_VMA_LOCK)
> > -             return false;
> > -
> >       if (fault_flag_allow_retry_first(flags)) {
> > -             /*
> > -              * CAUTION! In this case, mmap_lock is not released
> > -              * even though return 0.
> > -              */
> > -             if (flags & FAULT_FLAG_RETRY_NOWAIT)
> > +             if (flags & FAULT_FLAG_RETRY_NOWAIT) {
> > +                     *lock_dropped = false;
> >                       return false;
> > +             }
> >
> > -             mmap_read_unlock(mm);
> > +             if (flags & FAULT_FLAG_VMA_LOCK)
> > +                     vma_end_read(vma);
> > +             else
> > +                     mmap_read_unlock(vma->vm_mm);
> >               if (flags & FAULT_FLAG_KILLABLE)
> >                       folio_wait_locked_killable(folio);
> >               else
> >                       folio_wait_locked(folio);
> > +             *lock_dropped = true;
> >               return false;
> >       }
> >       if (flags & FAULT_FLAG_KILLABLE) {
> > @@ -1739,13 +1737,18 @@ bool __folio_lock_or_retry(struct folio *folio, struct mm_struct *mm,
> >
> >               ret = __folio_lock_killable(folio);
> >               if (ret) {
> > -                     mmap_read_unlock(mm);
> > +                     if (flags & FAULT_FLAG_VMA_LOCK)
> > +                             vma_end_read(vma);
> > +                     else
> > +                             mmap_read_unlock(vma->vm_mm);
> > +                     *lock_dropped = true;
> >                       return false;
> >               }
> >       } else {
> >               __folio_lock(folio);
> >       }
> >
> > +     *lock_dropped = false;
> >       return true;
> >  }
> >
> > diff --git a/mm/memory.c b/mm/memory.c
> > index c234f8085f1e..acb09a3aad53 100644
> > --- a/mm/memory.c
> > +++ b/mm/memory.c
> > @@ -3568,6 +3568,7 @@ static vm_fault_t remove_device_exclusive_entry(struct vm_fault *vmf)
> >       struct folio *folio = page_folio(vmf->page);
> >       struct vm_area_struct *vma = vmf->vma;
> >       struct mmu_notifier_range range;
> > +     bool lock_dropped;
> >
> >       /*
> >        * We need a reference to lock the folio because we don't hold
> > @@ -3580,8 +3581,10 @@ static vm_fault_t remove_device_exclusive_entry(struct vm_fault *vmf)
> >       if (!folio_try_get(folio))
> >               return 0;
> >
> > -     if (!folio_lock_or_retry(folio, vma->vm_mm, vmf->flags)) {
> > +     if (!folio_lock_or_retry(folio, vma, vmf->flags, &lock_dropped)) {
> >               folio_put(folio);
> > +             if (lock_dropped && vmf->flags & FAULT_FLAG_VMA_LOCK)
> > +                     return VM_FAULT_VMA_UNLOCKED | VM_FAULT_RETRY;
> >               return VM_FAULT_RETRY;
> >       }
> >       mmu_notifier_range_init_owner(&range, MMU_NOTIFY_EXCLUSIVE, 0,
> > @@ -3704,7 +3707,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
> >       bool exclusive = false;
> >       swp_entry_t entry;
> >       pte_t pte;
> > -     int locked;
> > +     bool lock_dropped;
> >       vm_fault_t ret = 0;
> >       void *shadow = NULL;
> >
> > @@ -3837,9 +3840,9 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
> >               goto out_release;
> >       }
> >
> > -     locked = folio_lock_or_retry(folio, vma->vm_mm, vmf->flags);
> > -
> > -     if (!locked) {
> > +     if (!folio_lock_or_retry(folio, vma, vmf->flags, &lock_dropped)) {
> > +             if (lock_dropped && vmf->flags & FAULT_FLAG_VMA_LOCK)
> > +                     ret |= VM_FAULT_VMA_UNLOCKED;
> >               ret |= VM_FAULT_RETRY;
> >               goto out_release;
> >       }
> > --
> > 2.41.0.162.gfafddb0af9-goog
> >
>
> --
> Peter Xu
>
Suren Baghdasaryan June 9, 2023, 10:49 p.m. UTC | #7
On Fri, Jun 9, 2023 at 12:37 PM Matthew Wilcox <willy@infradead.org> wrote:
>
> On Fri, Jun 09, 2023 at 11:55:29AM -0700, Suren Baghdasaryan wrote:
> > Oh, after rereading I think you are suggesting to replace
> > folio_lock_or_retry()/__folio_lock_or_retry() with
> > folio_lock_fault()/__folio_lock_fault(), not to add them. Is that
> > right?
>
> Right.  It only has two callers.  And I'd do that before adding the
> FAULT_VMA_LOCK handling to it.

Got it. Will make the changes.

Thanks folks for the review!
Peter Xu June 12, 2023, 1:56 p.m. UTC | #8
On Fri, Jun 09, 2023 at 03:48:04PM -0700, Suren Baghdasaryan wrote:
> That was my intention here, IOW I'm making the following replacement:
> 
> -             mmap_read_unlock(mm);
> +             if (flags & FAULT_FLAG_VMA_LOCK)
> +                     vma_end_read(vma);
> +             else
> +                     mmap_read_unlock(vma->vm_mm);
> 
> Did I miss something which makes the function work differently between
> mmap_lock vs per-vma one?

Nothing wrong I can see.  Thanks.
diff mbox series

Patch

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index a56308a9d1a4..6c9493314c21 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -896,8 +896,8 @@  static inline bool wake_page_match(struct wait_page_queue *wait_page,
 
 void __folio_lock(struct folio *folio);
 int __folio_lock_killable(struct folio *folio);
-bool __folio_lock_or_retry(struct folio *folio, struct mm_struct *mm,
-				unsigned int flags);
+bool __folio_lock_or_retry(struct folio *folio, struct vm_area_struct *vma,
+			   unsigned int flags, bool *lock_dropped);
 void unlock_page(struct page *page);
 void folio_unlock(struct folio *folio);
 
@@ -1002,10 +1002,16 @@  static inline int folio_lock_killable(struct folio *folio)
  * __folio_lock_or_retry().
  */
 static inline bool folio_lock_or_retry(struct folio *folio,
-		struct mm_struct *mm, unsigned int flags)
+		struct vm_area_struct *vma, unsigned int flags,
+		bool *lock_dropped)
 {
 	might_sleep();
-	return folio_trylock(folio) || __folio_lock_or_retry(folio, mm, flags);
+	if (folio_trylock(folio)) {
+		*lock_dropped = false;
+		return true;
+	}
+
+	return __folio_lock_or_retry(folio, vma, flags, lock_dropped);
 }
 
 /*
diff --git a/mm/filemap.c b/mm/filemap.c
index 7cb0a3776a07..838955635fbc 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1701,37 +1701,35 @@  static int __folio_lock_async(struct folio *folio, struct wait_page_queue *wait)
 
 /*
  * Return values:
- * true - folio is locked; mmap_lock is still held.
+ * true - folio is locked.
  * false - folio is not locked.
- *     mmap_lock has been released (mmap_read_unlock(), unless flags had both
- *     FAULT_FLAG_ALLOW_RETRY and FAULT_FLAG_RETRY_NOWAIT set, in
- *     which case mmap_lock is still held.
- *     If flags had FAULT_FLAG_VMA_LOCK set, meaning the operation is performed
- *     with VMA lock only, the VMA lock is still held.
+ *
+ * lock_dropped indicates whether mmap_lock/VMA lock got dropped.
+ *     mmap_lock/VMA lock is dropped when function fails to lock the folio,
+ *     unless flags had both FAULT_FLAG_ALLOW_RETRY and FAULT_FLAG_RETRY_NOWAIT
+ *     set, in which case mmap_lock/VMA lock is still held.
  *
  * If neither ALLOW_RETRY nor KILLABLE are set, will always return true
- * with the folio locked and the mmap_lock unperturbed.
+ * with the folio locked and the mmap_lock/VMA lock unperturbed.
  */
-bool __folio_lock_or_retry(struct folio *folio, struct mm_struct *mm,
-			 unsigned int flags)
+bool __folio_lock_or_retry(struct folio *folio, struct vm_area_struct *vma,
+			 unsigned int flags, bool *lock_dropped)
 {
-	/* Can't do this if not holding mmap_lock */
-	if (flags & FAULT_FLAG_VMA_LOCK)
-		return false;
-
 	if (fault_flag_allow_retry_first(flags)) {
-		/*
-		 * CAUTION! In this case, mmap_lock is not released
-		 * even though return 0.
-		 */
-		if (flags & FAULT_FLAG_RETRY_NOWAIT)
+		if (flags & FAULT_FLAG_RETRY_NOWAIT) {
+			*lock_dropped = false;
 			return false;
+		}
 
-		mmap_read_unlock(mm);
+		if (flags & FAULT_FLAG_VMA_LOCK)
+			vma_end_read(vma);
+		else
+			mmap_read_unlock(vma->vm_mm);
 		if (flags & FAULT_FLAG_KILLABLE)
 			folio_wait_locked_killable(folio);
 		else
 			folio_wait_locked(folio);
+		*lock_dropped = true;
 		return false;
 	}
 	if (flags & FAULT_FLAG_KILLABLE) {
@@ -1739,13 +1737,18 @@  bool __folio_lock_or_retry(struct folio *folio, struct mm_struct *mm,
 
 		ret = __folio_lock_killable(folio);
 		if (ret) {
-			mmap_read_unlock(mm);
+			if (flags & FAULT_FLAG_VMA_LOCK)
+				vma_end_read(vma);
+			else
+				mmap_read_unlock(vma->vm_mm);
+			*lock_dropped = true;
 			return false;
 		}
 	} else {
 		__folio_lock(folio);
 	}
 
+	*lock_dropped = false;
 	return true;
 }
 
diff --git a/mm/memory.c b/mm/memory.c
index c234f8085f1e..acb09a3aad53 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3568,6 +3568,7 @@  static vm_fault_t remove_device_exclusive_entry(struct vm_fault *vmf)
 	struct folio *folio = page_folio(vmf->page);
 	struct vm_area_struct *vma = vmf->vma;
 	struct mmu_notifier_range range;
+	bool lock_dropped;
 
 	/*
 	 * We need a reference to lock the folio because we don't hold
@@ -3580,8 +3581,10 @@  static vm_fault_t remove_device_exclusive_entry(struct vm_fault *vmf)
 	if (!folio_try_get(folio))
 		return 0;
 
-	if (!folio_lock_or_retry(folio, vma->vm_mm, vmf->flags)) {
+	if (!folio_lock_or_retry(folio, vma, vmf->flags, &lock_dropped)) {
 		folio_put(folio);
+		if (lock_dropped && vmf->flags & FAULT_FLAG_VMA_LOCK)
+			return VM_FAULT_VMA_UNLOCKED | VM_FAULT_RETRY;
 		return VM_FAULT_RETRY;
 	}
 	mmu_notifier_range_init_owner(&range, MMU_NOTIFY_EXCLUSIVE, 0,
@@ -3704,7 +3707,7 @@  vm_fault_t do_swap_page(struct vm_fault *vmf)
 	bool exclusive = false;
 	swp_entry_t entry;
 	pte_t pte;
-	int locked;
+	bool lock_dropped;
 	vm_fault_t ret = 0;
 	void *shadow = NULL;
 
@@ -3837,9 +3840,9 @@  vm_fault_t do_swap_page(struct vm_fault *vmf)
 		goto out_release;
 	}
 
-	locked = folio_lock_or_retry(folio, vma->vm_mm, vmf->flags);
-
-	if (!locked) {
+	if (!folio_lock_or_retry(folio, vma, vmf->flags, &lock_dropped)) {
+		if (lock_dropped && vmf->flags & FAULT_FLAG_VMA_LOCK)
+			ret |= VM_FAULT_VMA_UNLOCKED;
 		ret |= VM_FAULT_RETRY;
 		goto out_release;
 	}