diff mbox series

[2/6] mm: Call wp_page_copy() under the VMA lock

Message ID 20230927052505.2855872-3-willy@infradead.org (mailing list archive)
State New
Headers show
Series Handle more faults under the VMA lock | expand

Commit Message

Matthew Wilcox Sept. 27, 2023, 5:25 a.m. UTC
It is usually safe to call wp_page_copy() under the VMA lock.  The only
unsafe situation is when no anon_vma has been allocated for this VMA,
and we have to look at adjacent VMAs to determine if their anon_vma can
be shared.  Since this happens only for the first COW of a page in this
VMA, the majority of calls to wp_page_copy() do not need to fall back
to the mmap_sem.

Add vmf_anon_prepare() as an alternative to anon_vma_prepare() which
will return RETRY if we currently hold the VMA lock and need to allocate
an anon_vma.  This lets us drop the check in do_wp_page().

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 mm/memory.c | 39 ++++++++++++++++++++++++++-------------
 1 file changed, 26 insertions(+), 13 deletions(-)

Comments

Suren Baghdasaryan Sept. 27, 2023, 10:38 p.m. UTC | #1
On Tue, Sep 26, 2023 at 10:25 PM Matthew Wilcox (Oracle)
<willy@infradead.org> wrote:
>
> It is usually safe to call wp_page_copy() under the VMA lock.  The only
> unsafe situation is when no anon_vma has been allocated for this VMA,
> and we have to look at adjacent VMAs to determine if their anon_vma can
> be shared.  Since this happens only for the first COW of a page in this
> VMA, the majority of calls to wp_page_copy() do not need to fall back
> to the mmap_sem.
>
> Add vmf_anon_prepare() as an alternative to anon_vma_prepare() which
> will return RETRY if we currently hold the VMA lock and need to allocate
> an anon_vma.  This lets us drop the check in do_wp_page().
>
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>  mm/memory.c | 39 ++++++++++++++++++++++++++-------------
>  1 file changed, 26 insertions(+), 13 deletions(-)
>
> diff --git a/mm/memory.c b/mm/memory.c
> index 97f860d6cd2a..cff78c496728 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -3042,6 +3042,21 @@ static inline void wp_page_reuse(struct vm_fault *vmf)
>         count_vm_event(PGREUSE);
>  }
>
> +static vm_fault_t vmf_anon_prepare(struct vm_fault *vmf)
> +{
> +       struct vm_area_struct *vma = vmf->vma;
> +
> +       if (likely(vma->anon_vma))
> +               return 0;
> +       if (vmf->flags & FAULT_FLAG_VMA_LOCK) {

I don't think the above condition will happen today because
lock_vma_under_rcu() returns NULL and do_page_fault() falls back to
taking mmap_lock when !vma->anon_vma
(https://elixir.bootlin.com/linux/v6.6-rc3/source/mm/memory.c#L5428).
We would need to narrow down that check in lock_vma_under_rcu() to
make this work here.

> +               vma_end_read(vma);
> +               return VM_FAULT_RETRY;
> +       }
> +       if (__anon_vma_prepare(vma))
> +               return VM_FAULT_OOM;
> +       return 0;
> +}
> +
>  /*
>   * Handle the case of a page which we actually need to copy to a new page,
>   * either due to COW or unsharing.
> @@ -3069,27 +3084,29 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
>         pte_t entry;
>         int page_copied = 0;
>         struct mmu_notifier_range range;
> -       int ret;
> +       vm_fault_t ret;
>
>         delayacct_wpcopy_start();
>
>         if (vmf->page)
>                 old_folio = page_folio(vmf->page);
> -       if (unlikely(anon_vma_prepare(vma)))
> -               goto oom;
> +       ret = vmf_anon_prepare(vmf);
> +       if (unlikely(ret))
> +               goto out;
>
>         if (is_zero_pfn(pte_pfn(vmf->orig_pte))) {
>                 new_folio = vma_alloc_zeroed_movable_folio(vma, vmf->address);
>                 if (!new_folio)
>                         goto oom;
>         } else {
> +               int err;
>                 new_folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0, vma,
>                                 vmf->address, false);
>                 if (!new_folio)
>                         goto oom;
>
> -               ret = __wp_page_copy_user(&new_folio->page, vmf->page, vmf);
> -               if (ret) {
> +               err = __wp_page_copy_user(&new_folio->page, vmf->page, vmf);
> +               if (err) {
>                         /*
>                          * COW failed, if the fault was solved by other,
>                          * it's fine. If not, userspace would re-fault on
> @@ -3102,7 +3119,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
>                                 folio_put(old_folio);
>
>                         delayacct_wpcopy_end();
> -                       return ret == -EHWPOISON ? VM_FAULT_HWPOISON : 0;
> +                       return err == -EHWPOISON ? VM_FAULT_HWPOISON : 0;
>                 }
>                 kmsan_copy_page_meta(&new_folio->page, vmf->page);
>         }
> @@ -3212,11 +3229,13 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
>  oom_free_new:
>         folio_put(new_folio);
>  oom:
> +       ret = VM_FAULT_OOM;
> +out:
>         if (old_folio)
>                 folio_put(old_folio);
>
>         delayacct_wpcopy_end();
> -       return VM_FAULT_OOM;
> +       return ret;
>  }
>
>  /**
> @@ -3458,12 +3477,6 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf)
>                 return 0;
>         }
>  copy:
> -       if ((vmf->flags & FAULT_FLAG_VMA_LOCK) && !vma->anon_vma) {
> -               pte_unmap_unlock(vmf->pte, vmf->ptl);
> -               vma_end_read(vmf->vma);
> -               return VM_FAULT_RETRY;
> -       }
> -
>         /*
>          * Ok, we need to copy. Oh, well..
>          */
> --
> 2.40.1
>
Matthew Wilcox Sept. 28, 2023, 5:18 a.m. UTC | #2
On Wed, Sep 27, 2023 at 03:38:38PM -0700, Suren Baghdasaryan wrote:
> On Tue, Sep 26, 2023 at 10:25 PM Matthew Wilcox (Oracle)
> <willy@infradead.org> wrote:
> > It is usually safe to call wp_page_copy() under the VMA lock.  The only
> > unsafe situation is when no anon_vma has been allocated for this VMA,
> > and we have to look at adjacent VMAs to determine if their anon_vma can
> > be shared.  Since this happens only for the first COW of a page in this
> > VMA, the majority of calls to wp_page_copy() do not need to fall back
> > to the mmap_sem.
> >
> > Add vmf_anon_prepare() as an alternative to anon_vma_prepare() which
> > will return RETRY if we currently hold the VMA lock and need to allocate
> > an anon_vma.  This lets us drop the check in do_wp_page().
> >
> > Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> > ---
> >  mm/memory.c | 39 ++++++++++++++++++++++++++-------------
> >  1 file changed, 26 insertions(+), 13 deletions(-)
> >
> > diff --git a/mm/memory.c b/mm/memory.c
> > index 97f860d6cd2a..cff78c496728 100644
> > --- a/mm/memory.c
> > +++ b/mm/memory.c
> > @@ -3042,6 +3042,21 @@ static inline void wp_page_reuse(struct vm_fault *vmf)
> >         count_vm_event(PGREUSE);
> >  }
> >
> > +static vm_fault_t vmf_anon_prepare(struct vm_fault *vmf)
> > +{
> > +       struct vm_area_struct *vma = vmf->vma;
> > +
> > +       if (likely(vma->anon_vma))
> > +               return 0;
> > +       if (vmf->flags & FAULT_FLAG_VMA_LOCK) {
> 
> I don't think the above condition will happen today because
> lock_vma_under_rcu() returns NULL and do_page_fault() falls back to
> taking mmap_lock when !vma->anon_vma
> (https://elixir.bootlin.com/linux/v6.6-rc3/source/mm/memory.c#L5428).
> We would need to narrow down that check in lock_vma_under_rcu() to
> make this work here.

That's only for anon VMAs.  For file-backed VMAs, we can get here ...

handle_pte_fault()
        if (vmf->flags & (FAULT_FLAG_WRITE|FAULT_FLAG_UNSHARE)) {
                if (!pte_write(entry))
                        return do_wp_page(vmf);

ie we we have a MAP_PRIVATE of a file, first take a read-fault on it,
then write to it.  That causes us to allocate an anon page in this
file-backed VMA, so we need an anon_vma to exist.
Suren Baghdasaryan Sept. 28, 2023, 2:57 p.m. UTC | #3
On Wed, Sep 27, 2023 at 10:18 PM Matthew Wilcox <willy@infradead.org> wrote:
>
> On Wed, Sep 27, 2023 at 03:38:38PM -0700, Suren Baghdasaryan wrote:
> > On Tue, Sep 26, 2023 at 10:25 PM Matthew Wilcox (Oracle)
> > <willy@infradead.org> wrote:
> > > It is usually safe to call wp_page_copy() under the VMA lock.  The only
> > > unsafe situation is when no anon_vma has been allocated for this VMA,
> > > and we have to look at adjacent VMAs to determine if their anon_vma can
> > > be shared.  Since this happens only for the first COW of a page in this
> > > VMA, the majority of calls to wp_page_copy() do not need to fall back
> > > to the mmap_sem.
> > >
> > > Add vmf_anon_prepare() as an alternative to anon_vma_prepare() which
> > > will return RETRY if we currently hold the VMA lock and need to allocate
> > > an anon_vma.  This lets us drop the check in do_wp_page().
> > >
> > > Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> > > ---
> > >  mm/memory.c | 39 ++++++++++++++++++++++++++-------------
> > >  1 file changed, 26 insertions(+), 13 deletions(-)
> > >
> > > diff --git a/mm/memory.c b/mm/memory.c
> > > index 97f860d6cd2a..cff78c496728 100644
> > > --- a/mm/memory.c
> > > +++ b/mm/memory.c
> > > @@ -3042,6 +3042,21 @@ static inline void wp_page_reuse(struct vm_fault *vmf)
> > >         count_vm_event(PGREUSE);
> > >  }
> > >
> > > +static vm_fault_t vmf_anon_prepare(struct vm_fault *vmf)
> > > +{
> > > +       struct vm_area_struct *vma = vmf->vma;
> > > +
> > > +       if (likely(vma->anon_vma))
> > > +               return 0;
> > > +       if (vmf->flags & FAULT_FLAG_VMA_LOCK) {
> >
> > I don't think the above condition will happen today because
> > lock_vma_under_rcu() returns NULL and do_page_fault() falls back to
> > taking mmap_lock when !vma->anon_vma
> > (https://elixir.bootlin.com/linux/v6.6-rc3/source/mm/memory.c#L5428).
> > We would need to narrow down that check in lock_vma_under_rcu() to
> > make this work here.
>
> That's only for anon VMAs.  For file-backed VMAs, we can get here ...
>
> handle_pte_fault()
>         if (vmf->flags & (FAULT_FLAG_WRITE|FAULT_FLAG_UNSHARE)) {
>                 if (!pte_write(entry))
>                         return do_wp_page(vmf);
>
> ie we we have a MAP_PRIVATE of a file, first take a read-fault on it,
> then write to it.  That causes us to allocate an anon page in this
> file-backed VMA, so we need an anon_vma to exist.

Oh, sorry, I completely missed that this check was only for anon VMAs.
Now it makes sense. Thanks!
diff mbox series

Patch

diff --git a/mm/memory.c b/mm/memory.c
index 97f860d6cd2a..cff78c496728 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3042,6 +3042,21 @@  static inline void wp_page_reuse(struct vm_fault *vmf)
 	count_vm_event(PGREUSE);
 }
 
+static vm_fault_t vmf_anon_prepare(struct vm_fault *vmf)
+{
+	struct vm_area_struct *vma = vmf->vma;
+
+	if (likely(vma->anon_vma))
+		return 0;
+	if (vmf->flags & FAULT_FLAG_VMA_LOCK) {
+		vma_end_read(vma);
+		return VM_FAULT_RETRY;
+	}
+	if (__anon_vma_prepare(vma))
+		return VM_FAULT_OOM;
+	return 0;
+}
+
 /*
  * Handle the case of a page which we actually need to copy to a new page,
  * either due to COW or unsharing.
@@ -3069,27 +3084,29 @@  static vm_fault_t wp_page_copy(struct vm_fault *vmf)
 	pte_t entry;
 	int page_copied = 0;
 	struct mmu_notifier_range range;
-	int ret;
+	vm_fault_t ret;
 
 	delayacct_wpcopy_start();
 
 	if (vmf->page)
 		old_folio = page_folio(vmf->page);
-	if (unlikely(anon_vma_prepare(vma)))
-		goto oom;
+	ret = vmf_anon_prepare(vmf);
+	if (unlikely(ret))
+		goto out;
 
 	if (is_zero_pfn(pte_pfn(vmf->orig_pte))) {
 		new_folio = vma_alloc_zeroed_movable_folio(vma, vmf->address);
 		if (!new_folio)
 			goto oom;
 	} else {
+		int err;
 		new_folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0, vma,
 				vmf->address, false);
 		if (!new_folio)
 			goto oom;
 
-		ret = __wp_page_copy_user(&new_folio->page, vmf->page, vmf);
-		if (ret) {
+		err = __wp_page_copy_user(&new_folio->page, vmf->page, vmf);
+		if (err) {
 			/*
 			 * COW failed, if the fault was solved by other,
 			 * it's fine. If not, userspace would re-fault on
@@ -3102,7 +3119,7 @@  static vm_fault_t wp_page_copy(struct vm_fault *vmf)
 				folio_put(old_folio);
 
 			delayacct_wpcopy_end();
-			return ret == -EHWPOISON ? VM_FAULT_HWPOISON : 0;
+			return err == -EHWPOISON ? VM_FAULT_HWPOISON : 0;
 		}
 		kmsan_copy_page_meta(&new_folio->page, vmf->page);
 	}
@@ -3212,11 +3229,13 @@  static vm_fault_t wp_page_copy(struct vm_fault *vmf)
 oom_free_new:
 	folio_put(new_folio);
 oom:
+	ret = VM_FAULT_OOM;
+out:
 	if (old_folio)
 		folio_put(old_folio);
 
 	delayacct_wpcopy_end();
-	return VM_FAULT_OOM;
+	return ret;
 }
 
 /**
@@ -3458,12 +3477,6 @@  static vm_fault_t do_wp_page(struct vm_fault *vmf)
 		return 0;
 	}
 copy:
-	if ((vmf->flags & FAULT_FLAG_VMA_LOCK) && !vma->anon_vma) {
-		pte_unmap_unlock(vmf->pte, vmf->ptl);
-		vma_end_read(vmf->vma);
-		return VM_FAULT_RETRY;
-	}
-
 	/*
 	 * Ok, we need to copy. Oh, well..
 	 */