diff mbox series

[RFC,RESEND,17/28] mm/mmap: prevent pagefault handler from racing with mmu_notifier registration

Message ID 20220901173516.702122-18-surenb@google.com (mailing list archive)
State New, archived
Headers show
Series per-VMA locks proposal | expand

Commit Message

Suren Baghdasaryan Sept. 1, 2022, 5:35 p.m. UTC
Pagefault handlers might need to fire MMU notifications while a new
notifier is being registered. Modify mm_take_all_locks to mark all VMAs
as locked and prevent this race with fault handlers that would hold VMA
locks.

Signed-off-by: Suren Baghdasaryan <surenb@google.com>
---
 mm/mmap.c | 3 +++
 1 file changed, 3 insertions(+)

Comments

Laurent Dufour Sept. 9, 2022, 2:20 p.m. UTC | #1
Le 01/09/2022 à 19:35, Suren Baghdasaryan a écrit :
> Pagefault handlers might need to fire MMU notifications while a new
> notifier is being registered. Modify mm_take_all_locks to mark all VMAs
> as locked and prevent this race with fault handlers that would hold VMA
> locks.
> 
> Signed-off-by: Suren Baghdasaryan <surenb@google.com>
> ---
>  mm/mmap.c | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/mm/mmap.c b/mm/mmap.c
> index b31cc97c2803..1edfcd384f5e 100644
> --- a/mm/mmap.c
> +++ b/mm/mmap.c
> @@ -3538,6 +3538,7 @@ static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
>   *     hugetlb mapping);
>   *   - all i_mmap_rwsem locks;
>   *   - all anon_vma->rwseml
> + *   - all vmas marked locked

IIRC, the anon_vma may be locked during page fault handling, and this
happens after the VMA has been read-locked. I think the same applies to
the i_mmap_rwsem lock.

Thus, the VMA should be marked locked first.

>   *
>   * We can take all locks within these types randomly because the VM code
>   * doesn't nest them and we protected from parallel mm_take_all_locks() by
> @@ -3579,6 +3580,7 @@ int mm_take_all_locks(struct mm_struct *mm)
>  		if (vma->anon_vma)
>  			list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
>  				vm_lock_anon_vma(mm, avc->anon_vma);
> +		vma_mark_locked(vma);
>  	}
>  
>  	return 0;
> @@ -3636,6 +3638,7 @@ void mm_drop_all_locks(struct mm_struct *mm)
>  	mmap_assert_write_locked(mm);
>  	BUG_ON(!mutex_is_locked(&mm_all_locks_mutex));
>  
> +	vma_mark_unlocked_all(mm);
>  	for (vma = mm->mmap; vma; vma = vma->vm_next) {
>  		if (vma->anon_vma)
>  			list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
Suren Baghdasaryan Sept. 9, 2022, 4:12 p.m. UTC | #2
On Fri, Sep 9, 2022 at 7:20 AM Laurent Dufour <ldufour@linux.ibm.com> wrote:
>
> Le 01/09/2022 à 19:35, Suren Baghdasaryan a écrit :
> > Pagefault handlers might need to fire MMU notifications while a new
> > notifier is being registered. Modify mm_take_all_locks to mark all VMAs
> > as locked and prevent this race with fault handlers that would hold VMA
> > locks.
> >
> > Signed-off-by: Suren Baghdasaryan <surenb@google.com>
> > ---
> >  mm/mmap.c | 3 +++
> >  1 file changed, 3 insertions(+)
> >
> > diff --git a/mm/mmap.c b/mm/mmap.c
> > index b31cc97c2803..1edfcd384f5e 100644
> > --- a/mm/mmap.c
> > +++ b/mm/mmap.c
> > @@ -3538,6 +3538,7 @@ static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
> >   *     hugetlb mapping);
> >   *   - all i_mmap_rwsem locks;
> >   *   - all anon_vma->rwseml
> > + *   - all vmas marked locked
>
> IIRC, the anon_vma may be locked during page fault handling, and this
> happens after the VMA has been read-locked. I think the same applies to
> the i_mmap_rwsem lock.
>
> Thus, the VMA should be marked locked first.

I see. I'll double-check and change the locking order. Thanks!

>
> >   *
> >   * We can take all locks within these types randomly because the VM code
> >   * doesn't nest them and we protected from parallel mm_take_all_locks() by
> > @@ -3579,6 +3580,7 @@ int mm_take_all_locks(struct mm_struct *mm)
> >               if (vma->anon_vma)
> >                       list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
> >                               vm_lock_anon_vma(mm, avc->anon_vma);
> > +             vma_mark_locked(vma);
> >       }
> >
> >       return 0;
> > @@ -3636,6 +3638,7 @@ void mm_drop_all_locks(struct mm_struct *mm)
> >       mmap_assert_write_locked(mm);
> >       BUG_ON(!mutex_is_locked(&mm_all_locks_mutex));
> >
> > +     vma_mark_unlocked_all(mm);
> >       for (vma = mm->mmap; vma; vma = vma->vm_next) {
> >               if (vma->anon_vma)
> >                       list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
>
diff mbox series

Patch

diff --git a/mm/mmap.c b/mm/mmap.c
index b31cc97c2803..1edfcd384f5e 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -3538,6 +3538,7 @@  static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
  *     hugetlb mapping);
  *   - all i_mmap_rwsem locks;
  *   - all anon_vma->rwseml
+ *   - all vmas marked locked
  *
  * We can take all locks within these types randomly because the VM code
  * doesn't nest them and we protected from parallel mm_take_all_locks() by
@@ -3579,6 +3580,7 @@  int mm_take_all_locks(struct mm_struct *mm)
 		if (vma->anon_vma)
 			list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
 				vm_lock_anon_vma(mm, avc->anon_vma);
+		vma_mark_locked(vma);
 	}
 
 	return 0;
@@ -3636,6 +3638,7 @@  void mm_drop_all_locks(struct mm_struct *mm)
 	mmap_assert_write_locked(mm);
 	BUG_ON(!mutex_is_locked(&mm_all_locks_mutex));
 
+	vma_mark_unlocked_all(mm);
 	for (vma = mm->mmap; vma; vma = vma->vm_next) {
 		if (vma->anon_vma)
 			list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)