Message ID | 20240531163217.1584450-3-Liam.Howlett@oracle.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | Avoid MAP_FIXED gap exposure | expand |
On Fri, May 31, 2024 at 9:33 AM Liam R. Howlett <Liam.Howlett@oracle.com> wrote: > > Split the munmap function into a gathering of vmas and a cleanup of the > gathered vmas. This is necessary for the later patches in the series. > > Signed-off-by: Liam R. Howlett <Liam.Howlett@oracle.com> The refactoring looks correct but it's quite painful to verify all the pieces. Not sure if it could have been refactored in more gradual steps... Reviewed-by: Suren Baghdasaryan <surenb@google.com> > --- > mm/mmap.c | 143 ++++++++++++++++++++++++++++++++++++++---------------- > 1 file changed, 101 insertions(+), 42 deletions(-) > > diff --git a/mm/mmap.c b/mm/mmap.c > index 31d464e6a656..fad40d604c64 100644 > --- a/mm/mmap.c > +++ b/mm/mmap.c > @@ -2340,6 +2340,7 @@ static inline void remove_mt(struct mm_struct *mm, struct ma_state *mas) > > if (vma->vm_flags & VM_ACCOUNT) > nr_accounted += nrpages; > + nit: here and below a couple of unnecessary empty lines. > vm_stat_account(mm, vma->vm_flags, -nrpages); > remove_vma(vma, false); > } > @@ -2545,33 +2546,45 @@ struct vm_area_struct *vma_merge_extend(struct vma_iterator *vmi, > vma->vm_userfaultfd_ctx, anon_vma_name(vma)); > } > > + > +static inline void abort_munmap_vmas(struct ma_state *mas_detach) > +{ > + struct vm_area_struct *vma; > + int limit; > + > + limit = mas_detach->index; > + mas_set(mas_detach, 0); > + /* Re-attach any detached VMAs */ > + mas_for_each(mas_detach, vma, limit) > + vma_mark_detached(vma, false); > + > + __mt_destroy(mas_detach->tree); > +} > + > /* > - * do_vmi_align_munmap() - munmap the aligned region from @start to @end. > + * vmi_gather_munmap_vmas() - Put all VMAs within a range into a maple tree > + * for removal at a later date. Handles splitting first and last if necessary > + * and marking the vmas as isolated. > + * > * @vmi: The vma iterator > * @vma: The starting vm_area_struct > * @mm: The mm_struct > * @start: The aligned start address to munmap. > * @end: The aligned end address to munmap. > * @uf: The userfaultfd list_head > - * @unlock: Set to true to drop the mmap_lock. unlocking only happens on > - * success. > + * @mas_detach: The maple state tracking the detached tree > * > - * Return: 0 on success and drops the lock if so directed, error and leaves the > - * lock held otherwise. > + * Return: 0 on success > */ > static int > -do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, > +vmi_gather_munmap_vmas(struct vma_iterator *vmi, struct vm_area_struct *vma, > struct mm_struct *mm, unsigned long start, > - unsigned long end, struct list_head *uf, bool unlock) > + unsigned long end, struct list_head *uf, > + struct ma_state *mas_detach, unsigned long *locked_vm) > { > - struct vm_area_struct *prev, *next = NULL; > - struct maple_tree mt_detach; > - int count = 0; > + struct vm_area_struct *next = NULL; > int error = -ENOMEM; > - unsigned long locked_vm = 0; > - MA_STATE(mas_detach, &mt_detach, 0, 0); > - mt_init_flags(&mt_detach, vmi->mas.tree->ma_flags & MT_FLAGS_LOCK_MASK); > - mt_on_stack(mt_detach); > + int count = 0; > > /* > * If we need to split any vma, do it now to save pain later. > @@ -2610,15 +2623,14 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, > goto end_split_failed; > } > vma_start_write(next); > - mas_set(&mas_detach, count); > - error = mas_store_gfp(&mas_detach, next, GFP_KERNEL); > + mas_set(mas_detach, count++); > + if (next->vm_flags & VM_LOCKED) > + *locked_vm += vma_pages(next); > + > + error = mas_store_gfp(mas_detach, next, GFP_KERNEL); > if (error) > goto munmap_gather_failed; > vma_mark_detached(next, true); > - if (next->vm_flags & VM_LOCKED) > - locked_vm += vma_pages(next); > - > - count++; > if (unlikely(uf)) { > /* > * If userfaultfd_unmap_prep returns an error the vmas > @@ -2643,7 +2655,7 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, > #if defined(CONFIG_DEBUG_VM_MAPLE_TREE) > /* Make sure no VMAs are about to be lost. */ > { > - MA_STATE(test, &mt_detach, 0, 0); > + MA_STATE(test, mas_detach->tree, 0, 0); > struct vm_area_struct *vma_mas, *vma_test; > int test_count = 0; > > @@ -2663,13 +2675,29 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, > while (vma_iter_addr(vmi) > start) > vma_iter_prev_range(vmi); > > - error = vma_iter_clear_gfp(vmi, start, end, GFP_KERNEL); > - if (error) > - goto clear_tree_failed; > + return 0; > > - /* Point of no return */ > - mm->locked_vm -= locked_vm; > +userfaultfd_error: > +munmap_gather_failed: > +end_split_failed: > + abort_munmap_vmas(mas_detach); > +start_split_failed: > +map_count_exceeded: > + return error; > +} > + > +static void > +vmi_complete_munmap_vmas(struct vma_iterator *vmi, struct vm_area_struct *vma, > + struct mm_struct *mm, unsigned long start, > + unsigned long end, bool unlock, struct ma_state *mas_detach, > + unsigned long locked_vm) > +{ > + struct vm_area_struct *prev, *next; > + int count; > + > + count = mas_detach->index + 1; > mm->map_count -= count; > + mm->locked_vm -= locked_vm; > if (unlock) > mmap_write_downgrade(mm); > > @@ -2682,30 +2710,61 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, > * We can free page tables without write-locking mmap_lock because VMAs > * were isolated before we downgraded mmap_lock. > */ > - mas_set(&mas_detach, 1); > - unmap_region(mm, &mas_detach, vma, prev, next, start, end, count, > + mas_set(mas_detach, 1); > + unmap_region(mm, mas_detach, vma, prev, next, start, end, count, > !unlock); > /* Statistics and freeing VMAs */ > - mas_set(&mas_detach, 0); > - remove_mt(mm, &mas_detach); > + mas_set(mas_detach, 0); > + remove_mt(mm, mas_detach); > validate_mm(mm); > if (unlock) > mmap_read_unlock(mm); > > - __mt_destroy(&mt_detach); > - return 0; > + __mt_destroy(mas_detach->tree); > +} > > -clear_tree_failed: > -userfaultfd_error: > -munmap_gather_failed: > -end_split_failed: > - mas_set(&mas_detach, 0); > - mas_for_each(&mas_detach, next, end) > - vma_mark_detached(next, false); > +/* > + * do_vmi_align_munmap() - munmap the aligned region from @start to @end. > + * @vmi: The vma iterator > + * @vma: The starting vm_area_struct > + * @mm: The mm_struct > + * @start: The aligned start address to munmap. > + * @end: The aligned end address to munmap. > + * @uf: The userfaultfd list_head > + * @unlock: Set to true to drop the mmap_lock. unlocking only happens on > + * success. > + * > + * Return: 0 on success and drops the lock if so directed, error and leaves the > + * lock held otherwise. > + */ > +static int > +do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, > + struct mm_struct *mm, unsigned long start, > + unsigned long end, struct list_head *uf, bool unlock) > +{ > + struct maple_tree mt_detach; > + MA_STATE(mas_detach, &mt_detach, 0, 0); > + mt_init_flags(&mt_detach, vmi->mas.tree->ma_flags & MT_FLAGS_LOCK_MASK); > + mt_on_stack(mt_detach); > + int error; > + unsigned long locked_vm = 0; > > - __mt_destroy(&mt_detach); > -start_split_failed: > -map_count_exceeded: > + error = vmi_gather_munmap_vmas(vmi, vma, mm, start, end, uf, > + &mas_detach, &locked_vm); > + if (error) > + goto gather_failed; > + > + error = vma_iter_clear_gfp(vmi, start, end, GFP_KERNEL); > + if (error) > + goto clear_area_failed; > + > + vmi_complete_munmap_vmas(vmi, vma, mm, start, end, unlock, &mas_detach, > + locked_vm); > + return 0; > + > +clear_area_failed: > + abort_munmap_vmas(&mas_detach); > +gather_failed: > validate_mm(mm); > return error; > } > -- > 2.43.0 >
* Suren Baghdasaryan <surenb@google.com> [240606 20:14]: > On Fri, May 31, 2024 at 9:33 AM Liam R. Howlett <Liam.Howlett@oracle.com> wrote: > > > > Split the munmap function into a gathering of vmas and a cleanup of the > > gathered vmas. This is necessary for the later patches in the series. > > > > Signed-off-by: Liam R. Howlett <Liam.Howlett@oracle.com> > > The refactoring looks correct but it's quite painful to verify all the > pieces. Not sure if it could have been refactored in more gradual > steps... Okay, I'll see if I can make this into smaller patches that still work. > > Reviewed-by: Suren Baghdasaryan <surenb@google.com> > > > --- > > mm/mmap.c | 143 ++++++++++++++++++++++++++++++++++++++---------------- > > 1 file changed, 101 insertions(+), 42 deletions(-) > > > > diff --git a/mm/mmap.c b/mm/mmap.c > > index 31d464e6a656..fad40d604c64 100644 > > --- a/mm/mmap.c > > +++ b/mm/mmap.c > > @@ -2340,6 +2340,7 @@ static inline void remove_mt(struct mm_struct *mm, struct ma_state *mas) > > > > if (vma->vm_flags & VM_ACCOUNT) > > nr_accounted += nrpages; > > + > > nit: here and below a couple of unnecessary empty lines. Thanks. I'll remove them in the next revision.
diff --git a/mm/mmap.c b/mm/mmap.c index 31d464e6a656..fad40d604c64 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2340,6 +2340,7 @@ static inline void remove_mt(struct mm_struct *mm, struct ma_state *mas) if (vma->vm_flags & VM_ACCOUNT) nr_accounted += nrpages; + vm_stat_account(mm, vma->vm_flags, -nrpages); remove_vma(vma, false); } @@ -2545,33 +2546,45 @@ struct vm_area_struct *vma_merge_extend(struct vma_iterator *vmi, vma->vm_userfaultfd_ctx, anon_vma_name(vma)); } + +static inline void abort_munmap_vmas(struct ma_state *mas_detach) +{ + struct vm_area_struct *vma; + int limit; + + limit = mas_detach->index; + mas_set(mas_detach, 0); + /* Re-attach any detached VMAs */ + mas_for_each(mas_detach, vma, limit) + vma_mark_detached(vma, false); + + __mt_destroy(mas_detach->tree); +} + /* - * do_vmi_align_munmap() - munmap the aligned region from @start to @end. + * vmi_gather_munmap_vmas() - Put all VMAs within a range into a maple tree + * for removal at a later date. Handles splitting first and last if necessary + * and marking the vmas as isolated. + * * @vmi: The vma iterator * @vma: The starting vm_area_struct * @mm: The mm_struct * @start: The aligned start address to munmap. * @end: The aligned end address to munmap. * @uf: The userfaultfd list_head - * @unlock: Set to true to drop the mmap_lock. unlocking only happens on - * success. + * @mas_detach: The maple state tracking the detached tree * - * Return: 0 on success and drops the lock if so directed, error and leaves the - * lock held otherwise. + * Return: 0 on success */ static int -do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, +vmi_gather_munmap_vmas(struct vma_iterator *vmi, struct vm_area_struct *vma, struct mm_struct *mm, unsigned long start, - unsigned long end, struct list_head *uf, bool unlock) + unsigned long end, struct list_head *uf, + struct ma_state *mas_detach, unsigned long *locked_vm) { - struct vm_area_struct *prev, *next = NULL; - struct maple_tree mt_detach; - int count = 0; + struct vm_area_struct *next = NULL; int error = -ENOMEM; - unsigned long locked_vm = 0; - MA_STATE(mas_detach, &mt_detach, 0, 0); - mt_init_flags(&mt_detach, vmi->mas.tree->ma_flags & MT_FLAGS_LOCK_MASK); - mt_on_stack(mt_detach); + int count = 0; /* * If we need to split any vma, do it now to save pain later. @@ -2610,15 +2623,14 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, goto end_split_failed; } vma_start_write(next); - mas_set(&mas_detach, count); - error = mas_store_gfp(&mas_detach, next, GFP_KERNEL); + mas_set(mas_detach, count++); + if (next->vm_flags & VM_LOCKED) + *locked_vm += vma_pages(next); + + error = mas_store_gfp(mas_detach, next, GFP_KERNEL); if (error) goto munmap_gather_failed; vma_mark_detached(next, true); - if (next->vm_flags & VM_LOCKED) - locked_vm += vma_pages(next); - - count++; if (unlikely(uf)) { /* * If userfaultfd_unmap_prep returns an error the vmas @@ -2643,7 +2655,7 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, #if defined(CONFIG_DEBUG_VM_MAPLE_TREE) /* Make sure no VMAs are about to be lost. */ { - MA_STATE(test, &mt_detach, 0, 0); + MA_STATE(test, mas_detach->tree, 0, 0); struct vm_area_struct *vma_mas, *vma_test; int test_count = 0; @@ -2663,13 +2675,29 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, while (vma_iter_addr(vmi) > start) vma_iter_prev_range(vmi); - error = vma_iter_clear_gfp(vmi, start, end, GFP_KERNEL); - if (error) - goto clear_tree_failed; + return 0; - /* Point of no return */ - mm->locked_vm -= locked_vm; +userfaultfd_error: +munmap_gather_failed: +end_split_failed: + abort_munmap_vmas(mas_detach); +start_split_failed: +map_count_exceeded: + return error; +} + +static void +vmi_complete_munmap_vmas(struct vma_iterator *vmi, struct vm_area_struct *vma, + struct mm_struct *mm, unsigned long start, + unsigned long end, bool unlock, struct ma_state *mas_detach, + unsigned long locked_vm) +{ + struct vm_area_struct *prev, *next; + int count; + + count = mas_detach->index + 1; mm->map_count -= count; + mm->locked_vm -= locked_vm; if (unlock) mmap_write_downgrade(mm); @@ -2682,30 +2710,61 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, * We can free page tables without write-locking mmap_lock because VMAs * were isolated before we downgraded mmap_lock. */ - mas_set(&mas_detach, 1); - unmap_region(mm, &mas_detach, vma, prev, next, start, end, count, + mas_set(mas_detach, 1); + unmap_region(mm, mas_detach, vma, prev, next, start, end, count, !unlock); /* Statistics and freeing VMAs */ - mas_set(&mas_detach, 0); - remove_mt(mm, &mas_detach); + mas_set(mas_detach, 0); + remove_mt(mm, mas_detach); validate_mm(mm); if (unlock) mmap_read_unlock(mm); - __mt_destroy(&mt_detach); - return 0; + __mt_destroy(mas_detach->tree); +} -clear_tree_failed: -userfaultfd_error: -munmap_gather_failed: -end_split_failed: - mas_set(&mas_detach, 0); - mas_for_each(&mas_detach, next, end) - vma_mark_detached(next, false); +/* + * do_vmi_align_munmap() - munmap the aligned region from @start to @end. + * @vmi: The vma iterator + * @vma: The starting vm_area_struct + * @mm: The mm_struct + * @start: The aligned start address to munmap. + * @end: The aligned end address to munmap. + * @uf: The userfaultfd list_head + * @unlock: Set to true to drop the mmap_lock. unlocking only happens on + * success. + * + * Return: 0 on success and drops the lock if so directed, error and leaves the + * lock held otherwise. + */ +static int +do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, + struct mm_struct *mm, unsigned long start, + unsigned long end, struct list_head *uf, bool unlock) +{ + struct maple_tree mt_detach; + MA_STATE(mas_detach, &mt_detach, 0, 0); + mt_init_flags(&mt_detach, vmi->mas.tree->ma_flags & MT_FLAGS_LOCK_MASK); + mt_on_stack(mt_detach); + int error; + unsigned long locked_vm = 0; - __mt_destroy(&mt_detach); -start_split_failed: -map_count_exceeded: + error = vmi_gather_munmap_vmas(vmi, vma, mm, start, end, uf, + &mas_detach, &locked_vm); + if (error) + goto gather_failed; + + error = vma_iter_clear_gfp(vmi, start, end, GFP_KERNEL); + if (error) + goto clear_area_failed; + + vmi_complete_munmap_vmas(vmi, vma, mm, start, end, unlock, &mas_detach, + locked_vm); + return 0; + +clear_area_failed: + abort_munmap_vmas(&mas_detach); +gather_failed: validate_mm(mm); return error; }
Split the munmap function into a gathering of vmas and a cleanup of the gathered vmas. This is necessary for the later patches in the series. Signed-off-by: Liam R. Howlett <Liam.Howlett@oracle.com> --- mm/mmap.c | 143 ++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 101 insertions(+), 42 deletions(-)