Message ID | 95292b1d1215f49bd895f1aa38f54a8274c350af.1722849859.git.lorenzo.stoakes@oracle.com
---|---
State | New
Series | mm: remove vma_merge()
On Mon, 5 Aug 2024 13:13:51 +0100 Lorenzo Stoakes <lorenzo.stoakes@oracle.com> wrote: > Equally use struct vma_merge_struct to abstract parameters for VMA > expansion and shrinking. > > This leads the way to further refactoring and de-duplication by > standardising the interface. > > Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> > --- > mm/mmap.c | 30 +++++++++++-------- > mm/vma.c | 66 ++++++++++++++++++----------------------- > mm/vma.h | 8 ++--- > tools/testing/vma/vma.c | 18 +++++++++-- > 4 files changed, 65 insertions(+), 57 deletions(-) > > diff --git a/mm/mmap.c b/mm/mmap.c > index 721ced6e37b0..04145347c245 100644 > --- a/mm/mmap.c > +++ b/mm/mmap.c > @@ -1367,7 +1367,6 @@ unsigned long mmap_region(struct file *file, unsigned long addr, > pgoff_t pglen = len >> PAGE_SHIFT; > unsigned long charged = 0; > unsigned long end = addr + len; > - unsigned long merge_start = addr, merge_end = end; > bool writable_file_mapping = false; > int error; > VMA_ITERATOR(vmi, mm, addr); > @@ -1423,28 +1422,26 @@ unsigned long mmap_region(struct file *file, unsigned long addr, > /* Attempt to expand an old mapping */ > /* Check next */ > if (next && next->vm_start == end && can_vma_merge_before(&vmg)) { > - merge_end = next->vm_end; > - vma = next; > + /* We can adjust this as can_vma_merge_after() doesn't touch */ > + vmg.end = next->vm_end; > + vma = vmg.vma = next; > vmg.pgoff = next->vm_pgoff - pglen; > - } > > - if (vma) { > + /* We may merge our NULL anon_vma with non-NULL in next. */ > vmg.anon_vma = vma->anon_vma; > - vmg.uffd_ctx = vma->vm_userfaultfd_ctx; > } > > /* Check prev */ > if (prev && prev->vm_end == addr && can_vma_merge_after(&vmg)) { > - merge_start = prev->vm_start; > - vma = prev; > + vmg.start = prev->vm_start; > + vma = vmg.vma = prev; > vmg.pgoff = prev->vm_pgoff; > } else if (prev) { > vma_iter_next_range(&vmi); > } > > /* Actually expand, if possible */ > - if (vma && > - !vma_expand(&vmi, vma, merge_start, merge_end, vmg.pgoff, next)) { > + if (vma && !vma_expand(&vmg)) { See? One more use of "next" that has gone away in the end... 
Petr T > khugepaged_enter_vma(vma, vm_flags); > goto expanded; > } > @@ -2359,6 +2356,13 @@ int relocate_vma_down(struct vm_area_struct *vma, unsigned long shift) > VMA_ITERATOR(vmi, mm, new_start); > struct vm_area_struct *next; > struct mmu_gather tlb; > + struct vma_merge_struct vmg = { > + .vmi = &vmi, > + .vma = vma, > + .start = new_start, > + .end = old_end, > + .pgoff = vma->vm_pgoff, > + }; > > BUG_ON(new_start > new_end); > > @@ -2373,7 +2377,7 @@ int relocate_vma_down(struct vm_area_struct *vma, unsigned long shift) > /* > * cover the whole range: [new_start, old_end) > */ > - if (vma_expand(&vmi, vma, new_start, old_end, vma->vm_pgoff, NULL)) > + if (vma_expand(&vmg)) > return -ENOMEM; > > /* > @@ -2406,6 +2410,8 @@ int relocate_vma_down(struct vm_area_struct *vma, unsigned long shift) > tlb_finish_mmu(&tlb); > > vma_prev(&vmi); > + vmg.end = new_end; > + > /* Shrink the vma to just the new range */ > - return vma_shrink(&vmi, vma, new_start, new_end, vma->vm_pgoff); > + return vma_shrink(&vmg); > } > diff --git a/mm/vma.c b/mm/vma.c > index b452b472a085..3d6ce04f1b9c 100644 > --- a/mm/vma.c > +++ b/mm/vma.c > @@ -489,30 +489,25 @@ void validate_mm(struct mm_struct *mm) > /* > * vma_expand - Expand an existing VMA > * > - * @vmi: The vma iterator > - * @vma: The vma to expand > - * @start: The start of the vma > - * @end: The exclusive end of the vma > - * @pgoff: The page offset of vma > - * @next: The current of next vma. > + * @vmg: Describes a VMA expansion operation. > * > - * Expand @vma to @start and @end. Can expand off the start and end. Will > - * expand over @next if it's different from @vma and @end == @next->vm_end. > - * Checking if the @vma can expand and merge with @next needs to be handled by > - * the caller. > + * Expand @vma to vmg->start and vmg->end. Can expand off the start and end. > + * Will expand over vmg->next if it's different from vmg->vma and vmg->end == > + * vmg->next->vm_end. Checking if the vmg->vma can expand and merge with > + * vmg->next needs to be handled by the caller. > * > * Returns: 0 on success > */ > -int vma_expand(struct vma_iterator *vmi, struct vm_area_struct *vma, > - unsigned long start, unsigned long end, pgoff_t pgoff, > - struct vm_area_struct *next) > +int vma_expand(struct vma_merge_struct *vmg) > { > struct vm_area_struct *anon_dup = NULL; > bool remove_next = false; > + struct vm_area_struct *vma = vmg->vma; > + struct vm_area_struct *next = vmg->next; > struct vma_prepare vp; > > vma_start_write(vma); > - if (next && (vma != next) && (end == next->vm_end)) { > + if (next && (vma != next) && (vmg->end == next->vm_end)) { > int ret; > > remove_next = true; > @@ -525,21 +520,21 @@ int vma_expand(struct vma_iterator *vmi, struct vm_area_struct *vma, > init_multi_vma_prep(&vp, vma, NULL, remove_next ? next : NULL, NULL); > /* Not merging but overwriting any part of next is not handled. 
*/ > VM_WARN_ON(next && !vp.remove && > - next != vma && end > next->vm_start); > + next != vma && vmg->end > next->vm_start); > /* Only handles expanding */ > - VM_WARN_ON(vma->vm_start < start || vma->vm_end > end); > + VM_WARN_ON(vma->vm_start < vmg->start || vma->vm_end > vmg->end); > > /* Note: vma iterator must be pointing to 'start' */ > - vma_iter_config(vmi, start, end); > - if (vma_iter_prealloc(vmi, vma)) > + vma_iter_config(vmg->vmi, vmg->start, vmg->end); > + if (vma_iter_prealloc(vmg->vmi, vma)) > goto nomem; > > vma_prepare(&vp); > - vma_adjust_trans_huge(vma, start, end, 0); > - vma_set_range(vma, start, end, pgoff); > - vma_iter_store(vmi, vma); > + vma_adjust_trans_huge(vma, vmg->start, vmg->end, 0); > + vma_set_range(vma, vmg->start, vmg->end, vmg->pgoff); > + vma_iter_store(vmg->vmi, vma); > > - vma_complete(&vp, vmi, vma->vm_mm); > + vma_complete(&vp, vmg->vmi, vma->vm_mm); > return 0; > > nomem: > @@ -550,37 +545,34 @@ int vma_expand(struct vma_iterator *vmi, struct vm_area_struct *vma, > > /* > * vma_shrink() - Reduce an existing VMAs memory area > - * @vmi: The vma iterator > - * @vma: The VMA to modify > - * @start: The new start > - * @end: The new end > + * @vmg: Describes a VMA shrink operation. > * > * Returns: 0 on success, -ENOMEM otherwise > */ > -int vma_shrink(struct vma_iterator *vmi, struct vm_area_struct *vma, > - unsigned long start, unsigned long end, pgoff_t pgoff) > +int vma_shrink(struct vma_merge_struct *vmg) > { > + struct vm_area_struct *vma = vmg->vma; > struct vma_prepare vp; > > - WARN_ON((vma->vm_start != start) && (vma->vm_end != end)); > + WARN_ON((vma->vm_start != vmg->start) && (vma->vm_end != vmg->end)); > > - if (vma->vm_start < start) > - vma_iter_config(vmi, vma->vm_start, start); > + if (vma->vm_start < vmg->start) > + vma_iter_config(vmg->vmi, vma->vm_start, vmg->start); > else > - vma_iter_config(vmi, end, vma->vm_end); > + vma_iter_config(vmg->vmi, vmg->end, vma->vm_end); > > - if (vma_iter_prealloc(vmi, NULL)) > + if (vma_iter_prealloc(vmg->vmi, NULL)) > return -ENOMEM; > > vma_start_write(vma); > > init_vma_prep(&vp, vma); > vma_prepare(&vp); > - vma_adjust_trans_huge(vma, start, end, 0); > + vma_adjust_trans_huge(vma, vmg->start, vmg->end, 0); > > - vma_iter_clear(vmi); > - vma_set_range(vma, start, end, pgoff); > - vma_complete(&vp, vmi, vma->vm_mm); > + vma_iter_clear(vmg->vmi); > + vma_set_range(vma, vmg->start, vmg->end, vmg->pgoff); > + vma_complete(&vp, vmg->vmi, vma->vm_mm); > return 0; > } > > diff --git a/mm/vma.h b/mm/vma.h > index c31684cc1da6..c464d25da120 100644 > --- a/mm/vma.h > +++ b/mm/vma.h > @@ -66,12 +66,8 @@ void init_vma_prep(struct vma_prepare *vp, > void vma_complete(struct vma_prepare *vp, > struct vma_iterator *vmi, struct mm_struct *mm); > > -int vma_expand(struct vma_iterator *vmi, struct vm_area_struct *vma, > - unsigned long start, unsigned long end, pgoff_t pgoff, > - struct vm_area_struct *next); > - > -int vma_shrink(struct vma_iterator *vmi, struct vm_area_struct *vma, > - unsigned long start, unsigned long end, pgoff_t pgoff); > +int vma_expand(struct vma_merge_struct *vmg); > +int vma_shrink(struct vma_merge_struct *vmg); > > int > do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, > diff --git a/tools/testing/vma/vma.c b/tools/testing/vma/vma.c > index 48e033c60d87..d216e51206c1 100644 > --- a/tools/testing/vma/vma.c > +++ b/tools/testing/vma/vma.c > @@ -142,10 +142,17 @@ static bool test_simple_expand(void) > struct mm_struct mm = {}; > struct vm_area_struct *vma = 
alloc_vma(&mm, 0, 0x1000, 0, flags); > VMA_ITERATOR(vmi, &mm, 0); > + struct vma_merge_struct vmg = { > + .vmi = &vmi, > + .vma = vma, > + .start = 0, > + .end = 0x3000, > + .pgoff = 0, > + }; > > ASSERT_FALSE(vma_link(&mm, vma)); > > - ASSERT_FALSE(vma_expand(&vmi, vma, 0, 0x3000, 0, NULL)); > + ASSERT_FALSE(vma_expand(&vmg)); > > ASSERT_EQ(vma->vm_start, 0); > ASSERT_EQ(vma->vm_end, 0x3000); > @@ -163,10 +170,17 @@ static bool test_simple_shrink(void) > struct mm_struct mm = {}; > struct vm_area_struct *vma = alloc_vma(&mm, 0, 0x3000, 0, flags); > VMA_ITERATOR(vmi, &mm, 0); > + struct vma_merge_struct vmg = { > + .vmi = &vmi, > + .vma = vma, > + .start = 0, > + .end = 0x1000, > + .pgoff = 0, > + }; > > ASSERT_FALSE(vma_link(&mm, vma)); > > - ASSERT_FALSE(vma_shrink(&vmi, vma, 0, 0x1000, 0)); > + ASSERT_FALSE(vma_shrink(&vmg)); > > ASSERT_EQ(vma->vm_start, 0); > ASSERT_EQ(vma->vm_end, 0x1000);
On Thu, Aug 08, 2024 at 04:20:26PM GMT, Vlastimil Babka (SUSE) wrote: > On 8/5/24 14:13, Lorenzo Stoakes wrote: > > Equally use struct vma_merge_struct to abstract parameters for VMA > > expansion and shrinking. > > > > This leads the way to further refactoring and de-duplication by > > standardising the interface. > > > > Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> > > --- > > mm/mmap.c | 30 +++++++++++-------- > > mm/vma.c | 66 ++++++++++++++++++----------------------- > > mm/vma.h | 8 ++--- > > tools/testing/vma/vma.c | 18 +++++++++-- > > 4 files changed, 65 insertions(+), 57 deletions(-) > > > > diff --git a/mm/mmap.c b/mm/mmap.c > > index 721ced6e37b0..04145347c245 100644 > > --- a/mm/mmap.c > > +++ b/mm/mmap.c > > @@ -1367,7 +1367,6 @@ unsigned long mmap_region(struct file *file, unsigned long addr, > > pgoff_t pglen = len >> PAGE_SHIFT; > > unsigned long charged = 0; > > unsigned long end = addr + len; > > - unsigned long merge_start = addr, merge_end = end; > > bool writable_file_mapping = false; > > int error; > > VMA_ITERATOR(vmi, mm, addr); > > @@ -1423,28 +1422,26 @@ unsigned long mmap_region(struct file *file, unsigned long addr, > > /* Attempt to expand an old mapping */ > > /* Check next */ > > if (next && next->vm_start == end && can_vma_merge_before(&vmg)) { > > - merge_end = next->vm_end; > > - vma = next; > > + /* We can adjust this as can_vma_merge_after() doesn't touch */ > > + vmg.end = next->vm_end; > > Ugh, ok but wonder how fragile that is. Yeah you're right this is a bit horrid, I'll find a way to make this less brittle. > > > + vma = vmg.vma = next; > > vmg.pgoff = next->vm_pgoff - pglen; > > - } > > > > - if (vma) { > > + /* We may merge our NULL anon_vma with non-NULL in next. */ > > Hm now I realize the if (vma) block probably didn't need to be added in > patch 2 only to removed here, it could have been part of the if (next && > ...) block above already? Which is not that important, but... You're right, will fix. > > > vmg.anon_vma = vma->anon_vma; > > - vmg.uffd_ctx = vma->vm_userfaultfd_ctx; > > I don't see why it's now ok to remove this line? Was it intended? In patch 2 > it made sense to me to add it so the can_vma_merge_after() still has the > right ctx for comparing, and this didn't change? Yeah, yikes, I think I was lost in the maelstrom of considering edge cases, and now this is broken for the whole prev vs. next uffd thing. The fact the mmap stuff is not directly testable is a factor here. TL;DR: I'll fix this, you're right. 
> > > } > > > > /* Check prev */ > > if (prev && prev->vm_end == addr && can_vma_merge_after(&vmg)) { > > - merge_start = prev->vm_start; > > - vma = prev; > > + vmg.start = prev->vm_start; > > + vma = vmg.vma = prev; > > vmg.pgoff = prev->vm_pgoff; > > } else if (prev) { > > vma_iter_next_range(&vmi); > > } > > > > /* Actually expand, if possible */ > > - if (vma && > > - !vma_expand(&vmi, vma, merge_start, merge_end, vmg.pgoff, next)) { > > + if (vma && !vma_expand(&vmg)) { > > khugepaged_enter_vma(vma, vm_flags); > > goto expanded; > > } > > @@ -2359,6 +2356,13 @@ int relocate_vma_down(struct vm_area_struct *vma, unsigned long shift) > > VMA_ITERATOR(vmi, mm, new_start); > > struct vm_area_struct *next; > > struct mmu_gather tlb; > > + struct vma_merge_struct vmg = { > > + .vmi = &vmi, > > + .vma = vma, > > + .start = new_start, > > + .end = old_end, > > + .pgoff = vma->vm_pgoff, > > + }; > > > > BUG_ON(new_start > new_end); > > > > @@ -2373,7 +2377,7 @@ int relocate_vma_down(struct vm_area_struct *vma, unsigned long shift) > > /* > > * cover the whole range: [new_start, old_end) > > */ > > - if (vma_expand(&vmi, vma, new_start, old_end, vma->vm_pgoff, NULL)) > > + if (vma_expand(&vmg)) > > return -ENOMEM; > > > > /* > > @@ -2406,6 +2410,8 @@ int relocate_vma_down(struct vm_area_struct *vma, unsigned long shift) > > tlb_finish_mmu(&tlb); > > > > vma_prev(&vmi); > > + vmg.end = new_end; > > + > > /* Shrink the vma to just the new range */ > > - return vma_shrink(&vmi, vma, new_start, new_end, vma->vm_pgoff); > > + return vma_shrink(&vmg); > > The vma_shrink() doesn't seem to benefit that much from vmg conversion but I > guess why not. Maybe this will further change anyway... > No it doesn't, but it's more about being consistent with vma_expand(). We maybe want to find a way to unite them possibly.
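For readers following the uffd_ctx point above: one possible shape of the fix, sketched purely from the identifiers visible in this patch (the actual v2 may well restructure this differently), is to keep copying next's userfaultfd context into vmg alongside its anon_vma, so that the later can_vma_merge_after(&vmg) check against prev still compares the correct context:

        /* Check next */
        if (next && next->vm_start == end && can_vma_merge_before(&vmg)) {
                vmg.end = next->vm_end;
                vma = vmg.vma = next;
                vmg.pgoff = next->vm_pgoff - pglen;
                /*
                 * Carry next's anon_vma *and* userfaultfd context into vmg,
                 * since the prev check below compares these via vmg.
                 */
                vmg.anon_vma = vma->anon_vma;
                vmg.uffd_ctx = vma->vm_userfaultfd_ctx;
        }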
* Lorenzo Stoakes <lorenzo.stoakes@oracle.com> [240808 11:46]: > On Thu, Aug 08, 2024 at 04:20:26PM GMT, Vlastimil Babka (SUSE) wrote: > > On 8/5/24 14:13, Lorenzo Stoakes wrote: > > > Equally use struct vma_merge_struct to abstract parameters for VMA > > > expansion and shrinking. > > > > > > This leads the way to further refactoring and de-duplication by > > > standardising the interface. > > > > > > Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> > > > --- > > > mm/mmap.c | 30 +++++++++++-------- > > > mm/vma.c | 66 ++++++++++++++++++----------------------- > > > mm/vma.h | 8 ++--- > > > tools/testing/vma/vma.c | 18 +++++++++-- > > > 4 files changed, 65 insertions(+), 57 deletions(-) > > > > > > diff --git a/mm/mmap.c b/mm/mmap.c > > > index 721ced6e37b0..04145347c245 100644 > > > --- a/mm/mmap.c > > > +++ b/mm/mmap.c > > > @@ -1367,7 +1367,6 @@ unsigned long mmap_region(struct file *file, unsigned long addr, > > > pgoff_t pglen = len >> PAGE_SHIFT; > > > unsigned long charged = 0; > > > unsigned long end = addr + len; > > > - unsigned long merge_start = addr, merge_end = end; > > > bool writable_file_mapping = false; > > > int error; > > > VMA_ITERATOR(vmi, mm, addr); > > > @@ -1423,28 +1422,26 @@ unsigned long mmap_region(struct file *file, unsigned long addr, > > > /* Attempt to expand an old mapping */ > > > /* Check next */ > > > if (next && next->vm_start == end && can_vma_merge_before(&vmg)) { > > > - merge_end = next->vm_end; > > > - vma = next; > > > + /* We can adjust this as can_vma_merge_after() doesn't touch */ > > > + vmg.end = next->vm_end; > > > > Ugh, ok but wonder how fragile that is. > > Yeah you're right this is a bit horrid, I'll find a way to make this less > brittle. > > > > > > + vma = vmg.vma = next; > > > vmg.pgoff = next->vm_pgoff - pglen; > > > - } > > > > > > - if (vma) { > > > + /* We may merge our NULL anon_vma with non-NULL in next. */ > > > > Hm now I realize the if (vma) block probably didn't need to be added in > > patch 2 only to removed here, it could have been part of the if (next && > > ...) block above already? Which is not that important, but... > > You're right, will fix. > > > > > > vmg.anon_vma = vma->anon_vma; > > > - vmg.uffd_ctx = vma->vm_userfaultfd_ctx; > > > > I don't see why it's now ok to remove this line? Was it intended? In patch 2 > > it made sense to me to add it so the can_vma_merge_after() still has the > > right ctx for comparing, and this didn't change? > > Yeah, yikes, I think I was lost in the maelstrom of considering edge cases, > and now this is broken for the whole prev vs. next uffd thing. > > The fact the mmap stuff is not directly testable is a factor here. > > TL;DR: I'll fix this, you're right. 
> > > > > > } > > > > > > /* Check prev */ > > > if (prev && prev->vm_end == addr && can_vma_merge_after(&vmg)) { > > > - merge_start = prev->vm_start; > > > - vma = prev; > > > + vmg.start = prev->vm_start; > > > + vma = vmg.vma = prev; > > > vmg.pgoff = prev->vm_pgoff; > > > } else if (prev) { > > > vma_iter_next_range(&vmi); > > > } > > > > > > /* Actually expand, if possible */ > > > - if (vma && > > > - !vma_expand(&vmi, vma, merge_start, merge_end, vmg.pgoff, next)) { > > > + if (vma && !vma_expand(&vmg)) { > > > khugepaged_enter_vma(vma, vm_flags); > > > goto expanded; > > > } > > > @@ -2359,6 +2356,13 @@ int relocate_vma_down(struct vm_area_struct *vma, unsigned long shift) > > > VMA_ITERATOR(vmi, mm, new_start); > > > struct vm_area_struct *next; > > > struct mmu_gather tlb; > > > + struct vma_merge_struct vmg = { > > > + .vmi = &vmi, > > > + .vma = vma, > > > + .start = new_start, > > > + .end = old_end, > > > + .pgoff = vma->vm_pgoff, > > > + }; > > > > > > BUG_ON(new_start > new_end); > > > > > > @@ -2373,7 +2377,7 @@ int relocate_vma_down(struct vm_area_struct *vma, unsigned long shift) > > > /* > > > * cover the whole range: [new_start, old_end) > > > */ > > > - if (vma_expand(&vmi, vma, new_start, old_end, vma->vm_pgoff, NULL)) > > > + if (vma_expand(&vmg)) > > > return -ENOMEM; > > > > > > /* > > > @@ -2406,6 +2410,8 @@ int relocate_vma_down(struct vm_area_struct *vma, unsigned long shift) > > > tlb_finish_mmu(&tlb); > > > > > > vma_prev(&vmi); > > > + vmg.end = new_end; > > > + > > > /* Shrink the vma to just the new range */ > > > - return vma_shrink(&vmi, vma, new_start, new_end, vma->vm_pgoff); > > > + return vma_shrink(&vmg); > > > > The vma_shrink() doesn't seem to benefit that much from vmg conversion but I > > guess why not. Maybe this will further change anyway... > > > > No it doesn't, but it's more about being consistent with vma_expand(). We > maybe want to find a way to unite them possibly. No, we probably should not unite them - the shrink happens in a single place on setup.
On Thu, Aug 08, 2024 at 04:20:57PM GMT, Liam R. Howlett wrote: > * Lorenzo Stoakes <lorenzo.stoakes@oracle.com> [240808 11:46]: > > On Thu, Aug 08, 2024 at 04:20:26PM GMT, Vlastimil Babka (SUSE) wrote: > > > On 8/5/24 14:13, Lorenzo Stoakes wrote: > > > > Equally use struct vma_merge_struct to abstract parameters for VMA > > > > expansion and shrinking. > > > > > > > > This leads the way to further refactoring and de-duplication by > > > > standardising the interface. > > > > > > > > Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> > > > > --- > > > > mm/mmap.c | 30 +++++++++++-------- > > > > mm/vma.c | 66 ++++++++++++++++++----------------------- > > > > mm/vma.h | 8 ++--- > > > > tools/testing/vma/vma.c | 18 +++++++++-- > > > > 4 files changed, 65 insertions(+), 57 deletions(-) > > > > > > > > diff --git a/mm/mmap.c b/mm/mmap.c > > > > index 721ced6e37b0..04145347c245 100644 > > > > --- a/mm/mmap.c > > > > +++ b/mm/mmap.c > > > > @@ -1367,7 +1367,6 @@ unsigned long mmap_region(struct file *file, unsigned long addr, > > > > pgoff_t pglen = len >> PAGE_SHIFT; > > > > unsigned long charged = 0; > > > > unsigned long end = addr + len; > > > > - unsigned long merge_start = addr, merge_end = end; > > > > bool writable_file_mapping = false; > > > > int error; > > > > VMA_ITERATOR(vmi, mm, addr); > > > > @@ -1423,28 +1422,26 @@ unsigned long mmap_region(struct file *file, unsigned long addr, > > > > /* Attempt to expand an old mapping */ > > > > /* Check next */ > > > > if (next && next->vm_start == end && can_vma_merge_before(&vmg)) { > > > > - merge_end = next->vm_end; > > > > - vma = next; > > > > + /* We can adjust this as can_vma_merge_after() doesn't touch */ > > > > + vmg.end = next->vm_end; > > > > > > Ugh, ok but wonder how fragile that is. > > > > Yeah you're right this is a bit horrid, I'll find a way to make this less > > brittle. > > > > > > > > > + vma = vmg.vma = next; > > > > vmg.pgoff = next->vm_pgoff - pglen; > > > > - } > > > > > > > > - if (vma) { > > > > + /* We may merge our NULL anon_vma with non-NULL in next. */ > > > > > > Hm now I realize the if (vma) block probably didn't need to be added in > > > patch 2 only to removed here, it could have been part of the if (next && > > > ...) block above already? Which is not that important, but... > > > > You're right, will fix. > > > > > > > > > vmg.anon_vma = vma->anon_vma; > > > > - vmg.uffd_ctx = vma->vm_userfaultfd_ctx; > > > > > > I don't see why it's now ok to remove this line? Was it intended? In patch 2 > > > it made sense to me to add it so the can_vma_merge_after() still has the > > > right ctx for comparing, and this didn't change? > > > > Yeah, yikes, I think I was lost in the maelstrom of considering edge cases, > > and now this is broken for the whole prev vs. next uffd thing. > > > > The fact the mmap stuff is not directly testable is a factor here. > > > > TL;DR: I'll fix this, you're right. 
> > > > > > > > > } > > > > > > > > /* Check prev */ > > > > if (prev && prev->vm_end == addr && can_vma_merge_after(&vmg)) { > > > > - merge_start = prev->vm_start; > > > > - vma = prev; > > > > + vmg.start = prev->vm_start; > > > > + vma = vmg.vma = prev; > > > > vmg.pgoff = prev->vm_pgoff; > > > > } else if (prev) { > > > > vma_iter_next_range(&vmi); > > > > } > > > > > > > > /* Actually expand, if possible */ > > > > - if (vma && > > > > - !vma_expand(&vmi, vma, merge_start, merge_end, vmg.pgoff, next)) { > > > > + if (vma && !vma_expand(&vmg)) { > > > > khugepaged_enter_vma(vma, vm_flags); > > > > goto expanded; > > > > } > > > > @@ -2359,6 +2356,13 @@ int relocate_vma_down(struct vm_area_struct *vma, unsigned long shift) > > > > VMA_ITERATOR(vmi, mm, new_start); > > > > struct vm_area_struct *next; > > > > struct mmu_gather tlb; > > > > + struct vma_merge_struct vmg = { > > > > + .vmi = &vmi, > > > > + .vma = vma, > > > > + .start = new_start, > > > > + .end = old_end, > > > > + .pgoff = vma->vm_pgoff, > > > > + }; > > > > > > > > BUG_ON(new_start > new_end); > > > > > > > > @@ -2373,7 +2377,7 @@ int relocate_vma_down(struct vm_area_struct *vma, unsigned long shift) > > > > /* > > > > * cover the whole range: [new_start, old_end) > > > > */ > > > > - if (vma_expand(&vmi, vma, new_start, old_end, vma->vm_pgoff, NULL)) > > > > + if (vma_expand(&vmg)) > > > > return -ENOMEM; > > > > > > > > /* > > > > @@ -2406,6 +2410,8 @@ int relocate_vma_down(struct vm_area_struct *vma, unsigned long shift) > > > > tlb_finish_mmu(&tlb); > > > > > > > > vma_prev(&vmi); > > > > + vmg.end = new_end; > > > > + > > > > /* Shrink the vma to just the new range */ > > > > - return vma_shrink(&vmi, vma, new_start, new_end, vma->vm_pgoff); > > > > + return vma_shrink(&vmg); > > > > > > The vma_shrink() doesn't seem to benefit that much from vmg conversion but I > > > guess why not. Maybe this will further change anyway... > > > > > > > No it doesn't, but it's more about being consistent with vma_expand(). We > > maybe want to find a way to unite them possibly. > > No, we probably should not unite them - the shrink happens in a single > place on setup. > Ack will in that case un-vmg vma_shrink().
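For reference, "un-vmg"-ing vma_shrink() presumably means keeping the explicit-parameter form this patch removes, i.e. roughly:

        int vma_shrink(struct vma_iterator *vmi, struct vm_area_struct *vma,
                       unsigned long start, unsigned long end, pgoff_t pgoff);

with its single caller in relocate_vma_down() staying as:

        return vma_shrink(&vmi, vma, new_start, new_end, vma->vm_pgoff);

rather than building a vma_merge_struct just for the setup-time shrink.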
On Thu, Aug 08, 2024 at 04:45:53PM GMT, Lorenzo Stoakes wrote: > On Thu, Aug 08, 2024 at 04:20:26PM GMT, Vlastimil Babka (SUSE) wrote: > > On 8/5/24 14:13, Lorenzo Stoakes wrote: > > > Equally use struct vma_merge_struct to abstract parameters for VMA > > > expansion and shrinking. > > > > > > This leads the way to further refactoring and de-duplication by > > > standardising the interface. > > > > > > Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> > > > --- > > > mm/mmap.c | 30 +++++++++++-------- > > > mm/vma.c | 66 ++++++++++++++++++----------------------- > > > mm/vma.h | 8 ++--- > > > tools/testing/vma/vma.c | 18 +++++++++-- > > > 4 files changed, 65 insertions(+), 57 deletions(-) > > > > > > diff --git a/mm/mmap.c b/mm/mmap.c > > > index 721ced6e37b0..04145347c245 100644 > > > --- a/mm/mmap.c > > > +++ b/mm/mmap.c > > > @@ -1367,7 +1367,6 @@ unsigned long mmap_region(struct file *file, unsigned long addr, > > > pgoff_t pglen = len >> PAGE_SHIFT; > > > unsigned long charged = 0; > > > unsigned long end = addr + len; > > > - unsigned long merge_start = addr, merge_end = end; > > > bool writable_file_mapping = false; > > > int error; > > > VMA_ITERATOR(vmi, mm, addr); > > > @@ -1423,28 +1422,26 @@ unsigned long mmap_region(struct file *file, unsigned long addr, > > > /* Attempt to expand an old mapping */ > > > /* Check next */ > > > if (next && next->vm_start == end && can_vma_merge_before(&vmg)) { > > > - merge_end = next->vm_end; > > > - vma = next; > > > + /* We can adjust this as can_vma_merge_after() doesn't touch */ > > > + vmg.end = next->vm_end; > > > > Ugh, ok but wonder how fragile that is. > > Yeah you're right this is a bit horrid, I'll find a way to make this less > brittle. FYI for when I send out the v2 respin: Actually, as I work through it now, I think this is OK as-is (I'll remove the comment as it's confusing though). The next block checks prev, so the end of the VMA doesn't really matter, and in any case isn't checked by can_vma_merge_after(), but rather by the prev->vm_end == addr conditional below. I've addressed your other comments. [snip]
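To make the above reasoning concrete, the post-patch flow in mmap_region() looks roughly like this (condensed from the diff below, with comments added here to spell out why adjusting vmg.end in the next check cannot affect the prev check):

        /* Check next: may set vmg.end, vmg.vma, vmg.pgoff and vmg.anon_vma. */
        if (next && next->vm_start == end && can_vma_merge_before(&vmg)) {
                vmg.end = next->vm_end;
                vma = vmg.vma = next;
                vmg.pgoff = next->vm_pgoff - pglen;
                vmg.anon_vma = vma->anon_vma;
        }

        /*
         * Check prev: whether prev abuts the new range is decided by
         * prev->vm_end == addr, and can_vma_merge_after() compares prev's
         * attributes, not vmg.end, so the adjustment above is harmless here.
         */
        if (prev && prev->vm_end == addr && can_vma_merge_after(&vmg)) {
                vmg.start = prev->vm_start;
                vma = vmg.vma = prev;
                vmg.pgoff = prev->vm_pgoff;
        } else if (prev) {
                vma_iter_next_range(&vmi);
        }

        /* Actually expand, if possible */
        if (vma && !vma_expand(&vmg)) {
                khugepaged_enter_vma(vma, vm_flags);
                goto expanded;
        }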
diff --git a/mm/mmap.c b/mm/mmap.c
index 721ced6e37b0..04145347c245 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1367,7 +1367,6 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
         pgoff_t pglen = len >> PAGE_SHIFT;
         unsigned long charged = 0;
         unsigned long end = addr + len;
-        unsigned long merge_start = addr, merge_end = end;
         bool writable_file_mapping = false;
         int error;
         VMA_ITERATOR(vmi, mm, addr);
@@ -1423,28 +1422,26 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
         /* Attempt to expand an old mapping */
         /* Check next */
         if (next && next->vm_start == end && can_vma_merge_before(&vmg)) {
-                merge_end = next->vm_end;
-                vma = next;
+                /* We can adjust this as can_vma_merge_after() doesn't touch */
+                vmg.end = next->vm_end;
+                vma = vmg.vma = next;
                 vmg.pgoff = next->vm_pgoff - pglen;
-        }

-        if (vma) {
+                /* We may merge our NULL anon_vma with non-NULL in next. */
                 vmg.anon_vma = vma->anon_vma;
-                vmg.uffd_ctx = vma->vm_userfaultfd_ctx;
         }

         /* Check prev */
         if (prev && prev->vm_end == addr && can_vma_merge_after(&vmg)) {
-                merge_start = prev->vm_start;
-                vma = prev;
+                vmg.start = prev->vm_start;
+                vma = vmg.vma = prev;
                 vmg.pgoff = prev->vm_pgoff;
         } else if (prev) {
                 vma_iter_next_range(&vmi);
         }

         /* Actually expand, if possible */
-        if (vma &&
-            !vma_expand(&vmi, vma, merge_start, merge_end, vmg.pgoff, next)) {
+        if (vma && !vma_expand(&vmg)) {
                 khugepaged_enter_vma(vma, vm_flags);
                 goto expanded;
         }
@@ -2359,6 +2356,13 @@ int relocate_vma_down(struct vm_area_struct *vma, unsigned long shift)
         VMA_ITERATOR(vmi, mm, new_start);
         struct vm_area_struct *next;
         struct mmu_gather tlb;
+        struct vma_merge_struct vmg = {
+                .vmi = &vmi,
+                .vma = vma,
+                .start = new_start,
+                .end = old_end,
+                .pgoff = vma->vm_pgoff,
+        };

         BUG_ON(new_start > new_end);

@@ -2373,7 +2377,7 @@ int relocate_vma_down(struct vm_area_struct *vma, unsigned long shift)
         /*
          * cover the whole range: [new_start, old_end)
          */
-        if (vma_expand(&vmi, vma, new_start, old_end, vma->vm_pgoff, NULL))
+        if (vma_expand(&vmg))
                 return -ENOMEM;

         /*
@@ -2406,6 +2410,8 @@ int relocate_vma_down(struct vm_area_struct *vma, unsigned long shift)
         tlb_finish_mmu(&tlb);

         vma_prev(&vmi);
+        vmg.end = new_end;
+
         /* Shrink the vma to just the new range */
-        return vma_shrink(&vmi, vma, new_start, new_end, vma->vm_pgoff);
+        return vma_shrink(&vmg);
 }
diff --git a/mm/vma.c b/mm/vma.c
index b452b472a085..3d6ce04f1b9c 100644
--- a/mm/vma.c
+++ b/mm/vma.c
@@ -489,30 +489,25 @@ void validate_mm(struct mm_struct *mm)
 /*
  * vma_expand - Expand an existing VMA
  *
- * @vmi: The vma iterator
- * @vma: The vma to expand
- * @start: The start of the vma
- * @end: The exclusive end of the vma
- * @pgoff: The page offset of vma
- * @next: The current of next vma.
+ * @vmg: Describes a VMA expansion operation.
  *
- * Expand @vma to @start and @end. Can expand off the start and end. Will
- * expand over @next if it's different from @vma and @end == @next->vm_end.
- * Checking if the @vma can expand and merge with @next needs to be handled by
- * the caller.
+ * Expand @vma to vmg->start and vmg->end. Can expand off the start and end.
+ * Will expand over vmg->next if it's different from vmg->vma and vmg->end ==
+ * vmg->next->vm_end. Checking if the vmg->vma can expand and merge with
+ * vmg->next needs to be handled by the caller.
  *
  * Returns: 0 on success
  */
-int vma_expand(struct vma_iterator *vmi, struct vm_area_struct *vma,
-               unsigned long start, unsigned long end, pgoff_t pgoff,
-               struct vm_area_struct *next)
+int vma_expand(struct vma_merge_struct *vmg)
 {
         struct vm_area_struct *anon_dup = NULL;
         bool remove_next = false;
+        struct vm_area_struct *vma = vmg->vma;
+        struct vm_area_struct *next = vmg->next;
         struct vma_prepare vp;

         vma_start_write(vma);
-        if (next && (vma != next) && (end == next->vm_end)) {
+        if (next && (vma != next) && (vmg->end == next->vm_end)) {
                 int ret;

                 remove_next = true;
@@ -525,21 +520,21 @@ int vma_expand(struct vma_iterator *vmi, struct vm_area_struct *vma,
         init_multi_vma_prep(&vp, vma, NULL, remove_next ? next : NULL, NULL);
         /* Not merging but overwriting any part of next is not handled. */
         VM_WARN_ON(next && !vp.remove &&
-                   next != vma && end > next->vm_start);
+                   next != vma && vmg->end > next->vm_start);
         /* Only handles expanding */
-        VM_WARN_ON(vma->vm_start < start || vma->vm_end > end);
+        VM_WARN_ON(vma->vm_start < vmg->start || vma->vm_end > vmg->end);

         /* Note: vma iterator must be pointing to 'start' */
-        vma_iter_config(vmi, start, end);
-        if (vma_iter_prealloc(vmi, vma))
+        vma_iter_config(vmg->vmi, vmg->start, vmg->end);
+        if (vma_iter_prealloc(vmg->vmi, vma))
                 goto nomem;

         vma_prepare(&vp);
-        vma_adjust_trans_huge(vma, start, end, 0);
-        vma_set_range(vma, start, end, pgoff);
-        vma_iter_store(vmi, vma);
+        vma_adjust_trans_huge(vma, vmg->start, vmg->end, 0);
+        vma_set_range(vma, vmg->start, vmg->end, vmg->pgoff);
+        vma_iter_store(vmg->vmi, vma);

-        vma_complete(&vp, vmi, vma->vm_mm);
+        vma_complete(&vp, vmg->vmi, vma->vm_mm);
         return 0;

 nomem:
@@ -550,37 +545,34 @@ int vma_expand(struct vma_iterator *vmi, struct vm_area_struct *vma,

 /*
  * vma_shrink() - Reduce an existing VMAs memory area
- * @vmi: The vma iterator
- * @vma: The VMA to modify
- * @start: The new start
- * @end: The new end
+ * @vmg: Describes a VMA shrink operation.
  *
  * Returns: 0 on success, -ENOMEM otherwise
  */
-int vma_shrink(struct vma_iterator *vmi, struct vm_area_struct *vma,
-               unsigned long start, unsigned long end, pgoff_t pgoff)
+int vma_shrink(struct vma_merge_struct *vmg)
 {
+        struct vm_area_struct *vma = vmg->vma;
         struct vma_prepare vp;

-        WARN_ON((vma->vm_start != start) && (vma->vm_end != end));
+        WARN_ON((vma->vm_start != vmg->start) && (vma->vm_end != vmg->end));

-        if (vma->vm_start < start)
-                vma_iter_config(vmi, vma->vm_start, start);
+        if (vma->vm_start < vmg->start)
+                vma_iter_config(vmg->vmi, vma->vm_start, vmg->start);
         else
-                vma_iter_config(vmi, end, vma->vm_end);
+                vma_iter_config(vmg->vmi, vmg->end, vma->vm_end);

-        if (vma_iter_prealloc(vmi, NULL))
+        if (vma_iter_prealloc(vmg->vmi, NULL))
                 return -ENOMEM;

         vma_start_write(vma);

         init_vma_prep(&vp, vma);
         vma_prepare(&vp);
-        vma_adjust_trans_huge(vma, start, end, 0);
+        vma_adjust_trans_huge(vma, vmg->start, vmg->end, 0);

-        vma_iter_clear(vmi);
-        vma_set_range(vma, start, end, pgoff);
-        vma_complete(&vp, vmi, vma->vm_mm);
+        vma_iter_clear(vmg->vmi);
+        vma_set_range(vma, vmg->start, vmg->end, vmg->pgoff);
+        vma_complete(&vp, vmg->vmi, vma->vm_mm);
         return 0;
 }

diff --git a/mm/vma.h b/mm/vma.h
index c31684cc1da6..c464d25da120 100644
--- a/mm/vma.h
+++ b/mm/vma.h
@@ -66,12 +66,8 @@ void init_vma_prep(struct vma_prepare *vp,
 void vma_complete(struct vma_prepare *vp,
                 struct vma_iterator *vmi, struct mm_struct *mm);

-int vma_expand(struct vma_iterator *vmi, struct vm_area_struct *vma,
-               unsigned long start, unsigned long end, pgoff_t pgoff,
-               struct vm_area_struct *next);
-
-int vma_shrink(struct vma_iterator *vmi, struct vm_area_struct *vma,
-               unsigned long start, unsigned long end, pgoff_t pgoff);
+int vma_expand(struct vma_merge_struct *vmg);
+int vma_shrink(struct vma_merge_struct *vmg);

 int
 do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
diff --git a/tools/testing/vma/vma.c b/tools/testing/vma/vma.c
index 48e033c60d87..d216e51206c1 100644
--- a/tools/testing/vma/vma.c
+++ b/tools/testing/vma/vma.c
@@ -142,10 +142,17 @@ static bool test_simple_expand(void)
         struct mm_struct mm = {};
         struct vm_area_struct *vma = alloc_vma(&mm, 0, 0x1000, 0, flags);
         VMA_ITERATOR(vmi, &mm, 0);
+        struct vma_merge_struct vmg = {
+                .vmi = &vmi,
+                .vma = vma,
+                .start = 0,
+                .end = 0x3000,
+                .pgoff = 0,
+        };

         ASSERT_FALSE(vma_link(&mm, vma));

-        ASSERT_FALSE(vma_expand(&vmi, vma, 0, 0x3000, 0, NULL));
+        ASSERT_FALSE(vma_expand(&vmg));

         ASSERT_EQ(vma->vm_start, 0);
         ASSERT_EQ(vma->vm_end, 0x3000);
@@ -163,10 +170,17 @@ static bool test_simple_shrink(void)
         struct mm_struct mm = {};
         struct vm_area_struct *vma = alloc_vma(&mm, 0, 0x3000, 0, flags);
         VMA_ITERATOR(vmi, &mm, 0);
+        struct vma_merge_struct vmg = {
+                .vmi = &vmi,
+                .vma = vma,
+                .start = 0,
+                .end = 0x1000,
+                .pgoff = 0,
+        };

         ASSERT_FALSE(vma_link(&mm, vma));

-        ASSERT_FALSE(vma_shrink(&vmi, vma, 0, 0x1000, 0));
+        ASSERT_FALSE(vma_shrink(&vmg));

         ASSERT_EQ(vma->vm_start, 0);
         ASSERT_EQ(vma->vm_end, 0x1000);
Equally use struct vma_merge_struct to abstract parameters for VMA expansion
and shrinking.

This leads the way to further refactoring and de-duplication by standardising
the interface.

Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
---
 mm/mmap.c               | 30 +++++++++++--------
 mm/vma.c                | 66 ++++++++++++++++++-----------------------
 mm/vma.h                |  8 ++---
 tools/testing/vma/vma.c | 18 +++++++++--
 4 files changed, 65 insertions(+), 57 deletions(-)
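For readers who have not seen the earlier patches in this series, the fields this patch reads and writes imply a structure along these lines (a sketch reconstructed from this patch alone; the real struct vma_merge_struct introduced earlier in the series carries further merge-related fields and may differ in layout):

        /* Sketch only: fields inferred from their use in this patch. */
        struct vma_merge_struct {
                struct vma_iterator *vmi;       /* iterator positioned for the operation */
                struct vm_area_struct *vma;     /* the VMA being expanded or shrunk */
                struct vm_area_struct *next;    /* VMA that may be merged over, if any */
                unsigned long start;            /* proposed new vm_start */
                unsigned long end;              /* proposed new vm_end (exclusive) */
                pgoff_t pgoff;                  /* proposed new vm_pgoff */
                struct anon_vma *anon_vma;      /* anon_vma compared for mergeability */
                struct vm_userfaultfd_ctx uffd_ctx; /* userfaultfd ctx compared for mergeability */
        };

Callers fill in only the fields relevant to the operation, as the designated initialisers in relocate_vma_down() and the tests above show.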