Message ID | 20211201142918.921493-56-Liam.Howlett@oracle.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | Introducing the Maple Tree | expand |
On 12/1/21 15:30, Liam Howlett wrote: > From: "Liam R. Howlett" <Liam.Howlett@Oracle.com> > > Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org> > Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com> > --- > mm/mempolicy.c | 53 ++++++++++++++++++++++++++------------------------ > 1 file changed, 28 insertions(+), 25 deletions(-) > > diff --git a/mm/mempolicy.c b/mm/mempolicy.c > index 10e9c87260ed..0e2d918f4f30 100644 > --- a/mm/mempolicy.c > +++ b/mm/mempolicy.c > @@ -377,9 +377,10 @@ void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new) > void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new) > { > struct vm_area_struct *vma; > + MA_STATE(mas, &mm->mm_mt, 0, 0); VMA_ITERATOR? > > mmap_write_lock(mm); > - for (vma = mm->mmap; vma; vma = vma->vm_next) > + mas_for_each(&mas, vma, ULONG_MAX) > mpol_rebind_policy(vma->vm_policy, new); > mmap_write_unlock(mm); > } > @@ -652,7 +653,7 @@ static unsigned long change_prot_numa(struct vm_area_struct *vma, > static int queue_pages_test_walk(unsigned long start, unsigned long end, > struct mm_walk *walk) > { > - struct vm_area_struct *vma = walk->vma; > + struct vm_area_struct *next, *vma = walk->vma; > struct queue_pages *qp = walk->private; > unsigned long endvma = vma->vm_end; > unsigned long flags = qp->flags; > @@ -667,9 +668,10 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end, > /* hole at head side of range */ > return -EFAULT; > } > + next = find_vma(vma->vm_mm, vma->vm_end); > if (!(flags & MPOL_MF_DISCONTIG_OK) && > ((vma->vm_end < qp->end) && > - (!vma->vm_next || vma->vm_end < vma->vm_next->vm_start))) > + (!next || vma->vm_end < next->vm_start))) > /* hole at middle or tail of range */ > return -EFAULT; > > @@ -783,28 +785,24 @@ static int vma_replace_policy(struct vm_area_struct *vma, > static int mbind_range(struct mm_struct *mm, unsigned long start, > unsigned long end, struct mempolicy *new_pol) > { > - struct vm_area_struct *next; > + MA_STATE(mas, &mm->mm_mt, start - 1, start - 1); > struct vm_area_struct *prev; > struct vm_area_struct *vma; > int err = 0; > pgoff_t pgoff; > - unsigned long vmstart; > - unsigned long vmend; > - > - vma = find_vma(mm, start); > - VM_BUG_ON(!vma); > > - prev = vma->vm_prev; > - if (start > vma->vm_start) > - prev = vma; > + prev = mas_find_rev(&mas, 0); > + if (prev && (start < prev->vm_end)) > + vma = prev; > + else > + vma = mas_next(&mas, end - 1); > > - for (; vma && vma->vm_start < end; prev = vma, vma = next) { > - next = vma->vm_next; > - vmstart = max(start, vma->vm_start); > - vmend = min(end, vma->vm_end); > + do { > + unsigned long vmstart = max(start, vma->vm_start); > + unsigned long vmend = min(end, vma->vm_end); What if vma is null? Shouldn't this rather be a "for (; vma; vma = mas_next(...)" > > if (mpol_equal(vma_policy(vma), new_pol)) > - continue; > + goto next; > > pgoff = vma->vm_pgoff + > ((vmstart - vma->vm_start) >> PAGE_SHIFT); > @@ -812,8 +810,8 @@ static int mbind_range(struct mm_struct *mm, unsigned long start, > vma->anon_vma, vma->vm_file, pgoff, > new_pol, vma->vm_userfaultfd_ctx); > if (prev) { > + mas_pause(&mas); > vma = prev; > - next = vma->vm_next; > if (mpol_equal(vma_policy(vma), new_pol)) > continue; > /* vma_merge() joined vma && vma->next, case 8 */ > @@ -823,19 +821,23 @@ static int mbind_range(struct mm_struct *mm, unsigned long start, > err = split_vma(vma->vm_mm, vma, vmstart, 1); > if (err) > goto out; > + mas_pause(&mas); > } > if (vma->vm_end != vmend) { > err = split_vma(vma->vm_mm, vma, vmend, 0); > if (err) > goto out; > + /* mas_pause() unnecessary as the loop is ending */ > } > - replace: > +replace: > err = vma_replace_policy(vma, new_pol); > if (err) > goto out; > - } > +next: > + prev = vma; > + } while ((vma = mas_next(&mas, end - 1)) != NULL); > > - out: > +out: > return err; > } > > @@ -1053,6 +1055,7 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest, > int flags) > { > nodemask_t nmask; > + struct vm_area_struct *vma; > LIST_HEAD(pagelist); > int err = 0; > struct migration_target_control mtc = { > @@ -1068,8 +1071,9 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest, > * need migration. Between passing in the full user address > * space range and MPOL_MF_DISCONTIG_OK, this call can not fail. > */ > + vma = find_vma(mm, 0); > VM_BUG_ON(!(flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))); > - queue_pages_range(mm, mm->mmap->vm_start, mm->task_size, &nmask, > + queue_pages_range(mm, vma->vm_start, mm->task_size, &nmask, > flags | MPOL_MF_DISCONTIG_OK, &pagelist); > > if (!list_empty(&pagelist)) { > @@ -1198,13 +1202,12 @@ static struct page *new_page(struct page *page, unsigned long start) > { > struct vm_area_struct *vma; > unsigned long address; > + MA_STATE(mas, ¤t->mm->mm_mt, start, start); > > - vma = find_vma(current->mm, start); > - while (vma) { > + mas_for_each(&mas, vma, ULONG_MAX) { > address = page_address_in_vma(page, vma); > if (address != -EFAULT) > break; > - vma = vma->vm_next; > } > > if (PageHuge(page)) {
* Vlastimil Babka <vbabka@suse.cz> [220120 06:58]: > On 12/1/21 15:30, Liam Howlett wrote: > > From: "Liam R. Howlett" <Liam.Howlett@Oracle.com> > > > > Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org> > > Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com> > > --- > > mm/mempolicy.c | 53 ++++++++++++++++++++++++++------------------------ > > 1 file changed, 28 insertions(+), 25 deletions(-) > > > > diff --git a/mm/mempolicy.c b/mm/mempolicy.c > > index 10e9c87260ed..0e2d918f4f30 100644 > > --- a/mm/mempolicy.c > > +++ b/mm/mempolicy.c > > @@ -377,9 +377,10 @@ void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new) > > void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new) > > { > > struct vm_area_struct *vma; > > + MA_STATE(mas, &mm->mm_mt, 0, 0); > > VMA_ITERATOR? Yes, I will make this change. Thanks. > > > > > mmap_write_lock(mm); > > - for (vma = mm->mmap; vma; vma = vma->vm_next) > > + mas_for_each(&mas, vma, ULONG_MAX) > > mpol_rebind_policy(vma->vm_policy, new); > > mmap_write_unlock(mm); > > } > > @@ -652,7 +653,7 @@ static unsigned long change_prot_numa(struct vm_area_struct *vma, > > static int queue_pages_test_walk(unsigned long start, unsigned long end, > > struct mm_walk *walk) > > { > > - struct vm_area_struct *vma = walk->vma; > > + struct vm_area_struct *next, *vma = walk->vma; > > struct queue_pages *qp = walk->private; > > unsigned long endvma = vma->vm_end; > > unsigned long flags = qp->flags; > > @@ -667,9 +668,10 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end, > > /* hole at head side of range */ > > return -EFAULT; > > } > > + next = find_vma(vma->vm_mm, vma->vm_end); > > if (!(flags & MPOL_MF_DISCONTIG_OK) && > > ((vma->vm_end < qp->end) && > > - (!vma->vm_next || vma->vm_end < vma->vm_next->vm_start))) > > + (!next || vma->vm_end < next->vm_start))) > > /* hole at middle or tail of range */ > > return -EFAULT; > > > > @@ -783,28 +785,24 @@ static int vma_replace_policy(struct vm_area_struct *vma, > > static int mbind_range(struct mm_struct *mm, unsigned long start, > > unsigned long end, struct mempolicy *new_pol) > > { > > - struct vm_area_struct *next; > > + MA_STATE(mas, &mm->mm_mt, start - 1, start - 1); > > struct vm_area_struct *prev; > > struct vm_area_struct *vma; > > int err = 0; > > pgoff_t pgoff; > > - unsigned long vmstart; > > - unsigned long vmend; > > - > > - vma = find_vma(mm, start); > > - VM_BUG_ON(!vma); > > > > - prev = vma->vm_prev; > > - if (start > vma->vm_start) > > - prev = vma; > > + prev = mas_find_rev(&mas, 0); > > + if (prev && (start < prev->vm_end)) > > + vma = prev; > > + else > > + vma = mas_next(&mas, end - 1); > > > > - for (; vma && vma->vm_start < end; prev = vma, vma = next) { > > - next = vma->vm_next; > > - vmstart = max(start, vma->vm_start); > > - vmend = min(end, vma->vm_end); > > + do { > > + unsigned long vmstart = max(start, vma->vm_start); > > + unsigned long vmend = min(end, vma->vm_end); > > What if vma is null? Shouldn't this rather be a "for (; vma; vma = > mas_next(...)" We have already found the vma above. I am re-using the maple state to save walking the tree. I will re-add the VM_BUG_ON(!vma), but I think it's more efficient the way it is. > > > > > if (mpol_equal(vma_policy(vma), new_pol)) > > - continue; > > + goto next; > > > > pgoff = vma->vm_pgoff + > > ((vmstart - vma->vm_start) >> PAGE_SHIFT); > > @@ -812,8 +810,8 @@ static int mbind_range(struct mm_struct *mm, unsigned long start, > > vma->anon_vma, vma->vm_file, pgoff, > > new_pol, vma->vm_userfaultfd_ctx); > > if (prev) { > > + mas_pause(&mas); > > vma = prev; > > - next = vma->vm_next; > > if (mpol_equal(vma_policy(vma), new_pol)) > > continue; > > /* vma_merge() joined vma && vma->next, case 8 */ > > @@ -823,19 +821,23 @@ static int mbind_range(struct mm_struct *mm, unsigned long start, > > err = split_vma(vma->vm_mm, vma, vmstart, 1); > > if (err) > > goto out; > > + mas_pause(&mas); > > } > > if (vma->vm_end != vmend) { > > err = split_vma(vma->vm_mm, vma, vmend, 0); > > if (err) > > goto out; > > + /* mas_pause() unnecessary as the loop is ending */ > > } > > - replace: > > +replace: > > err = vma_replace_policy(vma, new_pol); > > if (err) > > goto out; > > - } > > +next: > > + prev = vma; > > + } while ((vma = mas_next(&mas, end - 1)) != NULL); > > > > - out: > > +out: > > return err; > > } > > > > @@ -1053,6 +1055,7 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest, > > int flags) > > { > > nodemask_t nmask; > > + struct vm_area_struct *vma; > > LIST_HEAD(pagelist); > > int err = 0; > > struct migration_target_control mtc = { > > @@ -1068,8 +1071,9 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest, > > * need migration. Between passing in the full user address > > * space range and MPOL_MF_DISCONTIG_OK, this call can not fail. > > */ > > + vma = find_vma(mm, 0); > > VM_BUG_ON(!(flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))); > > - queue_pages_range(mm, mm->mmap->vm_start, mm->task_size, &nmask, > > + queue_pages_range(mm, vma->vm_start, mm->task_size, &nmask, > > flags | MPOL_MF_DISCONTIG_OK, &pagelist); > > > > if (!list_empty(&pagelist)) { > > @@ -1198,13 +1202,12 @@ static struct page *new_page(struct page *page, unsigned long start) > > { > > struct vm_area_struct *vma; > > unsigned long address; > > + MA_STATE(mas, ¤t->mm->mm_mt, start, start); > > > > - vma = find_vma(current->mm, start); > > - while (vma) { > > + mas_for_each(&mas, vma, ULONG_MAX) { > > address = page_address_in_vma(page, vma); > > if (address != -EFAULT) > > break; > > - vma = vma->vm_next; > > } > > > > if (PageHuge(page)) { >
On 1/26/22 03:48, Liam Howlett wrote: > * Vlastimil Babka <vbabka@suse.cz> [220120 06:58]: >> On 12/1/21 15:30, Liam Howlett wrote: >> > From: "Liam R. Howlett" <Liam.Howlett@Oracle.com> >> > >> > Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org> >> > Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com> >> > --- >> > mm/mempolicy.c | 53 ++++++++++++++++++++++++++------------------------ >> > 1 file changed, 28 insertions(+), 25 deletions(-) >> > >> > diff --git a/mm/mempolicy.c b/mm/mempolicy.c >> > index 10e9c87260ed..0e2d918f4f30 100644 >> > --- a/mm/mempolicy.c >> > +++ b/mm/mempolicy.c >> > @@ -377,9 +377,10 @@ void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new) >> > void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new) >> > { >> > struct vm_area_struct *vma; >> > + MA_STATE(mas, &mm->mm_mt, 0, 0); >> >> VMA_ITERATOR? > > Yes, I will make this change. Thanks. > >> >> > >> > mmap_write_lock(mm); >> > - for (vma = mm->mmap; vma; vma = vma->vm_next) >> > + mas_for_each(&mas, vma, ULONG_MAX) >> > mpol_rebind_policy(vma->vm_policy, new); >> > mmap_write_unlock(mm); >> > } >> > @@ -652,7 +653,7 @@ static unsigned long change_prot_numa(struct vm_area_struct *vma, >> > static int queue_pages_test_walk(unsigned long start, unsigned long end, >> > struct mm_walk *walk) >> > { >> > - struct vm_area_struct *vma = walk->vma; >> > + struct vm_area_struct *next, *vma = walk->vma; >> > struct queue_pages *qp = walk->private; >> > unsigned long endvma = vma->vm_end; >> > unsigned long flags = qp->flags; >> > @@ -667,9 +668,10 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end, >> > /* hole at head side of range */ >> > return -EFAULT; >> > } >> > + next = find_vma(vma->vm_mm, vma->vm_end); >> > if (!(flags & MPOL_MF_DISCONTIG_OK) && >> > ((vma->vm_end < qp->end) && >> > - (!vma->vm_next || vma->vm_end < vma->vm_next->vm_start))) >> > + (!next || vma->vm_end < next->vm_start))) >> > /* hole at middle or tail of range */ >> > return -EFAULT; >> > >> > @@ -783,28 +785,24 @@ static int vma_replace_policy(struct vm_area_struct *vma, >> > static int mbind_range(struct mm_struct *mm, unsigned long start, >> > unsigned long end, struct mempolicy *new_pol) >> > { >> > - struct vm_area_struct *next; >> > + MA_STATE(mas, &mm->mm_mt, start - 1, start - 1); >> > struct vm_area_struct *prev; >> > struct vm_area_struct *vma; >> > int err = 0; >> > pgoff_t pgoff; >> > - unsigned long vmstart; >> > - unsigned long vmend; >> > - >> > - vma = find_vma(mm, start); >> > - VM_BUG_ON(!vma); >> > >> > - prev = vma->vm_prev; >> > - if (start > vma->vm_start) >> > - prev = vma; >> > + prev = mas_find_rev(&mas, 0); >> > + if (prev && (start < prev->vm_end)) >> > + vma = prev; >> > + else >> > + vma = mas_next(&mas, end - 1); >> > >> > - for (; vma && vma->vm_start < end; prev = vma, vma = next) { >> > - next = vma->vm_next; >> > - vmstart = max(start, vma->vm_start); >> > - vmend = min(end, vma->vm_end); >> > + do { >> > + unsigned long vmstart = max(start, vma->vm_start); >> > + unsigned long vmend = min(end, vma->vm_end); >> >> What if vma is null? Shouldn't this rather be a "for (; vma; vma = >> mas_next(...)" > > We have already found the vma above. AFAICS if the range intersects no vmas, we might have found a 'prev', but 'vma' might be NULL after the "vma = mas_next(&mas, end - 1);"?
* Vlastimil Babka <vbabka@suse.cz> [220126 04:23]: > On 1/26/22 03:48, Liam Howlett wrote: > > * Vlastimil Babka <vbabka@suse.cz> [220120 06:58]: > >> On 12/1/21 15:30, Liam Howlett wrote: > >> > From: "Liam R. Howlett" <Liam.Howlett@Oracle.com> > >> > > >> > Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org> > >> > Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com> > >> > --- > >> > mm/mempolicy.c | 53 ++++++++++++++++++++++++++------------------------ > >> > 1 file changed, 28 insertions(+), 25 deletions(-) > >> > > >> > diff --git a/mm/mempolicy.c b/mm/mempolicy.c > >> > index 10e9c87260ed..0e2d918f4f30 100644 > >> > --- a/mm/mempolicy.c > >> > +++ b/mm/mempolicy.c > >> > @@ -377,9 +377,10 @@ void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new) > >> > void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new) > >> > { > >> > struct vm_area_struct *vma; > >> > + MA_STATE(mas, &mm->mm_mt, 0, 0); > >> > >> VMA_ITERATOR? > > > > Yes, I will make this change. Thanks. > > > >> > >> > > >> > mmap_write_lock(mm); > >> > - for (vma = mm->mmap; vma; vma = vma->vm_next) > >> > + mas_for_each(&mas, vma, ULONG_MAX) > >> > mpol_rebind_policy(vma->vm_policy, new); > >> > mmap_write_unlock(mm); > >> > } > >> > @@ -652,7 +653,7 @@ static unsigned long change_prot_numa(struct vm_area_struct *vma, > >> > static int queue_pages_test_walk(unsigned long start, unsigned long end, > >> > struct mm_walk *walk) > >> > { > >> > - struct vm_area_struct *vma = walk->vma; > >> > + struct vm_area_struct *next, *vma = walk->vma; > >> > struct queue_pages *qp = walk->private; > >> > unsigned long endvma = vma->vm_end; > >> > unsigned long flags = qp->flags; > >> > @@ -667,9 +668,10 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end, > >> > /* hole at head side of range */ > >> > return -EFAULT; > >> > } > >> > + next = find_vma(vma->vm_mm, vma->vm_end); > >> > if (!(flags & MPOL_MF_DISCONTIG_OK) && > >> > ((vma->vm_end < qp->end) && > >> > - (!vma->vm_next || vma->vm_end < vma->vm_next->vm_start))) > >> > + (!next || vma->vm_end < next->vm_start))) > >> > /* hole at middle or tail of range */ > >> > return -EFAULT; > >> > > >> > @@ -783,28 +785,24 @@ static int vma_replace_policy(struct vm_area_struct *vma, > >> > static int mbind_range(struct mm_struct *mm, unsigned long start, > >> > unsigned long end, struct mempolicy *new_pol) > >> > { > >> > - struct vm_area_struct *next; > >> > + MA_STATE(mas, &mm->mm_mt, start - 1, start - 1); > >> > struct vm_area_struct *prev; > >> > struct vm_area_struct *vma; > >> > int err = 0; > >> > pgoff_t pgoff; > >> > - unsigned long vmstart; > >> > - unsigned long vmend; > >> > - > >> > - vma = find_vma(mm, start); > >> > - VM_BUG_ON(!vma); > >> > > >> > - prev = vma->vm_prev; > >> > - if (start > vma->vm_start) > >> > - prev = vma; > >> > + prev = mas_find_rev(&mas, 0); > >> > + if (prev && (start < prev->vm_end)) > >> > + vma = prev; > >> > + else > >> > + vma = mas_next(&mas, end - 1); > >> > > >> > - for (; vma && vma->vm_start < end; prev = vma, vma = next) { > >> > - next = vma->vm_next; > >> > - vmstart = max(start, vma->vm_start); > >> > - vmend = min(end, vma->vm_end); > >> > + do { > >> > + unsigned long vmstart = max(start, vma->vm_start); > >> > + unsigned long vmend = min(end, vma->vm_end); > >> > >> What if vma is null? Shouldn't this rather be a "for (; vma; vma = > >> mas_next(...)" > > > > We have already found the vma above. > > AFAICS if the range intersects no vmas, we might have found a 'prev', but > 'vma' might be NULL after the "vma = mas_next(&mas, end - 1);"? Yes, I agree. I was going to restore VM_BUG_ON(!vma) after mas_next(), but that's not the same as the existing code. The VM_BUG_ON(!vma) only catches if we search for 'start' above any VMA. It seems mbind_range() would have returned without error if there were no VMAs within the range but would error if the 'start' was sufficiently large. I think it is better to write it as you suggested to restore the functionality of not failing on an empty list. I don't see a decent way of checking if we searched for an address above any VMA to restore the VM_BUG_ON() which existed - although I see little value in it to begin with. I will mention this in the commit message.
On 1/27/22 18:25, Liam Howlett wrote: > * Vlastimil Babka <vbabka@suse.cz> [220126 04:23]: >> On 1/26/22 03:48, Liam Howlett wrote: >> > * Vlastimil Babka <vbabka@suse.cz> [220120 06:58]: >> >> On 12/1/21 15:30, Liam Howlett wrote: >> >> > From: "Liam R. Howlett" <Liam.Howlett@Oracle.com> >> >> > >> >> > Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org> >> >> > Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com> >> >> > --- >> >> > mm/mempolicy.c | 53 ++++++++++++++++++++++++++------------------------ >> >> > 1 file changed, 28 insertions(+), 25 deletions(-) >> >> > >> >> > diff --git a/mm/mempolicy.c b/mm/mempolicy.c >> >> > index 10e9c87260ed..0e2d918f4f30 100644 >> >> > --- a/mm/mempolicy.c >> >> > +++ b/mm/mempolicy.c >> >> > @@ -377,9 +377,10 @@ void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new) >> >> > void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new) >> >> > { >> >> > struct vm_area_struct *vma; >> >> > + MA_STATE(mas, &mm->mm_mt, 0, 0); >> >> >> >> VMA_ITERATOR? >> > >> > Yes, I will make this change. Thanks. >> > >> >> >> >> > >> >> > mmap_write_lock(mm); >> >> > - for (vma = mm->mmap; vma; vma = vma->vm_next) >> >> > + mas_for_each(&mas, vma, ULONG_MAX) >> >> > mpol_rebind_policy(vma->vm_policy, new); >> >> > mmap_write_unlock(mm); >> >> > } >> >> > @@ -652,7 +653,7 @@ static unsigned long change_prot_numa(struct vm_area_struct *vma, >> >> > static int queue_pages_test_walk(unsigned long start, unsigned long end, >> >> > struct mm_walk *walk) >> >> > { >> >> > - struct vm_area_struct *vma = walk->vma; >> >> > + struct vm_area_struct *next, *vma = walk->vma; >> >> > struct queue_pages *qp = walk->private; >> >> > unsigned long endvma = vma->vm_end; >> >> > unsigned long flags = qp->flags; >> >> > @@ -667,9 +668,10 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end, >> >> > /* hole at head side of range */ >> >> > return -EFAULT; >> >> > } >> >> > + next = find_vma(vma->vm_mm, vma->vm_end); >> >> > if (!(flags & MPOL_MF_DISCONTIG_OK) && >> >> > ((vma->vm_end < qp->end) && >> >> > - (!vma->vm_next || vma->vm_end < vma->vm_next->vm_start))) >> >> > + (!next || vma->vm_end < next->vm_start))) >> >> > /* hole at middle or tail of range */ >> >> > return -EFAULT; >> >> > >> >> > @@ -783,28 +785,24 @@ static int vma_replace_policy(struct vm_area_struct *vma, >> >> > static int mbind_range(struct mm_struct *mm, unsigned long start, >> >> > unsigned long end, struct mempolicy *new_pol) >> >> > { >> >> > - struct vm_area_struct *next; >> >> > + MA_STATE(mas, &mm->mm_mt, start - 1, start - 1); >> >> > struct vm_area_struct *prev; >> >> > struct vm_area_struct *vma; >> >> > int err = 0; >> >> > pgoff_t pgoff; >> >> > - unsigned long vmstart; >> >> > - unsigned long vmend; >> >> > - >> >> > - vma = find_vma(mm, start); >> >> > - VM_BUG_ON(!vma); >> >> > >> >> > - prev = vma->vm_prev; >> >> > - if (start > vma->vm_start) >> >> > - prev = vma; >> >> > + prev = mas_find_rev(&mas, 0); >> >> > + if (prev && (start < prev->vm_end)) >> >> > + vma = prev; >> >> > + else >> >> > + vma = mas_next(&mas, end - 1); >> >> > >> >> > - for (; vma && vma->vm_start < end; prev = vma, vma = next) { >> >> > - next = vma->vm_next; >> >> > - vmstart = max(start, vma->vm_start); >> >> > - vmend = min(end, vma->vm_end); >> >> > + do { >> >> > + unsigned long vmstart = max(start, vma->vm_start); >> >> > + unsigned long vmend = min(end, vma->vm_end); >> >> >> >> What if vma is null? Shouldn't this rather be a "for (; vma; vma = >> >> mas_next(...)" >> > >> > We have already found the vma above. >> >> AFAICS if the range intersects no vmas, we might have found a 'prev', but >> 'vma' might be NULL after the "vma = mas_next(&mas, end - 1);"? > > Yes, I agree. I was going to restore VM_BUG_ON(!vma) after mas_next(), > but that's not the same as the existing code. The VM_BUG_ON(!vma) > only catches if we search for 'start' above any VMA. It seems > mbind_range() would have returned without error if there were no VMAs > within the range but would error if the 'start' was sufficiently large. Ah I missed there was a VM_BUG_ON(!vma) previously and that the callers seem to only call mbind_range() if there's an actual vma in the range, so I guess my suggestion was misguided. > I think it is better to write it as you suggested to restore the > functionality of not failing on an empty list. I don't see a decent > way of checking if we searched for an address above any VMA to restore > the VM_BUG_ON() which existed - although I see little value in it to > begin with. I will mention this in the commit message.
* Vlastimil Babka <vbabka@suse.cz> [220127 12:33]: > On 1/27/22 18:25, Liam Howlett wrote: > > * Vlastimil Babka <vbabka@suse.cz> [220126 04:23]: > >> On 1/26/22 03:48, Liam Howlett wrote: > >> > * Vlastimil Babka <vbabka@suse.cz> [220120 06:58]: > >> >> On 12/1/21 15:30, Liam Howlett wrote: > >> >> > From: "Liam R. Howlett" <Liam.Howlett@Oracle.com> > >> >> > > >> >> > Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org> > >> >> > Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com> > >> >> > --- > >> >> > mm/mempolicy.c | 53 ++++++++++++++++++++++++++------------------------ > >> >> > 1 file changed, 28 insertions(+), 25 deletions(-) > >> >> > > >> >> > diff --git a/mm/mempolicy.c b/mm/mempolicy.c > >> >> > index 10e9c87260ed..0e2d918f4f30 100644 > >> >> > --- a/mm/mempolicy.c > >> >> > +++ b/mm/mempolicy.c > >> >> > @@ -377,9 +377,10 @@ void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new) > >> >> > void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new) > >> >> > { > >> >> > struct vm_area_struct *vma; > >> >> > + MA_STATE(mas, &mm->mm_mt, 0, 0); > >> >> > >> >> VMA_ITERATOR? > >> > > >> > Yes, I will make this change. Thanks. > >> > > >> >> > >> >> > > >> >> > mmap_write_lock(mm); > >> >> > - for (vma = mm->mmap; vma; vma = vma->vm_next) > >> >> > + mas_for_each(&mas, vma, ULONG_MAX) > >> >> > mpol_rebind_policy(vma->vm_policy, new); > >> >> > mmap_write_unlock(mm); > >> >> > } > >> >> > @@ -652,7 +653,7 @@ static unsigned long change_prot_numa(struct vm_area_struct *vma, > >> >> > static int queue_pages_test_walk(unsigned long start, unsigned long end, > >> >> > struct mm_walk *walk) > >> >> > { > >> >> > - struct vm_area_struct *vma = walk->vma; > >> >> > + struct vm_area_struct *next, *vma = walk->vma; > >> >> > struct queue_pages *qp = walk->private; > >> >> > unsigned long endvma = vma->vm_end; > >> >> > unsigned long flags = qp->flags; > >> >> > @@ -667,9 +668,10 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end, > >> >> > /* hole at head side of range */ > >> >> > return -EFAULT; > >> >> > } > >> >> > + next = find_vma(vma->vm_mm, vma->vm_end); > >> >> > if (!(flags & MPOL_MF_DISCONTIG_OK) && > >> >> > ((vma->vm_end < qp->end) && > >> >> > - (!vma->vm_next || vma->vm_end < vma->vm_next->vm_start))) > >> >> > + (!next || vma->vm_end < next->vm_start))) > >> >> > /* hole at middle or tail of range */ > >> >> > return -EFAULT; > >> >> > > >> >> > @@ -783,28 +785,24 @@ static int vma_replace_policy(struct vm_area_struct *vma, > >> >> > static int mbind_range(struct mm_struct *mm, unsigned long start, > >> >> > unsigned long end, struct mempolicy *new_pol) > >> >> > { > >> >> > - struct vm_area_struct *next; > >> >> > + MA_STATE(mas, &mm->mm_mt, start - 1, start - 1); > >> >> > struct vm_area_struct *prev; > >> >> > struct vm_area_struct *vma; > >> >> > int err = 0; > >> >> > pgoff_t pgoff; > >> >> > - unsigned long vmstart; > >> >> > - unsigned long vmend; > >> >> > - > >> >> > - vma = find_vma(mm, start); > >> >> > - VM_BUG_ON(!vma); > >> >> > > >> >> > - prev = vma->vm_prev; > >> >> > - if (start > vma->vm_start) > >> >> > - prev = vma; > >> >> > + prev = mas_find_rev(&mas, 0); > >> >> > + if (prev && (start < prev->vm_end)) > >> >> > + vma = prev; > >> >> > + else > >> >> > + vma = mas_next(&mas, end - 1); > >> >> > > >> >> > - for (; vma && vma->vm_start < end; prev = vma, vma = next) { > >> >> > - next = vma->vm_next; > >> >> > - vmstart = max(start, vma->vm_start); > >> >> > - vmend = min(end, vma->vm_end); > >> >> > + do { > >> >> > + unsigned long vmstart = max(start, vma->vm_start); > >> >> > + unsigned long vmend = min(end, vma->vm_end); > >> >> > >> >> What if vma is null? Shouldn't this rather be a "for (; vma; vma = > >> >> mas_next(...)" > >> > > >> > We have already found the vma above. > >> > >> AFAICS if the range intersects no vmas, we might have found a 'prev', but > >> 'vma' might be NULL after the "vma = mas_next(&mas, end - 1);"? > > > > Yes, I agree. I was going to restore VM_BUG_ON(!vma) after mas_next(), > > but that's not the same as the existing code. The VM_BUG_ON(!vma) > > only catches if we search for 'start' above any VMA. It seems > > mbind_range() would have returned without error if there were no VMAs > > within the range but would error if the 'start' was sufficiently large. > > Ah I missed there was a VM_BUG_ON(!vma) previously and that the callers seem > to only call mbind_range() if there's an actual vma in the range, so I guess > my suggestion was misguided. But that VM_BUG_ON(!vma) doesn't check that there's an actual vma in the range. Right now, if there's no vma in the range but there's one higher then it returns success. > > > I think it is better to write it as you suggested to restore the > > functionality of not failing on an empty list. I don't see a decent > > way of checking if we searched for an address above any VMA to restore > > the VM_BUG_ON() which existed - although I see little value in it to > > begin with. I will mention this in the commit message. >
diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 10e9c87260ed..0e2d918f4f30 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -377,9 +377,10 @@ void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new) void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new) { struct vm_area_struct *vma; + MA_STATE(mas, &mm->mm_mt, 0, 0); mmap_write_lock(mm); - for (vma = mm->mmap; vma; vma = vma->vm_next) + mas_for_each(&mas, vma, ULONG_MAX) mpol_rebind_policy(vma->vm_policy, new); mmap_write_unlock(mm); } @@ -652,7 +653,7 @@ static unsigned long change_prot_numa(struct vm_area_struct *vma, static int queue_pages_test_walk(unsigned long start, unsigned long end, struct mm_walk *walk) { - struct vm_area_struct *vma = walk->vma; + struct vm_area_struct *next, *vma = walk->vma; struct queue_pages *qp = walk->private; unsigned long endvma = vma->vm_end; unsigned long flags = qp->flags; @@ -667,9 +668,10 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end, /* hole at head side of range */ return -EFAULT; } + next = find_vma(vma->vm_mm, vma->vm_end); if (!(flags & MPOL_MF_DISCONTIG_OK) && ((vma->vm_end < qp->end) && - (!vma->vm_next || vma->vm_end < vma->vm_next->vm_start))) + (!next || vma->vm_end < next->vm_start))) /* hole at middle or tail of range */ return -EFAULT; @@ -783,28 +785,24 @@ static int vma_replace_policy(struct vm_area_struct *vma, static int mbind_range(struct mm_struct *mm, unsigned long start, unsigned long end, struct mempolicy *new_pol) { - struct vm_area_struct *next; + MA_STATE(mas, &mm->mm_mt, start - 1, start - 1); struct vm_area_struct *prev; struct vm_area_struct *vma; int err = 0; pgoff_t pgoff; - unsigned long vmstart; - unsigned long vmend; - - vma = find_vma(mm, start); - VM_BUG_ON(!vma); - prev = vma->vm_prev; - if (start > vma->vm_start) - prev = vma; + prev = mas_find_rev(&mas, 0); + if (prev && (start < prev->vm_end)) + vma = prev; + else + vma = mas_next(&mas, end - 1); - for (; vma && vma->vm_start < end; prev = vma, vma = next) { - next = vma->vm_next; - vmstart = max(start, vma->vm_start); - vmend = min(end, vma->vm_end); + do { + unsigned long vmstart = max(start, vma->vm_start); + unsigned long vmend = min(end, vma->vm_end); if (mpol_equal(vma_policy(vma), new_pol)) - continue; + goto next; pgoff = vma->vm_pgoff + ((vmstart - vma->vm_start) >> PAGE_SHIFT); @@ -812,8 +810,8 @@ static int mbind_range(struct mm_struct *mm, unsigned long start, vma->anon_vma, vma->vm_file, pgoff, new_pol, vma->vm_userfaultfd_ctx); if (prev) { + mas_pause(&mas); vma = prev; - next = vma->vm_next; if (mpol_equal(vma_policy(vma), new_pol)) continue; /* vma_merge() joined vma && vma->next, case 8 */ @@ -823,19 +821,23 @@ static int mbind_range(struct mm_struct *mm, unsigned long start, err = split_vma(vma->vm_mm, vma, vmstart, 1); if (err) goto out; + mas_pause(&mas); } if (vma->vm_end != vmend) { err = split_vma(vma->vm_mm, vma, vmend, 0); if (err) goto out; + /* mas_pause() unnecessary as the loop is ending */ } - replace: +replace: err = vma_replace_policy(vma, new_pol); if (err) goto out; - } +next: + prev = vma; + } while ((vma = mas_next(&mas, end - 1)) != NULL); - out: +out: return err; } @@ -1053,6 +1055,7 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest, int flags) { nodemask_t nmask; + struct vm_area_struct *vma; LIST_HEAD(pagelist); int err = 0; struct migration_target_control mtc = { @@ -1068,8 +1071,9 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest, * need migration. Between passing in the full user address * space range and MPOL_MF_DISCONTIG_OK, this call can not fail. */ + vma = find_vma(mm, 0); VM_BUG_ON(!(flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))); - queue_pages_range(mm, mm->mmap->vm_start, mm->task_size, &nmask, + queue_pages_range(mm, vma->vm_start, mm->task_size, &nmask, flags | MPOL_MF_DISCONTIG_OK, &pagelist); if (!list_empty(&pagelist)) { @@ -1198,13 +1202,12 @@ static struct page *new_page(struct page *page, unsigned long start) { struct vm_area_struct *vma; unsigned long address; + MA_STATE(mas, ¤t->mm->mm_mt, start, start); - vma = find_vma(current->mm, start); - while (vma) { + mas_for_each(&mas, vma, ULONG_MAX) { address = page_address_in_vma(page, vma); if (address != -EFAULT) break; - vma = vma->vm_next; } if (PageHuge(page)) {