@@ -2607,8 +2607,9 @@ extern unsigned long mmap_region(struct file *file, unsigned long addr,
extern unsigned long do_mmap(struct file *file, unsigned long addr,
unsigned long len, unsigned long prot, unsigned long flags,
unsigned long pgoff, unsigned long *populate, struct list_head *uf);
-extern int __do_munmap(struct mm_struct *, unsigned long, size_t,
- struct list_head *uf, bool downgrade);
+extern int do_mas_munmap(struct ma_state *mas, struct mm_struct *mm,
+ unsigned long start, size_t len, struct list_head *uf,
+ bool downgrade);
extern int do_munmap(struct mm_struct *, unsigned long, size_t,
struct list_head *uf);
extern int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int behavior);
@@ -2351,44 +2351,6 @@ static void unmap_region(struct mm_struct *mm,
tlb_finish_mmu(&tlb);
}
-/*
- * Create a list of vma's touched by the unmap, removing them from the mm's
- * vma list as we go..
- */
-static bool
-detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
- struct vm_area_struct *prev, unsigned long end)
-{
- struct vm_area_struct **insertion_point;
- struct vm_area_struct *tail_vma = NULL;
-
- insertion_point = (prev ? &prev->vm_next : &mm->mmap);
- vma->vm_prev = NULL;
- vma_mt_szero(mm, vma->vm_start, end);
- do {
- mm->map_count--;
- tail_vma = vma;
- vma = vma->vm_next;
- } while (vma && vma->vm_start < end);
- *insertion_point = vma;
- if (vma)
- vma->vm_prev = prev;
- else
- mm->highest_vm_end = prev ? vm_end_gap(prev) : 0;
- tail_vma->vm_next = NULL;
-
- /*
- * Do not downgrade mmap_lock if we are next to VM_GROWSDOWN or
- * VM_GROWSUP VMA. Such VMAs can change their size under
- * down_read(mmap_lock) and collide with the VMA we are about to unmap.
- */
- if (vma && (vma->vm_flags & VM_GROWSDOWN))
- return false;
- if (prev && (prev->vm_flags & VM_GROWSUP))
- return false;
- return true;
-}
-
/*
* __split_vma() bypasses sysctl_max_map_count checking. We use this where it
* has already been checked or doesn't make sense to fail.
@@ -2468,49 +2430,46 @@ int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
return __split_vma(mm, vma, addr, new_below);
}
-static inline void
-unlock_range(struct vm_area_struct *start, unsigned long limit)
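+/*
+ * unlock_range() - munlock any VM_LOCKED VMAs from @start up to @limit.
+ * Sets @tail to the last VMA visited and returns the number of VMAs walked.
+ */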
+static inline int
+unlock_range(struct vm_area_struct *start, struct vm_area_struct **tail,
+ unsigned long limit)
{
struct mm_struct *mm = start->vm_mm;
struct vm_area_struct *tmp = start;
+ int count = 0;
while (tmp && tmp->vm_start < limit) {
+ *tail = tmp;
+ count++;
if (tmp->vm_flags & VM_LOCKED) {
mm->locked_vm -= vma_pages(tmp);
munlock_vma_pages_all(tmp);
}
tmp = tmp->vm_next;
}
+
+ return count;
}
-/* Munmap is split into 2 main parts -- this part which finds
- * what needs doing, and the areas themselves, which do the
- * work. This now handles partial unmappings.
- * Jeremy Fitzhardinge <jeremy@goop.org>
+/*
+ * do_mas_align_munmap() - munmap the aligned region from @start to @end.
+ * @mas: The maple_state, ideally set up to alter the correct tree location.
+ * @vma: The starting vm_area_struct
+ * @mm: The mm_struct
+ * @start: The aligned start address to munmap.
+ * @end: The aligned end address to munmap.
+ * @uf: The userfaultfd list_head
+ * @downgrade: Set to true to attempt a write downgrade of the mmap_lock
+ *
+ * @mas must be locked before calling this function. If @downgrade is true,
+ * check the return code for a potential release of the lock.
*/
-int __do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
- struct list_head *uf, bool downgrade)
+static int
+do_mas_align_munmap(struct ma_state *mas, struct vm_area_struct *vma,
+ struct mm_struct *mm, unsigned long start,
+ unsigned long end, struct list_head *uf, bool downgrade)
{
- unsigned long end;
- struct vm_area_struct *vma, *prev, *last;
-
- if ((offset_in_page(start)) || start > TASK_SIZE || len > TASK_SIZE-start)
- return -EINVAL;
-
- len = PAGE_ALIGN(len);
- end = start + len;
- if (len == 0)
- return -EINVAL;
-
- /* arch_unmap() might do unmaps itself. */
- arch_unmap(mm, start, end);
-
- /* Find the first overlapping VMA where start < vma->vm_end */
- vma = find_vma_intersection(mm, start, end);
- if (!vma)
- return 0;
-
- prev = vma->vm_prev;
+ struct vm_area_struct *prev, *last;
/* we have start < vma->vm_end */
/*
@@ -2534,16 +2493,27 @@ int __do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
if (error)
return error;
prev = vma;
+ vma = vma_next(mm, prev);
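+		/* The split changed the tree; set up @mas to re-walk from @start. */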
+ mas->index = start;
+ mas_reset(mas);
+ } else {
+ prev = vma->vm_prev;
}
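+
+	/* Find the last VMA that overlaps the unmap range. */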
+ if (vma->vm_end >= end)
+ last = vma;
+ else
+ last = find_vma_intersection(mm, end - 1, end);
+
/* Does it split the last one? */
- last = find_vma(mm, end);
- if (last && end > last->vm_start) {
+ if (last && end < last->vm_end) {
int error = __split_vma(mm, last, end, 1);
if (error)
return error;
+ vma = vma_next(mm, prev);
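+		/* Splitting @last changed the tree; force @mas to re-walk. */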
+ mas_reset(mas);
}
- vma = vma_next(mm, prev);
+
if (unlikely(uf)) {
/*
@@ -2556,22 +2526,48 @@ int __do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
* failure that it's not worth optimizing it for.
*/
int error = userfaultfd_unmap_prep(vma, start, end, uf);
+
if (error)
return error;
}
/*
- * unlock any mlock()ed ranges before detaching vmas
+ * unlock any mlock()ed ranges before detaching vmas, count the number
+ * of VMAs to be dropped, and return the tail entry of the affected
+ * area.
*/
- if (mm->locked_vm)
- unlock_range(vma, end);
+ mm->map_count -= unlock_range(vma, &last, end);
+ /* Drop removed area from the tree */
+ mas_store_gfp(mas, NULL, GFP_KERNEL);
- /* Detach vmas from the MM linked list and remove from the mm tree*/
- if (!detach_vmas_to_be_unmapped(mm, vma, prev, end))
- downgrade = false;
+ /* Detach vmas from the MM linked list */
+ vma->vm_prev = NULL;
+ if (prev)
+ prev->vm_next = last->vm_next;
+ else
+ mm->mmap = last->vm_next;
- if (downgrade)
- mmap_write_downgrade(mm);
+ if (last->vm_next) {
+ last->vm_next->vm_prev = prev;
+ last->vm_next = NULL;
+ } else
+ mm->highest_vm_end = prev ? vm_end_gap(prev) : 0;
+
+ /*
+ * Do not downgrade mmap_lock if we are next to VM_GROWSDOWN or
+ * VM_GROWSUP VMA. Such VMAs can change their size under
+ * down_read(mmap_lock) and collide with the VMA we are about to unmap.
+ */
+	if (downgrade) {
+		/* @last is already detached; check the VMA now after @prev. */
+		struct vm_area_struct *next = prev ? prev->vm_next : mm->mmap;
+
+		if (next && (next->vm_flags & VM_GROWSDOWN))
+			downgrade = false;
+		else if (prev && (prev->vm_flags & VM_GROWSUP))
+			downgrade = false;
+ else {
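+			/* Release the tree lock before the write downgrade. */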
+ mas_unlock(mas);
+ mmap_write_downgrade(mm);
+ }
+ }
unmap_region(mm, vma, prev, start, end);
@@ -2581,10 +2577,65 @@ int __do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
return downgrade ? 1 : 0;
}
+/*
+ * do_mas_munmap() - munmap a given range.
+ * @mas: The maple state
+ * @mm: The mm_struct
+ * @start: The start address to munmap
+ * @len: The length of the range to munmap
+ * @uf: The userfaultfd list_head
+ * @downgrade: set to true if the user wants to attempt to write_downgrade the
+ *		mmap_lock
+ *
+ * This function takes a @mas that is in the correct state to remove the
+ * mapping(s). The @len will be aligned and any arch_unmap work will be
+ * performed.
+ * @mas must be locked. @mas may be unlocked if @downgrade is true.
+ *
+ * Returns: -EINVAL on failure, 1 on success with the lock downgraded, 0
+ * otherwise.
+ */
+int do_mas_munmap(struct ma_state *mas, struct mm_struct *mm,
+ unsigned long start, size_t len, struct list_head *uf,
+ bool downgrade)
+{
+ unsigned long end;
+ struct vm_area_struct *vma;
+
+	if (offset_in_page(start) || start > TASK_SIZE || len > TASK_SIZE - start)
+ return -EINVAL;
+
+ end = start + PAGE_ALIGN(len);
+ if (end == start)
+ return -EINVAL;
+
+ /* arch_unmap() might do unmaps itself. */
+ arch_unmap(mm, start, end);
+
+ /* Find the first overlapping VMA */
+ vma = mas_find(mas, end - 1);
+ if (!vma)
+ return 0;
+
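+	/* Widen @mas so the NULL store in do_mas_align_munmap() clears up to @end. */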
+ mas->last = end - 1;
+ return do_mas_align_munmap(mas, vma, mm, start, end, uf, downgrade);
+}
+
+/*
+ * do_munmap() - Wrapper function for non-maple tree aware do_munmap() calls.
+ * @mm: The mm_struct
+ * @start: The start address to munmap
+ * @len: The length to be munmapped.
+ * @uf: The userfaultfd list_head
+ */
int do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
struct list_head *uf)
{
- return __do_munmap(mm, start, len, uf, false);
+ int ret;
+ MA_STATE(mas, &mm->mm_mt, start, start);
+
+ mas_lock(&mas);
+ ret = do_mas_munmap(&mas, mm, start, len, uf, false);
+ mas_unlock(&mas);
+ return ret;
}
unsigned long mmap_region(struct file *file, unsigned long addr,
@@ -2618,8 +2669,12 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
}
/* Unmap any existing mapping in the area */
- if (do_munmap(mm, addr, len, uf))
+ mas_lock(&mas);
+ if (do_mas_munmap(&mas, mm, addr, len, uf, false)) {
+ mas_unlock(&mas);
return -ENOMEM;
+ }
+ mas_unlock(&mas);
/*
* Private writable mapping: check memory availability
@@ -2817,11 +2872,13 @@ static int __vm_munmap(unsigned long start, size_t len, bool downgrade)
int ret;
struct mm_struct *mm = current->mm;
LIST_HEAD(uf);
+ MA_STATE(mas, &mm->mm_mt, start, start);
if (mmap_write_lock_killable(mm))
return -EINTR;
- ret = __do_munmap(mm, start, len, &uf, downgrade);
+ mas_lock(&mas);
+ ret = do_mas_munmap(&mas, mm, start, len, &uf, downgrade);
/*
* Returning 1 indicates mmap_lock is downgraded.
* But 1 is not legal return value of vm_munmap() and munmap(), reset
@@ -2830,8 +2887,10 @@ static int __vm_munmap(unsigned long start, size_t len, bool downgrade)
if (ret == 1) {
mmap_read_unlock(mm);
ret = 0;
- } else
+ } else {
+ mas_unlock(&mas);
mmap_write_unlock(mm);
+ }
userfaultfd_unmap_complete(mm, &uf);
return ret;
@@ -2955,10 +3014,10 @@ static int do_brk_munmap(struct ma_state *mas, struct vm_area_struct *vma,
arch_unmap(mm, newbrk, oldbrk);
if (likely(vma->vm_start >= newbrk)) { // remove entire mapping(s)
- mas_set(mas, newbrk);
- if (vma->vm_start != newbrk)
- mas_reset(mas); // cause a re-walk for the first overlap.
- ret = __do_munmap(mm, newbrk, oldbrk - newbrk, uf, true);
+ mas_lock(mas);
+		ret = do_mas_munmap(mas, mm, newbrk, oldbrk - newbrk, uf, true);
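+		/* On downgrade (ret == 1), do_mas_munmap() already unlocked @mas. */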
+ if (ret != 1)
+ mas_unlock(mas);
goto munmap_full_vma;
}
@@ -3213,7 +3272,7 @@ void exit_mmap(struct mm_struct *mm)
}
if (mm->locked_vm)
- unlock_range(mm->mmap, ULONG_MAX);
+ unlock_range(mm->mmap, &vma, ULONG_MAX);
arch_exit_mmap(mm);
@@ -960,20 +960,27 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
/*
* Always allow a shrinking remap: that just unmaps
* the unnecessary pages..
- * __do_munmap does all the needed commit accounting, and
+ * do_mas_munmap does all the needed commit accounting, and
* downgrades mmap_lock to read if so directed.
*/
if (old_len >= new_len) {
int retval;
+ MA_STATE(mas, &mm->mm_mt, addr + new_len, addr + new_len);
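+		/* Only the tail [addr + new_len, addr + old_len) is unmapped. */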
- retval = __do_munmap(mm, addr+new_len, old_len - new_len,
- &uf_unmap, true);
- if (retval < 0 && old_len != new_len) {
- ret = retval;
- goto out;
+ mas_lock(&mas);
+ retval = do_mas_munmap(&mas, mm, addr + new_len,
+ old_len - new_len, &uf_unmap, true);
/* Returning 1 indicates mmap_lock is downgraded to read. */
- } else if (retval == 1)
+ if (retval == 1) {
downgraded = true;
+ } else {
+ mas_unlock(&mas);
+ if (retval < 0 && old_len != new_len) {
+ ret = retval;
+ goto out;
+ }
+ }
+
ret = addr;
goto out;
}