diff mbox series

[RFC,1/3] mm, oom: rework exit_mmap vs. oom_reaper synchronization

Message ID 20180910125513.311-2-mhocko@kernel.org (mailing list archive)
State New, archived
Headers show
Series rework exit_mmap vs. oom_reaper handover | expand

Commit Message

Michal Hocko Sept. 10, 2018, 12:55 p.m. UTC
From: Michal Hocko <mhocko@suse.com>

The oom_reaper cannot handle mlocked vmas right now, and therefore we
have exit_mmap reap the memory before it clears the mlock flags on
mappings. This is all good, but we would like to have a better handover
protocol between the oom_reaper and exit_mmap paths.

Therefore take mmap_sem exclusively in exit_mmap whenever it has to
synchronize with the oom_reaper. There are two notable places: mlocked
vmas (munlock_vma_pages_all) and the page table teardown path. All others
should be fine to race with oom_reap_task_mm.

This is mostly a preparatory patch which shouldn't introduce functional
changes.

Signed-off-by: Michal Hocko <mhocko@suse.com>
---
 mm/mmap.c | 48 +++++++++++++++++++++++-------------------------
 1 file changed, 23 insertions(+), 25 deletions(-)
diff mbox series

Patch

diff --git a/mm/mmap.c b/mm/mmap.c
index 5f2b2b184c60..3481424717ac 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -3042,39 +3042,29 @@  void exit_mmap(struct mm_struct *mm)
 	struct mmu_gather tlb;
 	struct vm_area_struct *vma;
 	unsigned long nr_accounted = 0;
+	bool oom = mm_is_oom_victim(mm);
 
 	/* mm's last user has gone, and its about to be pulled down */
 	mmu_notifier_release(mm);
 
-	if (unlikely(mm_is_oom_victim(mm))) {
-		/*
-		 * Manually reap the mm to free as much memory as possible.
-		 * Then, as the oom reaper does, set MMF_OOM_SKIP to disregard
-		 * this mm from further consideration.  Taking mm->mmap_sem for
-		 * write after setting MMF_OOM_SKIP will guarantee that the oom
-		 * reaper will not run on this mm again after mmap_sem is
-		 * dropped.
-		 *
-		 * Nothing can be holding mm->mmap_sem here and the above call
-		 * to mmu_notifier_release(mm) ensures mmu notifier callbacks in
-		 * __oom_reap_task_mm() will not block.
-		 *
-		 * This needs to be done before calling munlock_vma_pages_all(),
-		 * which clears VM_LOCKED, otherwise the oom reaper cannot
-		 * reliably test it.
-		 */
-		(void)__oom_reap_task_mm(mm);
-
-		set_bit(MMF_OOM_SKIP, &mm->flags);
-		down_write(&mm->mmap_sem);
-		up_write(&mm->mmap_sem);
-	}
-
 	if (mm->locked_vm) {
 		vma = mm->mmap;
 		while (vma) {
-			if (vma->vm_flags & VM_LOCKED)
+			if (vma->vm_flags & VM_LOCKED) {
+				/*
+				 * oom_reaper cannot handle mlocked vmas but we
+				 * need to serialize it with munlock_vma_pages_all
+				 * which clears VM_LOCKED, otherwise the oom reaper
+				 * cannot reliably test it.
+				 */
+				if (oom)
+					down_write(&mm->mmap_sem);
+
 				munlock_vma_pages_all(vma);
+
+				if (oom)
+					up_write(&mm->mmap_sem);
+			}
 			vma = vma->vm_next;
 		}
 	}
@@ -3091,6 +3081,11 @@  void exit_mmap(struct mm_struct *mm)
 	/* update_hiwater_rss(mm) here? but nobody should be looking */
 	/* Use -1 here to ensure all VMAs in the mm are unmapped */
 	unmap_vmas(&tlb, vma, 0, -1);
+
+	/* oom_reaper cannot race with the page tables teardown */
+	if (oom)
+		down_write(&mm->mmap_sem);
+
 	free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING);
 	tlb_finish_mmu(&tlb, 0, -1);
 
@@ -3104,6 +3099,9 @@  void exit_mmap(struct mm_struct *mm)
 		vma = remove_vma(vma);
 	}
 	vm_unacct_memory(nr_accounted);
+
+	if (oom)
+		up_write(&mm->mmap_sem);
 }
 
 /* Insert vm structure into process list sorted by address