@@ -1169,15 +1169,15 @@ SYSCALL_DEFINE2(process_mrelease, int, pidfd, unsigned int, flags)
goto put_task;
}
- if (mmget_not_zero(p->mm)) {
- mm = p->mm;
- if (task_will_free_mem(p))
- reap = true;
- else {
- /* Error only if the work has not been done already */
- if (!test_bit(MMF_OOM_SKIP, &mm->flags))
- ret = -EINVAL;
- }
+ mm = p->mm;
+ mmgrab(mm);
+
+ if (task_will_free_mem(p))
+ reap = true;
+ else {
+ /* Error only if the work has not been done already */
+ if (!test_bit(MMF_OOM_SKIP, &mm->flags))
+ ret = -EINVAL;
}
task_unlock(p);
@@ -1188,13 +1188,16 @@ SYSCALL_DEFINE2(process_mrelease, int, pidfd, unsigned int, flags)
ret = -EINTR;
goto drop_mm;
}
- if (!__oom_reap_task_mm(mm))
+ /*
+ * Check MMF_OOM_SKIP again under mmap_read_lock protection to ensure
+ * possible change in exit_mmap is seen
+ */
+ if (!test_bit(MMF_OOM_SKIP, &mm->flags) && !__oom_reap_task_mm(mm))
ret = -EAGAIN;
mmap_read_unlock(mm);
drop_mm:
- if (mm)
- mmput(mm);
+ mmdrop(mm);
put_task:
put_task_struct(task);
return ret;
With exit_mmap holding mmap_write_lock during free_pgtables call, process_mrelease does not need to elevate mm->mm_users in order to prevent exit_mmap from destrying pagetables while __oom_reap_task_mm is walking the VMA tree. The change prevents process_mrelease from calling the last mmput, which can lead to waiting for IO completion in exit_aio. Fixes: 337546e83fc7 ("mm/oom_kill.c: prevent a race between process_mrelease and exit_mmap") Signed-off-by: Suren Baghdasaryan <surenb@google.com> --- mm/oom_kill.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-)