Message ID | 156007493995.3335.9595044802115356911.stgit@buzz (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | mm: use down_read_killable for locking mmap_sem in proc | expand |
On Sun, Jun 09, 2019 at 01:09:00PM +0300, Konstantin Khlebnikov wrote: > Do not stuck forever if something wrong. > Killable lock allows to cleanup stuck tasks and simplifies investigation. This patch breaks the CRIU project, because stat() returns EINTR instead of ENOENT: [root@fc24 criu]# stat /proc/self/map_files/0-0 stat: cannot stat '/proc/self/map_files/0-0': Interrupted system call Here is one inline comment with the fix for this issue. > > It seems ->d_revalidate() could return any error (except ECHILD) to > abort validation and pass error as result of lookup sequence. > > Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru> > Reviewed-by: Roman Gushchin <guro@fb.com> > Reviewed-by: Cyrill Gorcunov <gorcunov@gmail.com> > Reviewed-by: Kirill Tkhai <ktkhai@virtuozzo.com> It was nice to see all four of you in one place :). > Acked-by: Michal Hocko <mhocko@suse.com> > --- > fs/proc/base.c | 27 +++++++++++++++++++++------ > 1 file changed, 21 insertions(+), 6 deletions(-) > > diff --git a/fs/proc/base.c b/fs/proc/base.c > index 9c8ca6cd3ce4..515ab29c2adf 100644 > --- a/fs/proc/base.c > +++ b/fs/proc/base.c > @@ -1962,9 +1962,12 @@ static int map_files_d_revalidate(struct dentry *dentry, unsigned int flags) > goto out; > > if (!dname_to_vma_addr(dentry, &vm_start, &vm_end)) { > - down_read(&mm->mmap_sem); > - exact_vma_exists = !!find_exact_vma(mm, vm_start, vm_end); > - up_read(&mm->mmap_sem); > + status = down_read_killable(&mm->mmap_sem); > + if (!status) { > + exact_vma_exists = !!find_exact_vma(mm, vm_start, > + vm_end); > + up_read(&mm->mmap_sem); > + } > } > > mmput(mm); > @@ -2010,8 +2013,11 @@ static int map_files_get_link(struct dentry *dentry, struct path *path) > if (rc) > goto out_mmput; > > + rc = down_read_killable(&mm->mmap_sem); > + if (rc) > + goto out_mmput; > + > rc = -ENOENT; > - down_read(&mm->mmap_sem); > vma = find_exact_vma(mm, vm_start, vm_end); > if (vma && vma->vm_file) { > *path = vma->vm_file->f_path; > @@ -2107,7 +2113,10 @@ static struct dentry *proc_map_files_lookup(struct inode *dir, > if (!mm) > goto out_put_task; > > - down_read(&mm->mmap_sem); > + result = ERR_PTR(-EINTR); > + if (down_read_killable(&mm->mmap_sem)) > + goto out_put_mm; > + result = ERR_PTR(-ENOENT); > vma = find_exact_vma(mm, vm_start, vm_end); > if (!vma) > goto out_no_vma; > @@ -2118,6 +2127,7 @@ static struct dentry *proc_map_files_lookup(struct inode *dir, > > out_no_vma: > up_read(&mm->mmap_sem); > +out_put_mm: > mmput(mm); > out_put_task: > put_task_struct(task); > @@ -2160,7 +2170,12 @@ proc_map_files_readdir(struct file *file, struct dir_context *ctx) > mm = get_task_mm(task); > if (!mm) > goto out_put_task; > - down_read(&mm->mmap_sem); > + > + ret = down_read_killable(&mm->mmap_sem); > + if (ret) { > + mmput(mm); > + goto out_put_task; > + } > > nr_files = 0; >
On Wed, 12 Jun 2019 16:14:28 -0700 Andrei Vagin <avagin@gmail.com> wrote: > On Sun, Jun 09, 2019 at 01:09:00PM +0300, Konstantin Khlebnikov wrote: > > Do not stuck forever if something wrong. > > Killable lock allows to cleanup stuck tasks and simplifies investigation. > > This patch breaks the CRIU project, because stat() returns EINTR instead > of ENOENT: > > [root@fc24 criu]# stat /proc/self/map_files/0-0 > stat: cannot stat '/proc/self/map_files/0-0': Interrupted system call > > Here is one inline comment with the fix for this issue. > > > @@ -2107,7 +2113,10 @@ static struct dentry *proc_map_files_lookup(struct inode *dir, > > if (!mm) > > goto out_put_task; > > > > - down_read(&mm->mmap_sem); > > + result = ERR_PTR(-EINTR); > > + if (down_read_killable(&mm->mmap_sem)) > > + goto out_put_mm; > > + > > result = ERR_PTR(-ENOENT); > yes, thanks. --- a/fs/proc/base.c~proc-use-down_read_killable-mmap_sem-for-proc-pid-map_files-fix +++ a/fs/proc/base.c @@ -2117,6 +2117,7 @@ static struct dentry *proc_map_files_loo if (down_read_killable(&mm->mmap_sem)) goto out_put_mm; + result = ERR_PTR(-ENOENT); vma = find_exact_vma(mm, vm_start, vm_end); if (!vma) goto out_no_vma; We started doing this trick of using ret = -EFOO; if (something) goto out; decades ago because it generated slightly better code. I rather doubt whether that's still the case. In fact this: --- a/fs/proc/base.c~a +++ a/fs/proc/base.c @@ -2096,35 +2096,45 @@ static struct dentry *proc_map_files_loo struct dentry *result; struct mm_struct *mm; - result = ERR_PTR(-ENOENT); task = get_proc_task(dir); - if (!task) + if (!task) { + result = ERR_PTR(-ENOENT); goto out; + } - result = ERR_PTR(-EACCES); - if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) + if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) { + result = ERR_PTR(-EACCES); goto out_put_task; + } - result = ERR_PTR(-ENOENT); - if (dname_to_vma_addr(dentry, &vm_start, &vm_end)) + if (dname_to_vma_addr(dentry, &vm_start, &vm_end)) { + result = ERR_PTR(-ENOENT); goto out_put_task; + } mm = get_task_mm(task); - if (!mm) + if (!mm) { + result = ERR_PTR(-ENOENT); goto out_put_task; + } - result = ERR_PTR(-EINTR); - if (down_read_killable(&mm->mmap_sem)) + if (down_read_killable(&mm->mmap_sem)) { + result = ERR_PTR(-EINTR); goto out_put_mm; + } - result = ERR_PTR(-ENOENT); vma = find_exact_vma(mm, vm_start, vm_end); - if (!vma) + if (!vma) { + result = ERR_PTR(-ENOENT); goto out_no_vma; + } - if (vma->vm_file) + if (vma->vm_file) { result = proc_map_files_instantiate(dentry, task, (void *)(unsigned long)vma->vm_file->f_mode); + } else { + result = ERR_PTR(-ENOENT); + } out_no_vma: up_read(&mm->mmap_sem); makes no change to the generated assembly with gcc-7.2.0. And I do think that the above style is clearer and more reliable, as this bug demonstrates.
On Wed, Jun 12, 2019 at 04:14:28PM -0700, Andrei Vagin wrote: > On Sun, Jun 09, 2019 at 01:09:00PM +0300, Konstantin Khlebnikov wrote: > > Do not stuck forever if something wrong. > > Killable lock allows to cleanup stuck tasks and simplifies investigation. > > This patch breaks the CRIU project, because stat() returns EINTR instead > of ENOENT: > > [root@fc24 criu]# stat /proc/self/map_files/0-0 > stat: cannot stat '/proc/self/map_files/0-0': Interrupted system call > > Here is one inline comment with the fix for this issue. > > > > > It seems ->d_revalidate() could return any error (except ECHILD) to > > abort validation and pass error as result of lookup sequence. > > > > Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru> > > Reviewed-by: Roman Gushchin <guro@fb.com> > > Reviewed-by: Cyrill Gorcunov <gorcunov@gmail.com> > > Reviewed-by: Kirill Tkhai <ktkhai@virtuozzo.com> > > It was nice to see all four of you in one place :). Holymoly ;) And we all managed to miss this error code. Thanks, Andrew!
On 13.06.2019 2:14, Andrei Vagin wrote: > On Sun, Jun 09, 2019 at 01:09:00PM +0300, Konstantin Khlebnikov wrote: >> Do not stuck forever if something wrong. >> Killable lock allows to cleanup stuck tasks and simplifies investigation. > > This patch breaks the CRIU project, because stat() returns EINTR instead > of ENOENT: > > [root@fc24 criu]# stat /proc/self/map_files/0-0 > stat: cannot stat '/proc/self/map_files/0-0': Interrupted system call Good catch. It seems CRIU tests has good coverage for darkest corners of kernel API. Kernel CI projects should use it. I suppose you know how to promote this. =) > > Here is one inline comment with the fix for this issue. > >> >> It seems ->d_revalidate() could return any error (except ECHILD) to >> abort validation and pass error as result of lookup sequence. >> >> Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru> >> Reviewed-by: Roman Gushchin <guro@fb.com> >> Reviewed-by: Cyrill Gorcunov <gorcunov@gmail.com> >> Reviewed-by: Kirill Tkhai <ktkhai@virtuozzo.com> > > It was nice to see all four of you in one place :). > >> Acked-by: Michal Hocko <mhocko@suse.com> >> --- >> fs/proc/base.c | 27 +++++++++++++++++++++------ >> 1 file changed, 21 insertions(+), 6 deletions(-) >> >> diff --git a/fs/proc/base.c b/fs/proc/base.c >> index 9c8ca6cd3ce4..515ab29c2adf 100644 >> --- a/fs/proc/base.c >> +++ b/fs/proc/base.c >> @@ -1962,9 +1962,12 @@ static int map_files_d_revalidate(struct dentry *dentry, unsigned int flags) >> goto out; >> >> if (!dname_to_vma_addr(dentry, &vm_start, &vm_end)) { >> - down_read(&mm->mmap_sem); >> - exact_vma_exists = !!find_exact_vma(mm, vm_start, vm_end); >> - up_read(&mm->mmap_sem); >> + status = down_read_killable(&mm->mmap_sem); >> + if (!status) { >> + exact_vma_exists = !!find_exact_vma(mm, vm_start, >> + vm_end); >> + up_read(&mm->mmap_sem); >> + } >> } >> >> mmput(mm); >> @@ -2010,8 +2013,11 @@ static int map_files_get_link(struct dentry *dentry, struct path *path) >> if (rc) >> goto out_mmput; >> >> + rc = down_read_killable(&mm->mmap_sem); >> + if (rc) >> + goto out_mmput; >> + >> rc = -ENOENT; >> - down_read(&mm->mmap_sem); >> vma = find_exact_vma(mm, vm_start, vm_end); >> if (vma && vma->vm_file) { >> *path = vma->vm_file->f_path; >> @@ -2107,7 +2113,10 @@ static struct dentry *proc_map_files_lookup(struct inode *dir, >> if (!mm) >> goto out_put_task; >> >> - down_read(&mm->mmap_sem); >> + result = ERR_PTR(-EINTR); >> + if (down_read_killable(&mm->mmap_sem)) >> + goto out_put_mm; >> + > > result = ERR_PTR(-ENOENT); > >> vma = find_exact_vma(mm, vm_start, vm_end); >> if (!vma) >> goto out_no_vma; >> @@ -2118,6 +2127,7 @@ static struct dentry *proc_map_files_lookup(struct inode *dir, >> >> out_no_vma: >> up_read(&mm->mmap_sem); >> +out_put_mm: >> mmput(mm); >> out_put_task: >> put_task_struct(task); >> @@ -2160,7 +2170,12 @@ proc_map_files_readdir(struct file *file, struct dir_context *ctx) >> mm = get_task_mm(task); >> if (!mm) >> goto out_put_task; >> - down_read(&mm->mmap_sem); >> + >> + ret = down_read_killable(&mm->mmap_sem); >> + if (ret) { >> + mmput(mm); >> + goto out_put_task; >> + } >> >> nr_files = 0; >>
On Thu, Jun 13, 2019 at 1:15 AM Konstantin Khlebnikov <khlebnikov@yandex-team.ru> wrote: > > On 13.06.2019 2:14, Andrei Vagin wrote: > > On Sun, Jun 09, 2019 at 01:09:00PM +0300, Konstantin Khlebnikov wrote: > >> Do not stuck forever if something wrong. > >> Killable lock allows to cleanup stuck tasks and simplifies investigation. > > > > This patch breaks the CRIU project, because stat() returns EINTR instead > > of ENOENT: > > > > [root@fc24 criu]# stat /proc/self/map_files/0-0 > > stat: cannot stat '/proc/self/map_files/0-0': Interrupted system call > > Good catch. > > It seems CRIU tests has good coverage for darkest corners of kernel API. > Kernel CI projects should use it. I suppose you know how to promote this. =) I remember Mike was trying to add the CRIU test suite into kernel-ci, but it looks like this ended up with nothing. The good thing here is that we have our own kernel-ci: https://travis-ci.org/avagin/linux/builds Travis-CI doesn't allow to replace the kernel, so we use CRIU to dump/restore a ssh session and travis doesn't notice when we kexec a new kernel. > > > > > Here is one inline comment with the fix for this issue. > > > >> > >> It seems ->d_revalidate() could return any error (except ECHILD) to > >> abort validation and pass error as result of lookup sequence. > >> > >> Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru> > >> Reviewed-by: Roman Gushchin <guro@fb.com> > >> Reviewed-by: Cyrill Gorcunov <gorcunov@gmail.com> > >> Reviewed-by: Kirill Tkhai <ktkhai@virtuozzo.com> > > > > It was nice to see all four of you in one place :). > > > >> Acked-by: Michal Hocko <mhocko@suse.com> > >> --- > >> fs/proc/base.c | 27 +++++++++++++++++++++------ > >> 1 file changed, 21 insertions(+), 6 deletions(-) > >> > >> diff --git a/fs/proc/base.c b/fs/proc/base.c > >> index 9c8ca6cd3ce4..515ab29c2adf 100644 > >> --- a/fs/proc/base.c > >> +++ b/fs/proc/base.c > >> @@ -1962,9 +1962,12 @@ static int map_files_d_revalidate(struct dentry *dentry, unsigned int flags) > >> goto out; > >> > >> if (!dname_to_vma_addr(dentry, &vm_start, &vm_end)) { > >> - down_read(&mm->mmap_sem); > >> - exact_vma_exists = !!find_exact_vma(mm, vm_start, vm_end); > >> - up_read(&mm->mmap_sem); > >> + status = down_read_killable(&mm->mmap_sem); > >> + if (!status) { > >> + exact_vma_exists = !!find_exact_vma(mm, vm_start, > >> + vm_end); > >> + up_read(&mm->mmap_sem); > >> + } > >> } > >> > >> mmput(mm); > >> @@ -2010,8 +2013,11 @@ static int map_files_get_link(struct dentry *dentry, struct path *path) > >> if (rc) > >> goto out_mmput; > >> > >> + rc = down_read_killable(&mm->mmap_sem); > >> + if (rc) > >> + goto out_mmput; > >> + > >> rc = -ENOENT; > >> - down_read(&mm->mmap_sem); > >> vma = find_exact_vma(mm, vm_start, vm_end); > >> if (vma && vma->vm_file) { > >> *path = vma->vm_file->f_path; > >> @@ -2107,7 +2113,10 @@ static struct dentry *proc_map_files_lookup(struct inode *dir, > >> if (!mm) > >> goto out_put_task; > >> > >> - down_read(&mm->mmap_sem); > >> + result = ERR_PTR(-EINTR); > >> + if (down_read_killable(&mm->mmap_sem)) > >> + goto out_put_mm; > >> + > > > > result = ERR_PTR(-ENOENT); > > > >> vma = find_exact_vma(mm, vm_start, vm_end); > >> if (!vma) > >> goto out_no_vma; > >> @@ -2118,6 +2127,7 @@ static struct dentry *proc_map_files_lookup(struct inode *dir, > >> > >> out_no_vma: > >> up_read(&mm->mmap_sem); > >> +out_put_mm: > >> mmput(mm); > >> out_put_task: > >> put_task_struct(task); > >> @@ -2160,7 +2170,12 @@ proc_map_files_readdir(struct file *file, struct dir_context *ctx) > >> mm = get_task_mm(task); > >> if (!mm) > >> goto out_put_task; > >> - down_read(&mm->mmap_sem); > >> + > >> + ret = down_read_killable(&mm->mmap_sem); > >> + if (ret) { > >> + mmput(mm); > >> + goto out_put_task; > >> + } > >> > >> nr_files = 0; > >>
diff --git a/fs/proc/base.c b/fs/proc/base.c index 9c8ca6cd3ce4..515ab29c2adf 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1962,9 +1962,12 @@ static int map_files_d_revalidate(struct dentry *dentry, unsigned int flags) goto out; if (!dname_to_vma_addr(dentry, &vm_start, &vm_end)) { - down_read(&mm->mmap_sem); - exact_vma_exists = !!find_exact_vma(mm, vm_start, vm_end); - up_read(&mm->mmap_sem); + status = down_read_killable(&mm->mmap_sem); + if (!status) { + exact_vma_exists = !!find_exact_vma(mm, vm_start, + vm_end); + up_read(&mm->mmap_sem); + } } mmput(mm); @@ -2010,8 +2013,11 @@ static int map_files_get_link(struct dentry *dentry, struct path *path) if (rc) goto out_mmput; + rc = down_read_killable(&mm->mmap_sem); + if (rc) + goto out_mmput; + rc = -ENOENT; - down_read(&mm->mmap_sem); vma = find_exact_vma(mm, vm_start, vm_end); if (vma && vma->vm_file) { *path = vma->vm_file->f_path; @@ -2107,7 +2113,10 @@ static struct dentry *proc_map_files_lookup(struct inode *dir, if (!mm) goto out_put_task; - down_read(&mm->mmap_sem); + result = ERR_PTR(-EINTR); + if (down_read_killable(&mm->mmap_sem)) + goto out_put_mm; + vma = find_exact_vma(mm, vm_start, vm_end); if (!vma) goto out_no_vma; @@ -2118,6 +2127,7 @@ static struct dentry *proc_map_files_lookup(struct inode *dir, out_no_vma: up_read(&mm->mmap_sem); +out_put_mm: mmput(mm); out_put_task: put_task_struct(task); @@ -2160,7 +2170,12 @@ proc_map_files_readdir(struct file *file, struct dir_context *ctx) mm = get_task_mm(task); if (!mm) goto out_put_task; - down_read(&mm->mmap_sem); + + ret = down_read_killable(&mm->mmap_sem); + if (ret) { + mmput(mm); + goto out_put_task; + } nr_files = 0;