Message ID | 20250414-work-coredump-v2-3-685bf231f828@kernel.org (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | coredump: hand a pidfd to the usermode coredump helper | expand |
On 04/14, Christian Brauner wrote: > > -static int umh_pipe_setup(struct subprocess_info *info, struct cred *new) > +static int umh_coredump_setup(struct subprocess_info *info, struct cred *new) > { > struct file *files[2]; > struct coredump_params *cp = (struct coredump_params *)info->data; > int err; > > + if (cp->pid) { > + struct file *pidfs_file __free(fput) = NULL; > + > + pidfs_file = pidfs_alloc_file(cp->pid, 0); > + if (IS_ERR(pidfs_file)) > + return PTR_ERR(pidfs_file); > + > + /* > + * Usermode helpers are childen of either > + * system_unbound_wq or of kthreadd. So we know that > + * we're starting off with a clean file descriptor > + * table. So we should always be able to use > + * COREDUMP_PIDFD_NUMBER as our file descriptor value. > + */ > + VFS_WARN_ON_ONCE((pidfs_file = fget_raw(COREDUMP_PIDFD_NUMBER)) != NULL); > + > + err = replace_fd(COREDUMP_PIDFD_NUMBER, pidfs_file, 0); > + if (err < 0) > + return err; Yes, but if replace_fd() succeeds we need to nullify pidfs_file to avoid fput from __free(fput) ? And I think in this case __free(fput) doesn't buy too much, but up to you. Oleg.
On Mon, Apr 14, 2025 at 04:14:50PM +0200, Oleg Nesterov wrote: > On 04/14, Christian Brauner wrote: > > > > -static int umh_pipe_setup(struct subprocess_info *info, struct cred *new) > > +static int umh_coredump_setup(struct subprocess_info *info, struct cred *new) > > { > > struct file *files[2]; > > struct coredump_params *cp = (struct coredump_params *)info->data; > > int err; > > > > + if (cp->pid) { > > + struct file *pidfs_file __free(fput) = NULL; > > + > > + pidfs_file = pidfs_alloc_file(cp->pid, 0); > > + if (IS_ERR(pidfs_file)) > > + return PTR_ERR(pidfs_file); > > + > > + /* > > + * Usermode helpers are childen of either > > + * system_unbound_wq or of kthreadd. So we know that > > + * we're starting off with a clean file descriptor > > + * table. So we should always be able to use > > + * COREDUMP_PIDFD_NUMBER as our file descriptor value. > > + */ > > + VFS_WARN_ON_ONCE((pidfs_file = fget_raw(COREDUMP_PIDFD_NUMBER)) != NULL); > > + > > + err = replace_fd(COREDUMP_PIDFD_NUMBER, pidfs_file, 0); > > + if (err < 0) > > + return err; > > Yes, but if replace_fd() succeeds we need to nullify pidfs_file > to avoid fput from __free(fput) ? No, since replace_fd() takes its own reference via do_dup2(): replace_fd() -> do_dup2() { get_file(file) rcu_assign_pointer(fdt->fd[fd], file); } so we always need to call it. I had a comment about this in the previous patchset so people don't get confused. I can add it back. Let me know if you're happy with this otherwise. > > And I think in this case __free(fput) doesn't buy too much, but > up to you. > > Oleg. >
On 04/14, Oleg Nesterov wrote: > > On 04/14, Christian Brauner wrote: > > > > -static int umh_pipe_setup(struct subprocess_info *info, struct cred *new) > > +static int umh_coredump_setup(struct subprocess_info *info, struct cred *new) > > { > > struct file *files[2]; > > struct coredump_params *cp = (struct coredump_params *)info->data; > > int err; > > > > + if (cp->pid) { > > + struct file *pidfs_file __free(fput) = NULL; > > + > > + pidfs_file = pidfs_alloc_file(cp->pid, 0); > > + if (IS_ERR(pidfs_file)) > > + return PTR_ERR(pidfs_file); > > + > > + /* > > + * Usermode helpers are childen of either > > + * system_unbound_wq or of kthreadd. So we know that > > + * we're starting off with a clean file descriptor > > + * table. So we should always be able to use > > + * COREDUMP_PIDFD_NUMBER as our file descriptor value. > > + */ > > + VFS_WARN_ON_ONCE((pidfs_file = fget_raw(COREDUMP_PIDFD_NUMBER)) != NULL); > > + > > + err = replace_fd(COREDUMP_PIDFD_NUMBER, pidfs_file, 0); > > + if (err < 0) > > + return err; > > Yes, but if replace_fd() succeeds we need to nullify pidfs_file > to avoid fput from __free(fput) ? Aah, please ignore me ;) replace_fd/do_dup2 does get_file() . For this series: Reviewed-by: Oleg Nesterov <oleg@redhat.com>
On Mon, Apr 14, 2025 at 04:28:07PM +0200, Oleg Nesterov wrote: > On 04/14, Oleg Nesterov wrote: > > > > On 04/14, Christian Brauner wrote: > > > > > > -static int umh_pipe_setup(struct subprocess_info *info, struct cred *new) > > > +static int umh_coredump_setup(struct subprocess_info *info, struct cred *new) > > > { > > > struct file *files[2]; > > > struct coredump_params *cp = (struct coredump_params *)info->data; > > > int err; > > > > > > + if (cp->pid) { > > > + struct file *pidfs_file __free(fput) = NULL; > > > + > > > + pidfs_file = pidfs_alloc_file(cp->pid, 0); > > > + if (IS_ERR(pidfs_file)) > > > + return PTR_ERR(pidfs_file); > > > + > > > + /* > > > + * Usermode helpers are childen of either > > > + * system_unbound_wq or of kthreadd. So we know that > > > + * we're starting off with a clean file descriptor > > > + * table. So we should always be able to use > > > + * COREDUMP_PIDFD_NUMBER as our file descriptor value. > > > + */ > > > + VFS_WARN_ON_ONCE((pidfs_file = fget_raw(COREDUMP_PIDFD_NUMBER)) != NULL); > > > + > > > + err = replace_fd(COREDUMP_PIDFD_NUMBER, pidfs_file, 0); > > > + if (err < 0) > > > + return err; > > > > Yes, but if replace_fd() succeeds we need to nullify pidfs_file > > to avoid fput from __free(fput) ? > > Aah, please ignore me ;) replace_fd/do_dup2 does get_file() . > > For this series: Thanks for the excellent review as usual!
diff --git a/fs/coredump.c b/fs/coredump.c index 9da592aa8f16..403be0ff780e 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -43,6 +43,9 @@ #include <linux/timekeeping.h> #include <linux/sysctl.h> #include <linux/elf.h> +#include <linux/pidfs.h> +#include <uapi/linux/pidfd.h> +#include <linux/vfsdebug.h> #include <linux/uaccess.h> #include <asm/mmu_context.h> @@ -60,6 +63,12 @@ static void free_vma_snapshot(struct coredump_params *cprm); #define CORE_FILE_NOTE_SIZE_DEFAULT (4*1024*1024) /* Define a reasonable max cap */ #define CORE_FILE_NOTE_SIZE_MAX (16*1024*1024) +/* + * File descriptor number for the pidfd for the thread-group leader of + * the coredumping task installed into the usermode helper's file + * descriptor table. + */ +#define COREDUMP_PIDFD_NUMBER 3 static int core_uses_pid; static unsigned int core_pipe_limit; @@ -339,6 +348,27 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm, case 'C': err = cn_printf(cn, "%d", cprm->cpu); break; + /* pidfd number */ + case 'F': { + /* + * Installing a pidfd only makes sense if + * we actually spawn a usermode helper. + */ + if (!ispipe) + break; + + /* + * Note that we'll install a pidfd for the + * thread-group leader. We know that task + * linkage hasn't been removed yet and even if + * this @current isn't the actual thread-group + * leader we know that the thread-group leader + * cannot be reaped until @current has exited. + */ + cprm->pid = task_tgid(current); + err = cn_printf(cn, "%d", COREDUMP_PIDFD_NUMBER); + break; + } default: break; } @@ -493,7 +523,7 @@ static void wait_for_dump_helpers(struct file *file) } /* - * umh_pipe_setup + * umh_coredump_setup * helper function to customize the process used * to collect the core in userspace. Specifically * it sets up a pipe and installs it as fd 0 (stdin) @@ -503,12 +533,33 @@ static void wait_for_dump_helpers(struct file *file) * is a special value that we use to trap recursive * core dumps */ -static int umh_pipe_setup(struct subprocess_info *info, struct cred *new) +static int umh_coredump_setup(struct subprocess_info *info, struct cred *new) { struct file *files[2]; struct coredump_params *cp = (struct coredump_params *)info->data; int err; + if (cp->pid) { + struct file *pidfs_file __free(fput) = NULL; + + pidfs_file = pidfs_alloc_file(cp->pid, 0); + if (IS_ERR(pidfs_file)) + return PTR_ERR(pidfs_file); + + /* + * Usermode helpers are childen of either + * system_unbound_wq or of kthreadd. So we know that + * we're starting off with a clean file descriptor + * table. So we should always be able to use + * COREDUMP_PIDFD_NUMBER as our file descriptor value. + */ + VFS_WARN_ON_ONCE((pidfs_file = fget_raw(COREDUMP_PIDFD_NUMBER)) != NULL); + + err = replace_fd(COREDUMP_PIDFD_NUMBER, pidfs_file, 0); + if (err < 0) + return err; + } + err = create_pipe_files(files, 0); if (err) return err; @@ -598,7 +649,7 @@ void do_coredump(const kernel_siginfo_t *siginfo) } if (cprm.limit == 1) { - /* See umh_pipe_setup() which sets RLIMIT_CORE = 1. + /* See umh_coredump_setup() which sets RLIMIT_CORE = 1. * * Normally core limits are irrelevant to pipes, since * we're not writing to the file system, but we use @@ -637,7 +688,7 @@ void do_coredump(const kernel_siginfo_t *siginfo) retval = -ENOMEM; sub_info = call_usermodehelper_setup(helper_argv[0], helper_argv, NULL, GFP_KERNEL, - umh_pipe_setup, NULL, &cprm); + umh_coredump_setup, NULL, &cprm); if (sub_info) retval = call_usermodehelper_exec(sub_info, UMH_WAIT_EXEC); diff --git a/include/linux/coredump.h b/include/linux/coredump.h index 77e6e195d1d6..76e41805b92d 100644 --- a/include/linux/coredump.h +++ b/include/linux/coredump.h @@ -28,6 +28,7 @@ struct coredump_params { int vma_count; size_t vma_data_size; struct core_vma_metadata *vma_meta; + struct pid *pid; }; extern unsigned int core_file_note_size_limit;