Message ID | 20250414-work-coredump-v1-3-6caebc807ff4@kernel.org (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | coredump: hand a pidfd to the usermode coredump helper | expand |
On 04/14, Christian Brauner wrote: > > + case 'F': { > + struct file *pidfs_file __free(fput) = NULL; > + > + /* > + * Install a pidfd only makes sense if > + * we actually spawn a usermode helper. > + */ > + if (!ispipe) > + break; > + > + /* > + * We already created a pidfs_file but the user > + * specified F multiple times. Just print the > + * number multiple times. > + */ > + if (!cprm->pidfs_file) { > + /* > + * Create a pidfs file for the > + * coredumping thread that we can > + * install into the usermode helper's > + * file descriptor table later. > + * > + * Note that we'll install a pidfd for > + * the thread-group leader. We know that > + * task linkage hasn't been removed yet > + * and even if this @current isn't the > + * actual thread-group leader we know > + * that the thread-group leader cannot > + * be reaped until @current has exited. > + */ > + pidfs_file = pidfs_alloc_file(task_tgid(current), 0); > + if (IS_ERR(pidfs_file)) > + return PTR_ERR(pidfs_file); > + } > + > + /* > + * Usermode helpers are childen of > + * either system_unbound_wq or of > + * kthreadd. So we know that we're > + * starting off with a clean file > + * descriptor table. Thus, we should > + * always be able to use file descriptor > + * number 3. > + */ > + err = cn_printf(cn, "%d", COREDUMP_PIDFD_NUMBER); > + if (err) > + return err; > + > + cprm->pidfs_file = no_free_ptr(pidfs_file); > + break; > + } So the new case 'F' differs from other case's in that it doesn't do "break" but returns the error... this is a bit inconsistent. Note also that if you do cn_printf() before pidfs_alloc_file(), then you can avoid __free(fput) and no_free_ptr(). But this is minor. Can't we simplify this patch? Rather than add the new pidfs_file member into coredump_params, we can add "struct pid *pid". format_corename() will simply do case 'F': if (ispipe) { // no need to do get_pid() cprm->pid = task_tgid(current); err = cn_printf(...); } break; and umh_pipe_setup() can itself do pidfs_alloc_file(cp->pid) if it is not NULL. This way do_coredump() doesn't need any changes. No? Oleg.
On Mon, Apr 14, 2025 at 02:48:44PM +0200, Oleg Nesterov wrote: > On 04/14, Christian Brauner wrote: > > > > + case 'F': { > > + struct file *pidfs_file __free(fput) = NULL; > > + > > + /* > > + * Install a pidfd only makes sense if > > + * we actually spawn a usermode helper. > > + */ > > + if (!ispipe) > > + break; > > + > > + /* > > + * We already created a pidfs_file but the user > > + * specified F multiple times. Just print the > > + * number multiple times. > > + */ > > + if (!cprm->pidfs_file) { > > + /* > > + * Create a pidfs file for the > > + * coredumping thread that we can > > + * install into the usermode helper's > > + * file descriptor table later. > > + * > > + * Note that we'll install a pidfd for > > + * the thread-group leader. We know that > > + * task linkage hasn't been removed yet > > + * and even if this @current isn't the > > + * actual thread-group leader we know > > + * that the thread-group leader cannot > > + * be reaped until @current has exited. > > + */ > > + pidfs_file = pidfs_alloc_file(task_tgid(current), 0); > > + if (IS_ERR(pidfs_file)) > > + return PTR_ERR(pidfs_file); > > + } > > + > > + /* > > + * Usermode helpers are childen of > > + * either system_unbound_wq or of > > + * kthreadd. So we know that we're > > + * starting off with a clean file > > + * descriptor table. Thus, we should > > + * always be able to use file descriptor > > + * number 3. > > + */ > > + err = cn_printf(cn, "%d", COREDUMP_PIDFD_NUMBER); > > + if (err) > > + return err; > > + > > + cprm->pidfs_file = no_free_ptr(pidfs_file); > > + break; > > + } > > So the new case 'F' differs from other case's in that it doesn't do > "break" but returns the error... this is a bit inconsistent. Yeah, though that already happens right at the top. > > Note also that if you do cn_printf() before pidfs_alloc_file(), then you > can avoid __free(fput) and no_free_ptr(). Seemed inconsitent to me to print first even though we didn't succeed but I agree that it doesn't matter. > > But this is minor. Can't we simplify this patch? Let's hear it... > > Rather than add the new pidfs_file member into coredump_params, we can > add "struct pid *pid". format_corename() will simply do > > case 'F': > if (ispipe) { > // no need to do get_pid() > cprm->pid = task_tgid(current); > err = cn_printf(...); > } > break; > > and umh_pipe_setup() can itself do pidfs_alloc_file(cp->pid) if it is > not NULL. Sure, same result but works for me. I'll send v2 in a bit. > > This way do_coredump() doesn't need any changes. > > No? Yep, sounds good.
diff --git a/fs/coredump.c b/fs/coredump.c index 2ed5e6956a09..ae0c1c5efe9a 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -43,6 +43,9 @@ #include <linux/timekeeping.h> #include <linux/sysctl.h> #include <linux/elf.h> +#include <linux/pidfs.h> +#include <uapi/linux/pidfd.h> +#include <linux/vfsdebug.h> #include <linux/uaccess.h> #include <asm/mmu_context.h> @@ -60,6 +63,12 @@ static void free_vma_snapshot(struct coredump_params *cprm); #define CORE_FILE_NOTE_SIZE_DEFAULT (4*1024*1024) /* Define a reasonable max cap */ #define CORE_FILE_NOTE_SIZE_MAX (16*1024*1024) +/* + * File descriptor number for the pidfd for the thread-group leader of + * the coredumping task installed into the usermode helper's file + * descriptor table. + */ +#define COREDUMP_PIDFD_NUMBER 3 static int core_uses_pid; static unsigned int core_pipe_limit; @@ -339,6 +348,58 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm, case 'C': err = cn_printf(cn, "%d", cprm->cpu); break; + /* pidfd number */ + case 'F': { + struct file *pidfs_file __free(fput) = NULL; + + /* + * Install a pidfd only makes sense if + * we actually spawn a usermode helper. + */ + if (!ispipe) + break; + + /* + * We already created a pidfs_file but the user + * specified F multiple times. Just print the + * number multiple times. + */ + if (!cprm->pidfs_file) { + /* + * Create a pidfs file for the + * coredumping thread that we can + * install into the usermode helper's + * file descriptor table later. + * + * Note that we'll install a pidfd for + * the thread-group leader. We know that + * task linkage hasn't been removed yet + * and even if this @current isn't the + * actual thread-group leader we know + * that the thread-group leader cannot + * be reaped until @current has exited. + */ + pidfs_file = pidfs_alloc_file(task_tgid(current), 0); + if (IS_ERR(pidfs_file)) + return PTR_ERR(pidfs_file); + } + + /* + * Usermode helpers are childen of + * either system_unbound_wq or of + * kthreadd. So we know that we're + * starting off with a clean file + * descriptor table. Thus, we should + * always be able to use file descriptor + * number 3. + */ + err = cn_printf(cn, "%d", COREDUMP_PIDFD_NUMBER); + if (err) + return err; + + cprm->pidfs_file = no_free_ptr(pidfs_file); + break; + } default: break; } @@ -493,7 +554,7 @@ static void wait_for_dump_helpers(struct file *file) } /* - * umh_pipe_setup + * umh_coredump_setup * helper function to customize the process used * to collect the core in userspace. Specifically * it sets up a pipe and installs it as fd 0 (stdin) @@ -503,12 +564,26 @@ static void wait_for_dump_helpers(struct file *file) * is a special value that we use to trap recursive * core dumps */ -static int umh_pipe_setup(struct subprocess_info *info, struct cred *new) +static int umh_coredump_setup(struct subprocess_info *info, struct cred *new) { struct file *files[2]; struct coredump_params *cp = (struct coredump_params *)info->data; + struct file *pidfs_file = cp->pidfs_file; int err; + if (pidfs_file) { + /* We must start from a pristine file descriptor table. */ + VFS_WARN_ON_ONCE((pidfs_file = fget_raw(COREDUMP_PIDFD_NUMBER)) != NULL); + + /* + * Takes it's own reference on success and do_coredump() + * will put the additional reference. + */ + err = replace_fd(COREDUMP_PIDFD_NUMBER, pidfs_file, 0); + if (err < 0) + return err; + } + err = create_pipe_files(files, 0); if (err) return err; @@ -598,7 +673,7 @@ void do_coredump(const kernel_siginfo_t *siginfo) } if (cprm.limit == 1) { - /* See umh_pipe_setup() which sets RLIMIT_CORE = 1. + /* See umh_coredump_setup() which sets RLIMIT_CORE = 1. * * Normally core limits are irrelevant to pipes, since * we're not writing to the file system, but we use @@ -637,7 +712,7 @@ void do_coredump(const kernel_siginfo_t *siginfo) retval = -ENOMEM; sub_info = call_usermodehelper_setup(helper_argv[0], helper_argv, NULL, GFP_KERNEL, - umh_pipe_setup, NULL, &cprm); + umh_coredump_setup, NULL, &cprm); if (sub_info) retval = call_usermodehelper_exec(sub_info, UMH_WAIT_EXEC); @@ -784,6 +859,8 @@ void do_coredump(const kernel_siginfo_t *siginfo) if (ispipe) atomic_dec(&core_dump_count); fail_unlock: + if (cprm.pidfs_file) + fput(cprm.pidfs_file); kfree(argv); kfree(cn.corename); coredump_finish(core_dumped); diff --git a/include/linux/coredump.h b/include/linux/coredump.h index 77e6e195d1d6..af2ed4b68dcf 100644 --- a/include/linux/coredump.h +++ b/include/linux/coredump.h @@ -28,6 +28,7 @@ struct coredump_params { int vma_count; size_t vma_data_size; struct core_vma_metadata *vma_meta; + struct file *pidfs_file; }; extern unsigned int core_file_note_size_limit;