diff mbox series

[v2] binfmt_elf: Dump smaller VMAs first in ELF cores

Message ID C21B229F-D1E6-4E44-B506-A5ED4019A9DE@juniper.net (mailing list archive)
State New
Headers show
Series [v2] binfmt_elf: Dump smaller VMAs first in ELF cores | expand

Commit Message

Brian Mak Aug. 2, 2024, 10:37 p.m. UTC
Large cores may be truncated in some scenarios, such as with daemons
with stop timeouts that are not large enough or lack of disk space. This
impacts debuggability with large core dumps since critical information
necessary to form a usable backtrace, such as stacks and shared library
information, are omitted.

Attempting to find all the VMAs necessary to form a proper backtrace and
then prioritizing those VMAs specifically while core dumping is complex.
So instead, we can mitigate the impact of core dump truncation by
dumping smaller VMAs first, which may be more likely to contain memory
necessary to form a usable backtrace.

By sorting VMAs by dump size and dumping in that order, we have a
simple, yet effective heuristic.

Signed-off-by: Brian Mak <makb@juniper.net>
---

v2: Edited commit message to include more reasoning for sorting VMAs
    Removed conditional VMA sorting with debugfs knob
    Above edits suggested by Eric Biederman <ebiederm@xmission.com>

 fs/binfmt_elf.c | 32 ++++++++++++++++++++++++++++++--
 1 file changed, 30 insertions(+), 2 deletions(-)


base-commit: 94ede2a3e9135764736221c080ac7c0ad993dc2d

Comments

Eric W. Biederman Aug. 3, 2024, 3:25 a.m. UTC | #1
Brian Mak <makb@juniper.net> writes:

> Large cores may be truncated in some scenarios, such as with daemons
> with stop timeouts that are not large enough or lack of disk space. This
> impacts debuggability with large core dumps since critical information
> necessary to form a usable backtrace, such as stacks and shared library
> information, are omitted.
>
> Attempting to find all the VMAs necessary to form a proper backtrace and
> then prioritizing those VMAs specifically while core dumping is complex.
> So instead, we can mitigate the impact of core dump truncation by
> dumping smaller VMAs first, which may be more likely to contain memory
> necessary to form a usable backtrace.

I think I would say something like:  We attempted to figure out which
VMAs are needed to create a useful backtrace, and it turned out to
be a non-trivial problem.  So instead you tried simply sorting
the vma's by size and that worked.

Something to convey it is a tricky problem without an easy solution,
and that it wasn't work solving.

> By sorting VMAs by dump size and dumping in that order, we have a
> simple, yet effective heuristic.

I like the improvement to the description, and I like the improved
simplicity of this.

I will repeat myself quickly and say I would like this even better if
the sorting could be moved to the end of dump_vma_snapshot in
fs/coredump.c, and the original array of vmas could be sorted.

That removes all concerns about taking up more memory, as well
as adding the improvement to ELF FDPIC as well.

Sorting the original array will also change file_files_notes
which will result in the NT_FILE note ordering the mapped files
in the same order as the program headers are written.  There
is enough information I don't think it matters, but consistency
is nice.

Eric

> Signed-off-by: Brian Mak <makb@juniper.net>
> ---
>
> v2: Edited commit message to include more reasoning for sorting VMAs
>     Removed conditional VMA sorting with debugfs knob
>     Above edits suggested by Eric Biederman <ebiederm@xmission.com>
>
>  fs/binfmt_elf.c | 32 ++++++++++++++++++++++++++++++--
>  1 file changed, 30 insertions(+), 2 deletions(-)
>
> diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
> index 19fa49cd9907..7feadbdd9ee6 100644
> --- a/fs/binfmt_elf.c
> +++ b/fs/binfmt_elf.c
> @@ -37,6 +37,7 @@
>  #include <linux/elf-randomize.h>
>  #include <linux/utsname.h>
>  #include <linux/coredump.h>
> +#include <linux/sort.h>
>  #include <linux/sched.h>
>  #include <linux/sched/coredump.h>
>  #include <linux/sched/task_stack.h>
> @@ -1990,6 +1991,20 @@ static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
>  	shdr4extnum->sh_info = segs;
>  }
>  
> +static int cmp_vma_size(const void *vma_meta_lhs_ptr, const void *vma_meta_rhs_ptr)
> +{
> +	const struct core_vma_metadata *vma_meta_lhs = *(const struct core_vma_metadata **)
> +		vma_meta_lhs_ptr;
> +	const struct core_vma_metadata *vma_meta_rhs = *(const struct core_vma_metadata **)
> +		vma_meta_rhs_ptr;
> +
> +	if (vma_meta_lhs->dump_size < vma_meta_rhs->dump_size)
> +		return -1;
> +	if (vma_meta_lhs->dump_size > vma_meta_rhs->dump_size)
> +		return 1;
> +	return 0;
> +}
> +
>  /*
>   * Actual dumper
>   *
> @@ -2008,6 +2023,7 @@ static int elf_core_dump(struct coredump_params *cprm)
>  	struct elf_shdr *shdr4extnum = NULL;
>  	Elf_Half e_phnum;
>  	elf_addr_t e_shoff;
> +	struct core_vma_metadata **sorted_vmas = NULL;
>  
>  	/*
>  	 * The number of segs are recored into ELF header as 16bit value.
> @@ -2071,9 +2087,20 @@ static int elf_core_dump(struct coredump_params *cprm)
>  	if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
>  		goto end_coredump;
>  
> +	/* Allocate memory to sort VMAs and sort. */
> +	sorted_vmas = kvmalloc_array(cprm->vma_count, sizeof(*sorted_vmas), GFP_KERNEL);
> +
> +	if (!sorted_vmas)
> +		goto end_coredump;
> +
> +	for (i = 0; i < cprm->vma_count; i++)
> +		sorted_vmas[i] = cprm->vma_meta + i;
> +
> +	sort(sorted_vmas, cprm->vma_count, sizeof(*sorted_vmas), cmp_vma_size, NULL);
> +
>  	/* Write program headers for segments dump */
>  	for (i = 0; i < cprm->vma_count; i++) {
> -		struct core_vma_metadata *meta = cprm->vma_meta + i;
> +		struct core_vma_metadata *meta = sorted_vmas[i];
>  		struct elf_phdr phdr;
>  
>  		phdr.p_type = PT_LOAD;
> @@ -2111,7 +2138,7 @@ static int elf_core_dump(struct coredump_params *cprm)
>  	dump_skip_to(cprm, dataoff);
>  
>  	for (i = 0; i < cprm->vma_count; i++) {
> -		struct core_vma_metadata *meta = cprm->vma_meta + i;
> +		struct core_vma_metadata *meta = sorted_vmas[i];
>  
>  		if (!dump_user_range(cprm, meta->start, meta->dump_size))
>  			goto end_coredump;
> @@ -2129,6 +2156,7 @@ static int elf_core_dump(struct coredump_params *cprm)
>  	free_note_info(&info);
>  	kfree(shdr4extnum);
>  	kfree(phdr4note);
> +	kvfree(sorted_vmas);
>  	return has_dumped;
>  }
>  
>
> base-commit: 94ede2a3e9135764736221c080ac7c0ad993dc2d
diff mbox series

Patch

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 19fa49cd9907..7feadbdd9ee6 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -37,6 +37,7 @@ 
 #include <linux/elf-randomize.h>
 #include <linux/utsname.h>
 #include <linux/coredump.h>
+#include <linux/sort.h>
 #include <linux/sched.h>
 #include <linux/sched/coredump.h>
 #include <linux/sched/task_stack.h>
@@ -1990,6 +1991,20 @@  static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
 	shdr4extnum->sh_info = segs;
 }
 
+static int cmp_vma_size(const void *vma_meta_lhs_ptr, const void *vma_meta_rhs_ptr)
+{
+	const struct core_vma_metadata *vma_meta_lhs = *(const struct core_vma_metadata **)
+		vma_meta_lhs_ptr;
+	const struct core_vma_metadata *vma_meta_rhs = *(const struct core_vma_metadata **)
+		vma_meta_rhs_ptr;
+
+	if (vma_meta_lhs->dump_size < vma_meta_rhs->dump_size)
+		return -1;
+	if (vma_meta_lhs->dump_size > vma_meta_rhs->dump_size)
+		return 1;
+	return 0;
+}
+
 /*
  * Actual dumper
  *
@@ -2008,6 +2023,7 @@  static int elf_core_dump(struct coredump_params *cprm)
 	struct elf_shdr *shdr4extnum = NULL;
 	Elf_Half e_phnum;
 	elf_addr_t e_shoff;
+	struct core_vma_metadata **sorted_vmas = NULL;
 
 	/*
 	 * The number of segs are recored into ELF header as 16bit value.
@@ -2071,9 +2087,20 @@  static int elf_core_dump(struct coredump_params *cprm)
 	if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
 		goto end_coredump;
 
+	/* Allocate memory to sort VMAs and sort. */
+	sorted_vmas = kvmalloc_array(cprm->vma_count, sizeof(*sorted_vmas), GFP_KERNEL);
+
+	if (!sorted_vmas)
+		goto end_coredump;
+
+	for (i = 0; i < cprm->vma_count; i++)
+		sorted_vmas[i] = cprm->vma_meta + i;
+
+	sort(sorted_vmas, cprm->vma_count, sizeof(*sorted_vmas), cmp_vma_size, NULL);
+
 	/* Write program headers for segments dump */
 	for (i = 0; i < cprm->vma_count; i++) {
-		struct core_vma_metadata *meta = cprm->vma_meta + i;
+		struct core_vma_metadata *meta = sorted_vmas[i];
 		struct elf_phdr phdr;
 
 		phdr.p_type = PT_LOAD;
@@ -2111,7 +2138,7 @@  static int elf_core_dump(struct coredump_params *cprm)
 	dump_skip_to(cprm, dataoff);
 
 	for (i = 0; i < cprm->vma_count; i++) {
-		struct core_vma_metadata *meta = cprm->vma_meta + i;
+		struct core_vma_metadata *meta = sorted_vmas[i];
 
 		if (!dump_user_range(cprm, meta->start, meta->dump_size))
 			goto end_coredump;
@@ -2129,6 +2156,7 @@  static int elf_core_dump(struct coredump_params *cprm)
 	free_note_info(&info);
 	kfree(shdr4extnum);
 	kfree(phdr4note);
+	kvfree(sorted_vmas);
 	return has_dumped;
 }