diff mbox series

[2/2] merge-ort: fix small memory leak in unique_path()

Message ID 73bc1e5c5dffbe9c132ea786dd414ef2159967e3.1645290601.git.gitgitgadget@gmail.com (mailing list archive)
State Accepted
Commit 81afc7941294cec828daaff86c040b1edf099f25
Headers show
Series Fix a couple small leaks in merge-ort | expand

Commit Message

Elijah Newren Feb. 19, 2022, 5:10 p.m. UTC
From: Elijah Newren <newren@gmail.com>

The struct strmap paths member of merge_options_internal is perhaps the
most central data structure to all of merge-ort.  Because all the paths
involved in the merge need to be kept until the merge is complete, this
"paths" data structure traditionally took responsibility for owning all
the allocated paths.  When the merge is over, those paths were free()d
as part of free()ing this strmap.

In commit 6697ee01b5d3 (merge-ort: switch our strmaps over to using
memory pools, 2021-07-30), we changed the allocations for pathnames to
come from a memory pool.  That meant the ownership changed slightly;
there were no individual free() calls to make, instead the memory pool
owned all those paths and they were free()d all at once.

Unfortunately unique_path() was written presuming the pre-memory-pool
model, and allocated a path on the heap and left it in the strmap for
later free()ing.  Modify it to return a path allocated from the memory
pool instead.

Note that there's one instance -- in record_conflicted_index_entries()
-- where the returned string from unique_path() was only used very
temporarily and thus had been immediately free()'d.  This codepath was
associated with an ugly skip-worktree workaround that has since been
better fixed by the in-flight en/present-despite-skipped topic.  This
workaround probably makes sense to excise once that topic merges down,
but for now, just remove the immediate free() and allow the returned
string to be free()d when the memory pool is released.

This fixes the following memory leak as reported by valgrind:

==PID== 65 bytes in 1 blocks are definitely lost in loss record 79 of 134
==PID==    at 0xADDRESS: malloc
==PID==    by 0xADDRESS: realloc
==PID==    by 0xADDRESS: xrealloc (wrapper.c:126)
==PID==    by 0xADDRESS: strbuf_grow (strbuf.c:98)
==PID==    by 0xADDRESS: strbuf_vaddf (strbuf.c:394)
==PID==    by 0xADDRESS: strbuf_addf (strbuf.c:335)
==PID==    by 0xADDRESS: unique_path (merge-ort.c:733)
==PID==    by 0xADDRESS: process_entry (merge-ort.c:3678)
==PID==    by 0xADDRESS: process_entries (merge-ort.c:4037)
==PID==    by 0xADDRESS: merge_ort_nonrecursive_internal (merge-ort.c:4621)
==PID==    by 0xADDRESS: merge_ort_internal (merge-ort.c:4709)
==PID==    by 0xADDRESS: merge_incore_recursive (merge-ort.c:4760)
==PID==    by 0xADDRESS: merge_ort_recursive (merge-ort-wrappers.c:57)
==PID==    by 0xADDRESS: try_merge_strategy (merge.c:753)

Reported-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Elijah Newren <newren@gmail.com>
---
 merge-ort.c | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

Comments

Ævar Arnfjörð Bjarmason Feb. 19, 2022, 10:22 p.m. UTC | #1
On Sat, Feb 19 2022, Elijah Newren via GitGitGadget wrote:

> From: Elijah Newren <newren@gmail.com>
>
> The struct strmap paths member of merge_options_internal is perhaps the
> most central data structure to all of merge-ort.  Because all the paths
> involved in the merge need to be kept until the merge is complete, this
> "paths" data structure traditionally took responsibility for owning all
> the allocated paths.  When the merge is over, those paths were free()d
> as part of free()ing this strmap.
>
> In commit 6697ee01b5d3 (merge-ort: switch our strmaps over to using
> memory pools, 2021-07-30), we changed the allocations for pathnames to
> come from a memory pool.  That meant the ownership changed slightly;
> there were no individual free() calls to make, instead the memory pool
> owned all those paths and they were free()d all at once.
>
> Unfortunately unique_path() was written presuming the pre-memory-pool
> model, and allocated a path on the heap and left it in the strmap for
> later free()ing.  Modify it to return a path allocated from the memory
> pool instead.

This seems like a rather obvious fix to the leak, as the other side
wasn't ready to have the detached strbuf handed to it, and instead is
assuming everything is mempools.

The downside is a bit of heap churn here since you malloc() & use the
strbuf just to ask for that size from the mempool, and then free() the
strbuf (of course we had that before, we just weren't free-ing).

So this is just an aside & I have no idea if it's worth it, but FWIW you
can have your cake & eat it too here memory-allocation wise and avoid
the strbuf allocation entirely, and just use your mem-pool.

Like this:

diff --git a/merge-ort.c b/merge-ort.c
index 40ae4dc4e92..1111916d5cb 100644
--- a/merge-ort.c
+++ b/merge-ort.c
@@ -731,6 +731,16 @@ static char *unique_path(struct merge_options *opt,
 	int suffix = 0;
 	size_t base_len;
 	struct strmap *existing_paths = &opt->priv->paths;
+	/*
+	 * pre-size path + ~ + branch + _%d + "\0". Hopefully 6 digits
+	 * of suffix is enough for everyone?
+	 */
+	const size_t max_suffix = 6;
+	const size_t expected_len = strlen(path) + 1 + strlen(branch) + 1 +
+		max_suffix + 1;
+
+	ret = mem_pool_alloc(&opt->priv->pool, expected_len);
+	strbuf_attach(&newpath, ret, 0, expected_len);
 
 	strbuf_addf(&newpath, "%s~", path);
 	add_flattened_path(&newpath, branch);
@@ -741,10 +751,10 @@ static char *unique_path(struct merge_options *opt,
 		strbuf_addf(&newpath, "_%d", suffix++);
 	}
 
-	/* Track the new path in our memory pool */
-	ret = mem_pool_alloc(&opt->priv->pool, newpath.len + 1);
-	memcpy(ret, newpath.buf, newpath.len + 1);
-	strbuf_release(&newpath);
+	if (newpath.alloc > expected_len)
+		BUG("we assumed too much thinking '%s~%s' would fit in %lu, ended up %lu ('%s')",
+		    path, branch, expected_len, newpath.alloc, newpath.buf);
+
 	return ret;
 }
 

A bit nasty for sure, but if you're willing to BUG() out if we ever go
above 999999 suffix tries or whatever (which would be trivial to add to
the loop there) it's rather straightforward.

I.e. we know the size of the buffer ahead of time, except for that loop
that'll add "_%d" to the end, and that can be made bounded.

Obviously your solution's a lot simpler, so I think this is only
something you should consider if you think it matters for the
performance numbers linked to from 6697ee01b5d3. I'm not familiar enough
with merge-ort.c to know if it is in this case, or if this would be
pointless micro-optimization on a non-hot codepath.
Elijah Newren Feb. 20, 2022, 12:37 a.m. UTC | #2
On Sat, Feb 19, 2022 at 2:31 PM Ævar Arnfjörð Bjarmason
<avarab@gmail.com> wrote:
>
> On Sat, Feb 19 2022, Elijah Newren via GitGitGadget wrote:
>
> > From: Elijah Newren <newren@gmail.com>
> >
> > The struct strmap paths member of merge_options_internal is perhaps the
> > most central data structure to all of merge-ort.  Because all the paths
> > involved in the merge need to be kept until the merge is complete, this
> > "paths" data structure traditionally took responsibility for owning all
> > the allocated paths.  When the merge is over, those paths were free()d
> > as part of free()ing this strmap.
> >
> > In commit 6697ee01b5d3 (merge-ort: switch our strmaps over to using
> > memory pools, 2021-07-30), we changed the allocations for pathnames to
> > come from a memory pool.  That meant the ownership changed slightly;
> > there were no individual free() calls to make, instead the memory pool
> > owned all those paths and they were free()d all at once.
> >
> > Unfortunately unique_path() was written presuming the pre-memory-pool
> > model, and allocated a path on the heap and left it in the strmap for
> > later free()ing.  Modify it to return a path allocated from the memory
> > pool instead.
>
> This seems like a rather obvious fix to the leak, as the other side
> wasn't ready to have the detached strbuf handed to it, and instead is
> assuming everything is mempools.
>
> The downside is a bit of heap churn here since you malloc() & use the
> strbuf just to ask for that size from the mempool, and then free() the
> strbuf (of course we had that before, we just weren't free-ing).
>
> So this is just an aside & I have no idea if it's worth it, but FWIW you
> can have your cake & eat it too here memory-allocation wise and avoid
> the strbuf allocation entirely, and just use your mem-pool.
>
> Like this:
>
> diff --git a/merge-ort.c b/merge-ort.c
> index 40ae4dc4e92..1111916d5cb 100644
> --- a/merge-ort.c
> +++ b/merge-ort.c
> @@ -731,6 +731,16 @@ static char *unique_path(struct merge_options *opt,
>         int suffix = 0;
>         size_t base_len;
>         struct strmap *existing_paths = &opt->priv->paths;
> +       /*
> +        * pre-size path + ~ + branch + _%d + "\0". Hopefully 6 digits
> +        * of suffix is enough for everyone?
> +        */
> +       const size_t max_suffix = 6;
> +       const size_t expected_len = strlen(path) + 1 + strlen(branch) + 1 +
> +               max_suffix + 1;
> +
> +       ret = mem_pool_alloc(&opt->priv->pool, expected_len);
> +       strbuf_attach(&newpath, ret, 0, expected_len);
>
>         strbuf_addf(&newpath, "%s~", path);
>         add_flattened_path(&newpath, branch);
> @@ -741,10 +751,10 @@ static char *unique_path(struct merge_options *opt,
>                 strbuf_addf(&newpath, "_%d", suffix++);
>         }
>
> -       /* Track the new path in our memory pool */
> -       ret = mem_pool_alloc(&opt->priv->pool, newpath.len + 1);
> -       memcpy(ret, newpath.buf, newpath.len + 1);
> -       strbuf_release(&newpath);
> +       if (newpath.alloc > expected_len)
> +               BUG("we assumed too much thinking '%s~%s' would fit in %lu, ended up %lu ('%s')",
> +                   path, branch, expected_len, newpath.alloc, newpath.buf);
> +
>         return ret;
>  }
>
>
> A bit nasty for sure, but if you're willing to BUG() out if we ever go
> above 999999 suffix tries or whatever (which would be trivial to add to
> the loop there) it's rather straightforward.
>
> I.e. we know the size of the buffer ahead of time, except for that loop
> that'll add "_%d" to the end, and that can be made bounded.
>
> Obviously your solution's a lot simpler, so I think this is only
> something you should consider if you think it matters for the
> performance numbers linked to from 6697ee01b5d3. I'm not familiar enough
> with merge-ort.c to know if it is in this case, or if this would be
> pointless micro-optimization on a non-hot codepath.

That's an interesting idea, but it's a micro-optimization on a very
cold path.  You need to either have a D/F conflict that persists or a
mode-type conflict (e.g. an add/add conflict with symlink vs. file
types).  Those tend to be quite rare; in fact, the testcase with the
performance numbers in 6697ee01b5d3 didn't have any of these and never
even triggered this codepath (otherwise I would have caught the leak
in my earlier leak testing).  So I think simple and robust makes more
sense here.
diff mbox series

Patch

diff --git a/merge-ort.c b/merge-ort.c
index 4f5abc558c5..40ae4dc4e92 100644
--- a/merge-ort.c
+++ b/merge-ort.c
@@ -722,13 +722,15 @@  static void add_flattened_path(struct strbuf *out, const char *s)
 			out->buf[i] = '_';
 }
 
-static char *unique_path(struct strmap *existing_paths,
+static char *unique_path(struct merge_options *opt,
 			 const char *path,
 			 const char *branch)
 {
+	char *ret = NULL;
 	struct strbuf newpath = STRBUF_INIT;
 	int suffix = 0;
 	size_t base_len;
+	struct strmap *existing_paths = &opt->priv->paths;
 
 	strbuf_addf(&newpath, "%s~", path);
 	add_flattened_path(&newpath, branch);
@@ -739,7 +741,11 @@  static char *unique_path(struct strmap *existing_paths,
 		strbuf_addf(&newpath, "_%d", suffix++);
 	}
 
-	return strbuf_detach(&newpath, NULL);
+	/* Track the new path in our memory pool */
+	ret = mem_pool_alloc(&opt->priv->pool, newpath.len + 1);
+	memcpy(ret, newpath.buf, newpath.len + 1);
+	strbuf_release(&newpath);
+	return ret;
 }
 
 /*** Function Grouping: functions related to collect_merge_info() ***/
@@ -3679,7 +3685,7 @@  static void process_entry(struct merge_options *opt,
 		 */
 		df_file_index = (ci->dirmask & (1 << 1)) ? 2 : 1;
 		branch = (df_file_index == 1) ? opt->branch1 : opt->branch2;
-		path = unique_path(&opt->priv->paths, path, branch);
+		path = unique_path(opt, path, branch);
 		strmap_put(&opt->priv->paths, path, new_ci);
 
 		path_msg(opt, path, 0,
@@ -3804,14 +3810,12 @@  static void process_entry(struct merge_options *opt,
 			/* Insert entries into opt->priv_paths */
 			assert(rename_a || rename_b);
 			if (rename_a) {
-				a_path = unique_path(&opt->priv->paths,
-						     path, opt->branch1);
+				a_path = unique_path(opt, path, opt->branch1);
 				strmap_put(&opt->priv->paths, a_path, ci);
 			}
 
 			if (rename_b)
-				b_path = unique_path(&opt->priv->paths,
-						     path, opt->branch2);
+				b_path = unique_path(opt, path, opt->branch2);
 			else
 				b_path = path;
 			strmap_put(&opt->priv->paths, b_path, new_ci);
@@ -4199,7 +4203,7 @@  static int record_conflicted_index_entries(struct merge_options *opt)
 				struct stat st;
 
 				if (!lstat(path, &st)) {
-					char *new_name = unique_path(&opt->priv->paths,
+					char *new_name = unique_path(opt,
 								     path,
 								     "cruft");
 
@@ -4207,7 +4211,6 @@  static int record_conflicted_index_entries(struct merge_options *opt)
 						 _("Note: %s not up to date and in way of checking out conflicted version; old copy renamed to %s"),
 						 path, new_name);
 					errs |= rename(path, new_name);
-					free(new_name);
 				}
 				errs |= checkout_entry(ce, &state, NULL, NULL);
 			}