diff mbox series

[13/22] builtin/fast-export: plug leaking tag names

Message ID 64366155dee25209ab9c434016c5918d47d7e1d5.1722933642.git.ps@pks.im (mailing list archive)
State Superseded
Headers show
Series Memory leak fixes (pt.4) | expand

Commit Message

Patrick Steinhardt Aug. 6, 2024, 9 a.m. UTC
When resolving revisions in `get_tags_and_duplicates()`, we only
partially manage the lifetime of `full_name`. In fact, managing its
lifetime properly is almost impossible because we put direct pointers to
that variable into multiple lists without duplicating the string. The
consequence is that these strings will ultimately leak.

Refactor the code to make the lists we put those names into duplicate
the memory. This allows us to properly free the string as required and
thus plugs the memory leak.

While this requires us to allocate more data overall, it shouldn't be
all that bad given that the number of allocations corresponds with the
number of command line parameters, which typically aren't all that many.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 builtin/fast-export.c            | 17 ++++++++++++-----
 t/t9351-fast-export-anonymize.sh |  1 +
 2 files changed, 13 insertions(+), 5 deletions(-)

Comments

James Liu Aug. 7, 2024, 8:31 a.m. UTC | #1
On Tue Aug 6, 2024 at 7:00 PM AEST, Patrick Steinhardt wrote:
> Refactor the code to make the lists we put those names into duplicate
> the memory. This allows us to properly free the string as required and
> thus plugs the memory leak.
>
> While this requires us to allocate more data overall, it shouldn't be
> all that bad given that the number of allocations corresponds with the
> number of command line parameters, which typically aren't all that many.

Ahh so using the `STRING_LIST_INIT_DUP` initialiser means that every
time we call `string_list_append()` on the list, we retain ownership of
the string and the list gets its own copy.

That means we're able to free our own copy later on.
Patrick Steinhardt Aug. 8, 2024, 5:05 a.m. UTC | #2
On Wed, Aug 07, 2024 at 06:31:33PM +1000, James Liu wrote:
> On Tue Aug 6, 2024 at 7:00 PM AEST, Patrick Steinhardt wrote:
> > Refactor the code to make the lists we put those names into duplicate
> > the memory. This allows us to properly free the string as required and
> > thus plugs the memory leak.
> >
> > While this requires us to allocate more data overall, it shouldn't be
> > all that bad given that the number of allocations corresponds with the
> > number of command line parameters, which typically aren't all that many.
> 
> Ahh so using the `STRING_LIST_INIT_DUP` initialiser means that every
> time we call `string_list_append()` on the list, we retain ownership of
> the string and the list gets its own copy.
> 
> That means we're able to free our own copy later on.

Yes, exactly. I think that we really should change the naming though.
I've repeatedly seen the pattern that people think initializing the list
wtih `_NODUP` would transfer ownership of inserted strings. It does not
though, it simply assumes that the strings will be kept alive by the
caller.

This is made worse by the fact that we have `strvec_insert_nodup()`,
which _does_ transfer ownership. So we're using two different meanings
for "nodup", so I totally get why people are confused by this interface.

I'll leave that for a separate series though.

Patrick
diff mbox series

Patch

diff --git a/builtin/fast-export.c b/builtin/fast-export.c
index fe92d2436c..f253b79322 100644
--- a/builtin/fast-export.c
+++ b/builtin/fast-export.c
@@ -42,8 +42,8 @@  static int full_tree;
 static int reference_excluded_commits;
 static int show_original_ids;
 static int mark_tags;
-static struct string_list extra_refs = STRING_LIST_INIT_NODUP;
-static struct string_list tag_refs = STRING_LIST_INIT_NODUP;
+static struct string_list extra_refs = STRING_LIST_INIT_DUP;
+static struct string_list tag_refs = STRING_LIST_INIT_DUP;
 static struct refspec refspecs = REFSPEC_INIT_FETCH;
 static int anonymize;
 static struct hashmap anonymized_seeds;
@@ -901,7 +901,7 @@  static void handle_tag(const char *name, struct tag *tag)
 	free(buf);
 }
 
-static struct commit *get_commit(struct rev_cmdline_entry *e, char *full_name)
+static struct commit *get_commit(struct rev_cmdline_entry *e, const char *full_name)
 {
 	switch (e->item->type) {
 	case OBJ_COMMIT:
@@ -932,14 +932,16 @@  static void get_tags_and_duplicates(struct rev_cmdline_info *info)
 		struct rev_cmdline_entry *e = info->rev + i;
 		struct object_id oid;
 		struct commit *commit;
-		char *full_name;
+		char *full_name = NULL;
 
 		if (e->flags & UNINTERESTING)
 			continue;
 
 		if (repo_dwim_ref(the_repository, e->name, strlen(e->name),
-				  &oid, &full_name, 0) != 1)
+				  &oid, &full_name, 0) != 1) {
+			free(full_name);
 			continue;
+		}
 
 		if (refspecs.nr) {
 			char *private;
@@ -955,6 +957,7 @@  static void get_tags_and_duplicates(struct rev_cmdline_info *info)
 			warning("%s: Unexpected object of type %s, skipping.",
 				e->name,
 				type_name(e->item->type));
+			free(full_name);
 			continue;
 		}
 
@@ -963,10 +966,12 @@  static void get_tags_and_duplicates(struct rev_cmdline_info *info)
 			break;
 		case OBJ_BLOB:
 			export_blob(&commit->object.oid);
+			free(full_name);
 			continue;
 		default: /* OBJ_TAG (nested tags) is already handled */
 			warning("Tag points to object of unexpected type %s, skipping.",
 				type_name(commit->object.type));
+			free(full_name);
 			continue;
 		}
 
@@ -979,6 +984,8 @@  static void get_tags_and_duplicates(struct rev_cmdline_info *info)
 
 		if (!*revision_sources_at(&revision_sources, commit))
 			*revision_sources_at(&revision_sources, commit) = full_name;
+		else
+			free(full_name);
 	}
 
 	string_list_sort(&extra_refs);
diff --git a/t/t9351-fast-export-anonymize.sh b/t/t9351-fast-export-anonymize.sh
index 156a647484..c0d9d7be75 100755
--- a/t/t9351-fast-export-anonymize.sh
+++ b/t/t9351-fast-export-anonymize.sh
@@ -4,6 +4,7 @@  test_description='basic tests for fast-export --anonymize'
 GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main
 export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
 
+TEST_PASSES_SANITIZE_LEAK=true
 . ./test-lib.sh
 
 test_expect_success 'setup simple repo' '