diff mbox series

[06/22] read-cache: fix leaking hashfile when writing index fails

Message ID 2810cada0af14c3331674a807b20d18e6c9af022.1722933642.git.ps@pks.im (mailing list archive)
State Superseded
Headers show
Series Memory leak fixes (pt.4) | expand

Commit Message

Patrick Steinhardt Aug. 6, 2024, 9 a.m. UTC
In `do_write_index()`, we use a `struct hashfile` to write the index
with a trailer hash. In case the write fails though, we never clean up
the allocated `hashfile` state and thus leak memory.

Refactor the code to have a common exit path where we can free this and
other allocated memory. While at it, refactor our use of `strbuf`s such
that we reuse the same buffer to avoid some unneeded allocations.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 read-cache.c                       | 97 ++++++++++++++++++------------
 t/t1601-index-bogus.sh             |  2 +
 t/t2107-update-index-basic.sh      |  1 +
 t/t7008-filter-branch-null-sha1.sh |  1 +
 4 files changed, 62 insertions(+), 39 deletions(-)

Comments

James Liu Aug. 7, 2024, 7:01 a.m. UTC | #1
On Tue Aug 6, 2024 at 7:00 PM AEST, Patrick Steinhardt wrote:
> Refactor the code to have a common exit path where we can free this and
> other allocated memory. While at it, refactor our use of `strbuf`s such
> that we reuse the same buffer to avoid some unneeded allocations.
>
> @@ -3105,7 +3117,14 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile,
>  	trace2_data_intmax("index", the_repository, "write/cache_nr",
>  			   istate->cache_nr);
>  
> -	return 0;
> +	ret = 0;
> +
> +out:
> +	if (f)
> +		free_hashfile(f);
> +	strbuf_release(&sb);
> +	free(ieot);
> +	return ret;
>  }

Is it generally a pattern in Git to use `goto <label>` instead of
returns when there are multiple return points in a function? We're also
performing cleanup duties here and in most of those scenarios but there
are some cases like `reftable_be_pack_refs()` where the goto simply
collapses multiple return points into a single path.
Patrick Steinhardt Aug. 8, 2024, 5:04 a.m. UTC | #2
On Wed, Aug 07, 2024 at 05:01:17PM +1000, James Liu wrote:
> On Tue Aug 6, 2024 at 7:00 PM AEST, Patrick Steinhardt wrote:
> > Refactor the code to have a common exit path where we can free this and
> > other allocated memory. While at it, refactor our use of `strbuf`s such
> > that we reuse the same buffer to avoid some unneeded allocations.
> >
> > @@ -3105,7 +3117,14 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile,
> >  	trace2_data_intmax("index", the_repository, "write/cache_nr",
> >  			   istate->cache_nr);
> >  
> > -	return 0;
> > +	ret = 0;
> > +
> > +out:
> > +	if (f)
> > +		free_hashfile(f);
> > +	strbuf_release(&sb);
> > +	free(ieot);
> > +	return ret;
> >  }
> 
> Is it generally a pattern in Git to use `goto <label>` instead of
> returns when there are multiple return points in a function? We're also
> performing cleanup duties here and in most of those scenarios but there
> are some cases like `reftable_be_pack_refs()` where the goto simply
> collapses multiple return points into a single path.

Yes, that's usually how we avoid repetetive cleanup code for each of the
return paths. `reftable_be_pack_refs()` is a bit more on the curious
side as the common exit path doesn't really do helpful. That one could
have just as well used plain returns.

Patrick
diff mbox series

Patch

diff --git a/read-cache.c b/read-cache.c
index 48bf24f87c..36821fe5b5 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -2840,8 +2840,9 @@  static int do_write_index(struct index_state *istate, struct tempfile *tempfile,
 	int csum_fsync_flag;
 	int ieot_entries = 1;
 	struct index_entry_offset_table *ieot = NULL;
-	int nr, nr_threads;
 	struct repository *r = istate->repo;
+	struct strbuf sb = STRBUF_INIT;
+	int nr, nr_threads, ret;
 
 	f = hashfd(tempfile->fd, tempfile->filename.buf);
 
@@ -2962,8 +2963,8 @@  static int do_write_index(struct index_state *istate, struct tempfile *tempfile,
 	strbuf_release(&previous_name_buf);
 
 	if (err) {
-		free(ieot);
-		return err;
+		ret = err;
+		goto out;
 	}
 
 	offset = hashfile_total(f);
@@ -2985,20 +2986,20 @@  static int do_write_index(struct index_state *istate, struct tempfile *tempfile,
 	 * index.
 	 */
 	if (ieot) {
-		struct strbuf sb = STRBUF_INIT;
+		strbuf_reset(&sb);
 
 		write_ieot_extension(&sb, ieot);
 		err = write_index_ext_header(f, eoie_c, CACHE_EXT_INDEXENTRYOFFSETTABLE, sb.len) < 0;
 		hashwrite(f, sb.buf, sb.len);
-		strbuf_release(&sb);
-		free(ieot);
-		if (err)
-			return -1;
+		if (err) {
+			ret = -1;
+			goto out;
+		}
 	}
 
 	if (write_extensions & WRITE_SPLIT_INDEX_EXTENSION &&
 	    istate->split_index) {
-		struct strbuf sb = STRBUF_INIT;
+		strbuf_reset(&sb);
 
 		if (istate->sparse_index)
 			die(_("cannot write split index for a sparse index"));
@@ -3007,59 +3008,66 @@  static int do_write_index(struct index_state *istate, struct tempfile *tempfile,
 			write_index_ext_header(f, eoie_c, CACHE_EXT_LINK,
 					       sb.len) < 0;
 		hashwrite(f, sb.buf, sb.len);
-		strbuf_release(&sb);
-		if (err)
-			return -1;
+		if (err) {
+			ret = -1;
+			goto out;
+		}
 	}
 	if (write_extensions & WRITE_CACHE_TREE_EXTENSION &&
 	    !drop_cache_tree && istate->cache_tree) {
-		struct strbuf sb = STRBUF_INIT;
+		strbuf_reset(&sb);
 
 		cache_tree_write(&sb, istate->cache_tree);
 		err = write_index_ext_header(f, eoie_c, CACHE_EXT_TREE, sb.len) < 0;
 		hashwrite(f, sb.buf, sb.len);
-		strbuf_release(&sb);
-		if (err)
-			return -1;
+		if (err) {
+			ret = -1;
+			goto out;
+		}
 	}
 	if (write_extensions & WRITE_RESOLVE_UNDO_EXTENSION &&
 	    istate->resolve_undo) {
-		struct strbuf sb = STRBUF_INIT;
+		strbuf_reset(&sb);
 
 		resolve_undo_write(&sb, istate->resolve_undo);
 		err = write_index_ext_header(f, eoie_c, CACHE_EXT_RESOLVE_UNDO,
 					     sb.len) < 0;
 		hashwrite(f, sb.buf, sb.len);
-		strbuf_release(&sb);
-		if (err)
-			return -1;
+		if (err) {
+			ret = -1;
+			goto out;
+		}
 	}
 	if (write_extensions & WRITE_UNTRACKED_CACHE_EXTENSION &&
 	    istate->untracked) {
-		struct strbuf sb = STRBUF_INIT;
+		strbuf_reset(&sb);
 
 		write_untracked_extension(&sb, istate->untracked);
 		err = write_index_ext_header(f, eoie_c, CACHE_EXT_UNTRACKED,
 					     sb.len) < 0;
 		hashwrite(f, sb.buf, sb.len);
-		strbuf_release(&sb);
-		if (err)
-			return -1;
+		if (err) {
+			ret = -1;
+			goto out;
+		}
 	}
 	if (write_extensions & WRITE_FSMONITOR_EXTENSION &&
 	    istate->fsmonitor_last_update) {
-		struct strbuf sb = STRBUF_INIT;
+		strbuf_reset(&sb);
 
 		write_fsmonitor_extension(&sb, istate);
 		err = write_index_ext_header(f, eoie_c, CACHE_EXT_FSMONITOR, sb.len) < 0;
 		hashwrite(f, sb.buf, sb.len);
-		strbuf_release(&sb);
-		if (err)
-			return -1;
+		if (err) {
+			ret = -1;
+			goto out;
+		}
 	}
 	if (istate->sparse_index) {
-		if (write_index_ext_header(f, eoie_c, CACHE_EXT_SPARSE_DIRECTORIES, 0) < 0)
-			return -1;
+		if (write_index_ext_header(f, eoie_c, CACHE_EXT_SPARSE_DIRECTORIES, 0) < 0) {
+			ret = -1;
+			goto out;
+		}
 	}
 
 	/*
@@ -3069,14 +3077,15 @@  static int do_write_index(struct index_state *istate, struct tempfile *tempfile,
 	 * when loading the shared index.
 	 */
 	if (eoie_c) {
-		struct strbuf sb = STRBUF_INIT;
+		strbuf_reset(&sb);
 
 		write_eoie_extension(&sb, eoie_c, offset);
 		err = write_index_ext_header(f, NULL, CACHE_EXT_ENDOFINDEXENTRIES, sb.len) < 0;
 		hashwrite(f, sb.buf, sb.len);
-		strbuf_release(&sb);
-		if (err)
-			return -1;
+		if (err) {
+			ret = -1;
+			goto out;
+		}
 	}
 
 	csum_fsync_flag = 0;
@@ -3085,13 +3094,16 @@  static int do_write_index(struct index_state *istate, struct tempfile *tempfile,
 
 	finalize_hashfile(f, istate->oid.hash, FSYNC_COMPONENT_INDEX,
 			  CSUM_HASH_IN_STREAM | csum_fsync_flag);
+	f = NULL;
 
 	if (close_tempfile_gently(tempfile)) {
-		error(_("could not close '%s'"), get_tempfile_path(tempfile));
-		return -1;
+		ret = error(_("could not close '%s'"), get_tempfile_path(tempfile));
+		goto out;
+	}
+	if (stat(get_tempfile_path(tempfile), &st)) {
+		ret = -1;
+		goto out;
 	}
-	if (stat(get_tempfile_path(tempfile), &st))
-		return -1;
 	istate->timestamp.sec = (unsigned int)st.st_mtime;
 	istate->timestamp.nsec = ST_MTIME_NSEC(st);
 	trace_performance_since(start, "write index, changed mask = %x", istate->cache_changed);
@@ -3105,7 +3117,14 @@  static int do_write_index(struct index_state *istate, struct tempfile *tempfile,
 	trace2_data_intmax("index", the_repository, "write/cache_nr",
 			   istate->cache_nr);
 
-	return 0;
+	ret = 0;
+
+out:
+	if (f)
+		free_hashfile(f);
+	strbuf_release(&sb);
+	free(ieot);
+	return ret;
 }
 
 void set_alternate_index_output(const char *name)
diff --git a/t/t1601-index-bogus.sh b/t/t1601-index-bogus.sh
index 4171f1e141..5dcc101882 100755
--- a/t/t1601-index-bogus.sh
+++ b/t/t1601-index-bogus.sh
@@ -1,6 +1,8 @@ 
 #!/bin/sh
 
 test_description='test handling of bogus index entries'
+
+TEST_PASSES_SANITIZE_LEAK=true
 . ./test-lib.sh
 
 test_expect_success 'create tree with null sha1' '
diff --git a/t/t2107-update-index-basic.sh b/t/t2107-update-index-basic.sh
index cc72ead79f..f0eab13f96 100755
--- a/t/t2107-update-index-basic.sh
+++ b/t/t2107-update-index-basic.sh
@@ -5,6 +5,7 @@  test_description='basic update-index tests
 Tests for command-line parsing and basic operation.
 '
 
+TEST_PASSES_SANITIZE_LEAK=true
 . ./test-lib.sh
 
 test_expect_success 'update-index --nonsense fails' '
diff --git a/t/t7008-filter-branch-null-sha1.sh b/t/t7008-filter-branch-null-sha1.sh
index 93fbc92b8d..0ce8fd2c89 100755
--- a/t/t7008-filter-branch-null-sha1.sh
+++ b/t/t7008-filter-branch-null-sha1.sh
@@ -2,6 +2,7 @@ 
 
 test_description='filter-branch removal of trees with null sha1'
 
+TEST_PASSES_SANITIZE_LEAK=true
 . ./test-lib.sh
 
 test_expect_success 'setup: base commits' '