diff mbox series

[03/28] fetch-pack: fix leaking sought refs

Message ID 20240924215103.GC1143820@coredump.intra.peff.net (mailing list archive)
State Accepted
Commit 6f54d00439759feab82d40577d272c57d449a554
Headers show
Series leak fixes for http fetch/push | expand

Commit Message

Jeff King Sept. 24, 2024, 9:51 p.m. UTC
From: Patrick Steinhardt <ps@pks.im>

When calling `fetch_pack()` the caller is expected to pass in a set of
sought-after refs that they want to fetch. This array gets massaged to
not contain duplicate entries, which is done by replacing duplicate refs
with `NULL` pointers. This modifies the caller-provided array, and in
case we do unset any pointers the caller now loses track of that ref and
cannot free it anymore.

Now the obvious fix would be to not only unset these pointers, but to
also free their contents. But this doesn't work because callers continue
to use those refs. Another potential solution would be to copy the array
in `fetch_pack()` so that we dont modify the caller-provided one. But
that doesn't work either because the NULL-ness of those entries is used
by callers to skip over ref entries that we didn't even try to fetch in
`report_unmatched_refs()`.

Instead, we make it the responsibility of our callers to duplicate these
arrays as needed. It ain't pretty, but it works to plug the memory leak.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Jeff King <peff@peff.net>
---
 builtin/fetch-pack.c   | 11 ++++++++++-
 t/t5700-protocol-v1.sh |  1 +
 transport.c            | 11 ++++++++++-
 3 files changed, 21 insertions(+), 2 deletions(-)

Comments

René Scharfe Sept. 25, 2024, 5:17 p.m. UTC | #1
Am 24.09.24 um 23:51 schrieb Jeff King:
> From: Patrick Steinhardt <ps@pks.im>
>
> When calling `fetch_pack()` the caller is expected to pass in a set of
> sought-after refs that they want to fetch. This array gets massaged to
> not contain duplicate entries, which is done by replacing duplicate refs
> with `NULL` pointers. This modifies the caller-provided array, and in
> case we do unset any pointers the caller now loses track of that ref and
> cannot free it anymore.
>
> Now the obvious fix would be to not only unset these pointers, but to
> also free their contents. But this doesn't work because callers continue
> to use those refs. Another potential solution would be to copy the array
> in `fetch_pack()` so that we dont modify the caller-provided one. But
> that doesn't work either because the NULL-ness of those entries is used
> by callers to skip over ref entries that we didn't even try to fetch in
> `report_unmatched_refs()`.
>
> Instead, we make it the responsibility of our callers to duplicate these
> arrays as needed. It ain't pretty, but it works to plug the memory leak.
>
> Signed-off-by: Patrick Steinhardt <ps@pks.im>
> Signed-off-by: Jeff King <peff@peff.net>
> ---
>  builtin/fetch-pack.c   | 11 ++++++++++-
>  t/t5700-protocol-v1.sh |  1 +
>  transport.c            | 11 ++++++++++-
>  3 files changed, 21 insertions(+), 2 deletions(-)
>
> diff --git a/builtin/fetch-pack.c b/builtin/fetch-pack.c
> index 49222a36fa..c5319232a5 100644
> --- a/builtin/fetch-pack.c
> +++ b/builtin/fetch-pack.c
> @@ -53,6 +53,7 @@ int cmd_fetch_pack(int argc,
>  	struct ref *fetched_refs = NULL, *remote_refs = NULL;
>  	const char *dest = NULL;
>  	struct ref **sought = NULL;
> +	struct ref **sought_to_free = NULL;
>  	int nr_sought = 0, alloc_sought = 0;
>  	int fd[2];
>  	struct string_list pack_lockfiles = STRING_LIST_INIT_DUP;
> @@ -243,6 +244,13 @@ int cmd_fetch_pack(int argc,
>  		BUG("unknown protocol version");
>  	}
>
> +	/*
> +	 * Create a shallow copy of `sought` so that we can free all of its entries.
> +	 * This is because `fetch_pack()` will modify the array to evict some
> +	 * entries, but won't free those.
> +	 */
> +	DUP_ARRAY(sought_to_free, sought, nr_sought);
> +
>  	fetched_refs = fetch_pack(&args, fd, remote_refs, sought, nr_sought,
>  			 &shallow, pack_lockfiles_ptr, version);
>
> @@ -280,7 +288,8 @@ int cmd_fetch_pack(int argc,
>  		       oid_to_hex(&ref->old_oid), ref->name);
>
>  	for (size_t i = 0; i < nr_sought; i++)
> -		free_one_ref(sought[i]);
> +		free_one_ref(sought_to_free[i]);
> +	free(sought_to_free);

OK.

>  	free(sought);
>  	free_refs(fetched_refs);
>  	free_refs(remote_refs);
> diff --git a/t/t5700-protocol-v1.sh b/t/t5700-protocol-v1.sh
> index a73b4d4ff6..985e04d06e 100755
> --- a/t/t5700-protocol-v1.sh
> +++ b/t/t5700-protocol-v1.sh
> @@ -11,6 +11,7 @@ export GIT_TEST_PROTOCOL_VERSION
>  GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main
>  export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
>
> +TEST_PASSES_SANITIZE_LEAK=true
>  . ./test-lib.sh
>
>  # Test protocol v1 with 'git://' transport
> diff --git a/transport.c b/transport.c
> index 3c4714581f..1098bbd60e 100644
> --- a/transport.c
> +++ b/transport.c
> @@ -414,7 +414,7 @@ static int fetch_refs_via_pack(struct transport *transport,
>  	struct git_transport_data *data = transport->data;
>  	struct ref *refs = NULL;
>  	struct fetch_pack_args args;
> -	struct ref *refs_tmp = NULL;
> +	struct ref *refs_tmp = NULL, **to_fetch_dup = NULL;
>
>  	memset(&args, 0, sizeof(args));
>  	args.uploadpack = data->options.uploadpack;
> @@ -477,6 +477,14 @@ static int fetch_refs_via_pack(struct transport *transport,
>  		goto cleanup;
>  	}
>
> +	/*
> +	 * Create a shallow copy of `sought` so that we can free all of its entries.
> +	 * This is because `fetch_pack()` will modify the array to evict some
> +	 * entries, but won't free those.
> +	 */
> +	DUP_ARRAY(to_fetch_dup, to_fetch, nr_heads);
> +	to_fetch = to_fetch_dup;
> +
>  	refs = fetch_pack(&args, data->fd,
>  			  refs_tmp ? refs_tmp : transport->remote_refs,
>  			  to_fetch, nr_heads, &data->shallow,
> @@ -500,6 +508,7 @@ static int fetch_refs_via_pack(struct transport *transport,
>  		ret = -1;
>  	data->conn = NULL;
>
> +	free(to_fetch_dup);

This makes a shallow copy and passes it to fetch_pack() and later to
report_unmatched_refs().  It shields callers of fetch_refs_via_pack()
from the effect of fetch_pack()'s NULLification.  This is what the
commit message says would not work.  What am I missing?

>  	free_refs(refs_tmp);
>  	free_refs(refs);
>  	list_objects_filter_release(&args.filter_options);
Patrick Steinhardt Sept. 26, 2024, 11:52 a.m. UTC | #2
On Wed, Sep 25, 2024 at 07:17:24PM +0200, René Scharfe wrote:
> Am 24.09.24 um 23:51 schrieb Jeff King:
> > From: Patrick Steinhardt <ps@pks.im>
> > diff --git a/transport.c b/transport.c
> > index 3c4714581f..1098bbd60e 100644
> > --- a/transport.c
> > +++ b/transport.c
> > @@ -414,7 +414,7 @@ static int fetch_refs_via_pack(struct transport *transport,
> >  	struct git_transport_data *data = transport->data;
> >  	struct ref *refs = NULL;
> >  	struct fetch_pack_args args;
> > -	struct ref *refs_tmp = NULL;
> > +	struct ref *refs_tmp = NULL, **to_fetch_dup = NULL;
> >
> >  	memset(&args, 0, sizeof(args));
> >  	args.uploadpack = data->options.uploadpack;
> > @@ -477,6 +477,14 @@ static int fetch_refs_via_pack(struct transport *transport,
> >  		goto cleanup;
> >  	}
> >
> > +	/*
> > +	 * Create a shallow copy of `sought` so that we can free all of its entries.
> > +	 * This is because `fetch_pack()` will modify the array to evict some
> > +	 * entries, but won't free those.
> > +	 */
> > +	DUP_ARRAY(to_fetch_dup, to_fetch, nr_heads);
> > +	to_fetch = to_fetch_dup;
> > +
> >  	refs = fetch_pack(&args, data->fd,
> >  			  refs_tmp ? refs_tmp : transport->remote_refs,
> >  			  to_fetch, nr_heads, &data->shallow,
> > @@ -500,6 +508,7 @@ static int fetch_refs_via_pack(struct transport *transport,
> >  		ret = -1;
> >  	data->conn = NULL;
> >
> > +	free(to_fetch_dup);
> 
> This makes a shallow copy and passes it to fetch_pack() and later to
> report_unmatched_refs().  It shields callers of fetch_refs_via_pack()
> from the effect of fetch_pack()'s NULLification.  This is what the
> commit message says would not work.  What am I missing?

`fetch_refs_via_pack()` is called via `transport_fetch_refs()`, which
constructs the array of refs to fetch ad-hoc and then discards it
immediately. So it doesn't ever inspect the modified array in the first
place, and consequently we can guard against the NULLification here.

This code is quite intricate :/

Patrick
diff mbox series

Patch

diff --git a/builtin/fetch-pack.c b/builtin/fetch-pack.c
index 49222a36fa..c5319232a5 100644
--- a/builtin/fetch-pack.c
+++ b/builtin/fetch-pack.c
@@ -53,6 +53,7 @@  int cmd_fetch_pack(int argc,
 	struct ref *fetched_refs = NULL, *remote_refs = NULL;
 	const char *dest = NULL;
 	struct ref **sought = NULL;
+	struct ref **sought_to_free = NULL;
 	int nr_sought = 0, alloc_sought = 0;
 	int fd[2];
 	struct string_list pack_lockfiles = STRING_LIST_INIT_DUP;
@@ -243,6 +244,13 @@  int cmd_fetch_pack(int argc,
 		BUG("unknown protocol version");
 	}
 
+	/*
+	 * Create a shallow copy of `sought` so that we can free all of its entries.
+	 * This is because `fetch_pack()` will modify the array to evict some
+	 * entries, but won't free those.
+	 */
+	DUP_ARRAY(sought_to_free, sought, nr_sought);
+
 	fetched_refs = fetch_pack(&args, fd, remote_refs, sought, nr_sought,
 			 &shallow, pack_lockfiles_ptr, version);
 
@@ -280,7 +288,8 @@  int cmd_fetch_pack(int argc,
 		       oid_to_hex(&ref->old_oid), ref->name);
 
 	for (size_t i = 0; i < nr_sought; i++)
-		free_one_ref(sought[i]);
+		free_one_ref(sought_to_free[i]);
+	free(sought_to_free);
 	free(sought);
 	free_refs(fetched_refs);
 	free_refs(remote_refs);
diff --git a/t/t5700-protocol-v1.sh b/t/t5700-protocol-v1.sh
index a73b4d4ff6..985e04d06e 100755
--- a/t/t5700-protocol-v1.sh
+++ b/t/t5700-protocol-v1.sh
@@ -11,6 +11,7 @@  export GIT_TEST_PROTOCOL_VERSION
 GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main
 export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
 
+TEST_PASSES_SANITIZE_LEAK=true
 . ./test-lib.sh
 
 # Test protocol v1 with 'git://' transport
diff --git a/transport.c b/transport.c
index 3c4714581f..1098bbd60e 100644
--- a/transport.c
+++ b/transport.c
@@ -414,7 +414,7 @@  static int fetch_refs_via_pack(struct transport *transport,
 	struct git_transport_data *data = transport->data;
 	struct ref *refs = NULL;
 	struct fetch_pack_args args;
-	struct ref *refs_tmp = NULL;
+	struct ref *refs_tmp = NULL, **to_fetch_dup = NULL;
 
 	memset(&args, 0, sizeof(args));
 	args.uploadpack = data->options.uploadpack;
@@ -477,6 +477,14 @@  static int fetch_refs_via_pack(struct transport *transport,
 		goto cleanup;
 	}
 
+	/*
+	 * Create a shallow copy of `sought` so that we can free all of its entries.
+	 * This is because `fetch_pack()` will modify the array to evict some
+	 * entries, but won't free those.
+	 */
+	DUP_ARRAY(to_fetch_dup, to_fetch, nr_heads);
+	to_fetch = to_fetch_dup;
+
 	refs = fetch_pack(&args, data->fd,
 			  refs_tmp ? refs_tmp : transport->remote_refs,
 			  to_fetch, nr_heads, &data->shallow,
@@ -500,6 +508,7 @@  static int fetch_refs_via_pack(struct transport *transport,
 		ret = -1;
 	data->conn = NULL;
 
+	free(to_fetch_dup);
 	free_refs(refs_tmp);
 	free_refs(refs);
 	list_objects_filter_release(&args.filter_options);