diff mbox series

[2/9] bulk-checkin.c: store checksum directly

Message ID 20b35ce0505d7bcc84b6fd68a33e0b6b1afa31e6.1631157880.git.me@ttaylorr.com (mailing list archive)
State Superseded
Headers show
Series packfile: avoid .idx rename races | expand

Commit Message

Taylor Blau Sept. 9, 2021, 3:24 a.m. UTC
finish_bulk_checkin() stores the checksum from finalize_hashfile() by
writing to the `hash` member of `struct object_id`, but that hash has
nothing to do with an object id (it's just a convenient location that
happens to be sized correctly).

Store the hash directly in an unsigned char array. This behaves the same
as writing to the `hash` member, but makes the intent clearer (and
avoids allocating an extra four bytes for the `algo` member of `struct
object_id`).

Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
 bulk-checkin.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

Comments

Ævar Arnfjörð Bjarmason Sept. 9, 2021, 7:38 a.m. UTC | #1
On Wed, Sep 08 2021, Taylor Blau wrote:

> finish_bulk_checkin() stores the checksum from finalize_hashfile() by
> writing to the `hash` member of `struct object_id`, but that hash has
> nothing to do with an object id (it's just a convenient location that
> happens to be sized correctly).
>
> Store the hash directly in an unsigned char array. This behaves the same
> as writing to the `hash` member, but makes the intent clearer (and
> avoids allocating an extra four bytes for the `algo` member of `struct
> object_id`).
>
> Signed-off-by: Taylor Blau <me@ttaylorr.com>
> ---
>  bulk-checkin.c | 12 ++++++------
>  1 file changed, 6 insertions(+), 6 deletions(-)
>
> diff --git a/bulk-checkin.c b/bulk-checkin.c
> index b023d9959a..6283bc8bd9 100644
> --- a/bulk-checkin.c
> +++ b/bulk-checkin.c
> @@ -25,7 +25,7 @@ static struct bulk_checkin_state {
>  
>  static void finish_bulk_checkin(struct bulk_checkin_state *state)
>  {
> -	struct object_id oid;
> +	unsigned char hash[GIT_MAX_RAWSZ];
>  	struct strbuf packname = STRBUF_INIT;
>  	int i;
>  
> @@ -37,11 +37,11 @@ static void finish_bulk_checkin(struct bulk_checkin_state *state)
>  		unlink(state->pack_tmp_name);
>  		goto clear_exit;
>  	} else if (state->nr_written == 1) {
> -		finalize_hashfile(state->f, oid.hash, CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
> +		finalize_hashfile(state->f, hash, CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
>  	} else {
> -		int fd = finalize_hashfile(state->f, oid.hash, 0);
> -		fixup_pack_header_footer(fd, oid.hash, state->pack_tmp_name,
> -					 state->nr_written, oid.hash,
> +		int fd = finalize_hashfile(state->f, hash, 0);
> +		fixup_pack_header_footer(fd, hash, state->pack_tmp_name,
> +					 state->nr_written, hash,
>  					 state->offset);
>  		close(fd);
>  	}
> @@ -49,7 +49,7 @@ static void finish_bulk_checkin(struct bulk_checkin_state *state)
>  	strbuf_addf(&packname, "%s/pack/pack-", get_object_directory());
>  	finish_tmp_packfile(&packname, state->pack_tmp_name,
>  			    state->written, state->nr_written,
> -			    &state->pack_idx_opts, oid.hash);
> +			    &state->pack_idx_opts, hash);
>  	for (i = 0; i < state->nr_written; i++)
>  		free(state->written[i]);

[This commit looks good, nothing needs to be fixed here, just a
digression]:

This is a good change, FWIW I came up with in my version, and then ended
up ejecting it to resist the urge to go on some general cleanup spree, I
guess we can just fix *this* one :)

Anyway, I agree that this should be in & having it is good.

The code pattern being fixed here is more insidious than your commit
message describes though, since fa33c3aae23 (bulk-checkin.c: convert to
use struct object_id, 2015-03-13) when this was converted from "unsigned
char sha1[20]" to "struct object_id" we've had a landmine waiting to be
stepped on here.


The "oid" automatic variable will get initialized to some garbage, we
then write to just the "hash" member, but leave the "algo" member in
some undefined state.

Thus when you e.g. call oid_to_hex(&oid) here you might get a segfault,
e.g. on my box because in hex.c we'll try to access
&hash_algos[oid->algo], where oid->algo happens to be set to the garbage
-9872.

If somebody's interested in some follow-up cleanup renaming the "hash"
member to say "hash2" in hash.h, and compiling with "make -k" will find
all the sites that are using it directly,

I looked in detail at a few, and many can either be converted to just
use "struct object_id" without playing around with "oid.hash", and some
share the bug/landmine being fixed here.
diff mbox series

Patch

diff --git a/bulk-checkin.c b/bulk-checkin.c
index b023d9959a..6283bc8bd9 100644
--- a/bulk-checkin.c
+++ b/bulk-checkin.c
@@ -25,7 +25,7 @@  static struct bulk_checkin_state {
 
 static void finish_bulk_checkin(struct bulk_checkin_state *state)
 {
-	struct object_id oid;
+	unsigned char hash[GIT_MAX_RAWSZ];
 	struct strbuf packname = STRBUF_INIT;
 	int i;
 
@@ -37,11 +37,11 @@  static void finish_bulk_checkin(struct bulk_checkin_state *state)
 		unlink(state->pack_tmp_name);
 		goto clear_exit;
 	} else if (state->nr_written == 1) {
-		finalize_hashfile(state->f, oid.hash, CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
+		finalize_hashfile(state->f, hash, CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
 	} else {
-		int fd = finalize_hashfile(state->f, oid.hash, 0);
-		fixup_pack_header_footer(fd, oid.hash, state->pack_tmp_name,
-					 state->nr_written, oid.hash,
+		int fd = finalize_hashfile(state->f, hash, 0);
+		fixup_pack_header_footer(fd, hash, state->pack_tmp_name,
+					 state->nr_written, hash,
 					 state->offset);
 		close(fd);
 	}
@@ -49,7 +49,7 @@  static void finish_bulk_checkin(struct bulk_checkin_state *state)
 	strbuf_addf(&packname, "%s/pack/pack-", get_object_directory());
 	finish_tmp_packfile(&packname, state->pack_tmp_name,
 			    state->written, state->nr_written,
-			    &state->pack_idx_opts, oid.hash);
+			    &state->pack_idx_opts, hash);
 	for (i = 0; i < state->nr_written; i++)
 		free(state->written[i]);