diff mbox series

[3/5] pack-write: pass hash_algo to `write_idx_file()`

Message ID 20250116-kn-the-repo-cleanup-v1-3-a2f4c8e1c4c3@gmail.com (mailing list archive)
State New
Headers show
Series pack-write: cleanup usage of global variables | expand

Commit Message

Karthik Nayak via B4 Relay Jan. 16, 2025, 11:35 a.m. UTC
From: Karthik Nayak <karthik.188@gmail.com>

The `write_idx_file()` function uses the global `the_hash_algo` variable
to access the repository's hash function. To avoid global variable
usage, pass the hash function from the layers above.

Altough the layers above could have access to the hash function
internally, simply pass in `the_hash_algo`. This avoids any
compatibility issues and bubbles up global variable usage to upper
layers which can be eventually resolved.

Signed-off-by: Karthik Nayak <karthik.188@gmail.com>
---
 builtin/fast-import.c  |  4 ++--
 builtin/index-pack.c   |  3 ++-
 builtin/pack-objects.c |  7 ++++---
 bulk-checkin.c         |  5 +++--
 pack-write.c           | 14 ++++++++------
 pack.h                 | 10 ++++++++--
 6 files changed, 27 insertions(+), 16 deletions(-)

Comments

Patrick Steinhardt Jan. 16, 2025, 1:24 p.m. UTC | #1
On Thu, Jan 16, 2025 at 12:35:15PM +0100, Karthik Nayak via B4 Relay wrote:
> @@ -546,7 +547,8 @@ void rename_tmp_packfile_idx(struct strbuf *name_buffer,
>  	rename_tmp_packfile(name_buffer, *idx_tmp_name, "idx");
>  }
>  
> -void stage_tmp_packfiles(struct strbuf *name_buffer,
> +void stage_tmp_packfiles(const struct git_hash_algo *hash_algo,
> +			 struct strbuf *name_buffer,
>  			 const char *pack_tmp_name,
>  			 struct pack_idx_entry **written_list,
>  			 uint32_t nr_written,

This change was somewhat unexpected to me as it wasn't mentioned, so it
makes you wonder why it's different than `wried_idx_file()`.

Patrick
Junio C Hamano Jan. 16, 2025, 7:12 p.m. UTC | #2
Karthik Nayak via B4 Relay
<devnull+karthik.188.gmail.com@kernel.org> writes:

> From: Karthik Nayak <karthik.188@gmail.com>
>
> The `write_idx_file()` function uses the global `the_hash_algo` variable
> to access the repository's hash function. To avoid global variable
> usage, pass the hash function from the layers above.
>
> Altough the layers above could have access to the hash function
> internally, simply pass in `the_hash_algo`. This avoids any
> compatibility issues and bubbles up global variable usage to upper
> layers which can be eventually resolved.
> ...
> -void stage_tmp_packfiles(struct strbuf *name_buffer,
> +void stage_tmp_packfiles(const struct git_hash_algo *hash_algo,
> +			 struct strbuf *name_buffer,
>  			 const char *pack_tmp_name,
>  			 struct pack_idx_entry **written_list,
>  			 uint32_t nr_written,
> @@ -561,8 +563,8 @@ void stage_tmp_packfiles(struct strbuf *name_buffer,
>  	if (adjust_shared_perm(pack_tmp_name))
>  		die_errno("unable to make temporary pack file readable");
>  
> -	*idx_tmp_name = (char *)write_idx_file(NULL, written_list, nr_written,
> -					       pack_idx_opts, hash);
> +	*idx_tmp_name = (char *)write_idx_file(hash_algo, NULL, written_list,
> +					       nr_written, pack_idx_opts, hash);

The proposed log message should mention the reason why this
stage_tmp_packfiles() function needs to be singled out among many
other direct callers of write_idx_file() function.  

In other words, ...

> @@ -798,8 +798,8 @@ static const char *create_index(void)
>  	if (c != last)
>  		die("internal consistency error creating the index");
>  
> -	tmpfile = write_idx_file(NULL, idx, object_count, &pack_idx_opts,
> -				 pack_data->hash);
> +	tmpfile = write_idx_file(the_hash_algo, NULL, idx, object_count,
> +				 &pack_idx_opts, pack_data->hash);
>  	free(idx);
>  	return tmpfile;
>  }

... this hunk could have made create_index() to take a git_hash_algo
object and pass it down to write_idx_file(), while changing all the
callers of create_index() pass the_hash_algo, but we did not do so.

But stage_tmp_packfiles() got that treatment.  Please tell your
readers in the proposed log message what makes it special.

Thanks.
Karthik Nayak Jan. 17, 2025, 8:56 a.m. UTC | #3
Patrick Steinhardt <ps@pks.im> writes:

> On Thu, Jan 16, 2025 at 12:35:15PM +0100, Karthik Nayak via B4 Relay wrote:
>> @@ -546,7 +547,8 @@ void rename_tmp_packfile_idx(struct strbuf *name_buffer,
>>  	rename_tmp_packfile(name_buffer, *idx_tmp_name, "idx");
>>  }
>>
>> -void stage_tmp_packfiles(struct strbuf *name_buffer,
>> +void stage_tmp_packfiles(const struct git_hash_algo *hash_algo,
>> +			 struct strbuf *name_buffer,
>>  			 const char *pack_tmp_name,
>>  			 struct pack_idx_entry **written_list,
>>  			 uint32_t nr_written,
>
> This change was somewhat unexpected to me as it wasn't mentioned, so it
> makes you wonder why it's different than `wried_idx_file()`.
>

Should've mentioned it. This was also modified since it exists in
'pack-write.c'. Will amend the commit message.

> Patrick
Karthik Nayak Jan. 17, 2025, 8:58 a.m. UTC | #4
Junio C Hamano <gitster@pobox.com> writes:

> Karthik Nayak via B4 Relay
> <devnull+karthik.188.gmail.com@kernel.org> writes:
>
>> From: Karthik Nayak <karthik.188@gmail.com>
>>
>> The `write_idx_file()` function uses the global `the_hash_algo` variable
>> to access the repository's hash function. To avoid global variable
>> usage, pass the hash function from the layers above.
>>
>> Altough the layers above could have access to the hash function
>> internally, simply pass in `the_hash_algo`. This avoids any
>> compatibility issues and bubbles up global variable usage to upper
>> layers which can be eventually resolved.
>> ...
>> -void stage_tmp_packfiles(struct strbuf *name_buffer,
>> +void stage_tmp_packfiles(const struct git_hash_algo *hash_algo,
>> +			 struct strbuf *name_buffer,
>>  			 const char *pack_tmp_name,
>>  			 struct pack_idx_entry **written_list,
>>  			 uint32_t nr_written,
>> @@ -561,8 +563,8 @@ void stage_tmp_packfiles(struct strbuf *name_buffer,
>>  	if (adjust_shared_perm(pack_tmp_name))
>>  		die_errno("unable to make temporary pack file readable");
>>
>> -	*idx_tmp_name = (char *)write_idx_file(NULL, written_list, nr_written,
>> -					       pack_idx_opts, hash);
>> +	*idx_tmp_name = (char *)write_idx_file(hash_algo, NULL, written_list,
>> +					       nr_written, pack_idx_opts, hash);
>
> The proposed log message should mention the reason why this
> stage_tmp_packfiles() function needs to be singled out among many
> other direct callers of write_idx_file() function.
>

Agreed, Patrick also pointed out the same.

> In other words, ...
>
>> @@ -798,8 +798,8 @@ static const char *create_index(void)
>>  	if (c != last)
>>  		die("internal consistency error creating the index");
>>
>> -	tmpfile = write_idx_file(NULL, idx, object_count, &pack_idx_opts,
>> -				 pack_data->hash);
>> +	tmpfile = write_idx_file(the_hash_algo, NULL, idx, object_count,
>> +				 &pack_idx_opts, pack_data->hash);
>>  	free(idx);
>>  	return tmpfile;
>>  }
>
> ... this hunk could have made create_index() to take a git_hash_algo
> object and pass it down to write_idx_file(), while changing all the
> callers of create_index() pass the_hash_algo, but we did not do so.
>
> But stage_tmp_packfiles() got that treatment.  Please tell your
> readers in the proposed log message what makes it special.
>
> Thanks.
>

Will fix it in the next version, but in short, it was because the
function is also part of 'pack-write.c' and we're focusing only on
cleanup of 'pack-write.c'.
diff mbox series

Patch

diff --git a/builtin/fast-import.c b/builtin/fast-import.c
index 6baf2b1b71e2443a7987a41e0cd246076682bf58..c4bc52f93c011f34bd7f98c8e9d74a33cf9783bd 100644
--- a/builtin/fast-import.c
+++ b/builtin/fast-import.c
@@ -798,8 +798,8 @@  static const char *create_index(void)
 	if (c != last)
 		die("internal consistency error creating the index");
 
-	tmpfile = write_idx_file(NULL, idx, object_count, &pack_idx_opts,
-				 pack_data->hash);
+	tmpfile = write_idx_file(the_hash_algo, NULL, idx, object_count,
+				 &pack_idx_opts, pack_data->hash);
 	free(idx);
 	return tmpfile;
 }
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index 6c5e3483f4fe67fb2e26132c55b1f8395d60c11f..d73699653a2227f8ecfee2c0f51cd680093ac764 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -2096,7 +2096,8 @@  int cmd_index_pack(int argc,
 	ALLOC_ARRAY(idx_objects, nr_objects);
 	for (i = 0; i < nr_objects; i++)
 		idx_objects[i] = &objects[i].idx;
-	curr_index = write_idx_file(index_name, idx_objects, nr_objects, &opts, pack_hash);
+	curr_index = write_idx_file(the_hash_algo, index_name, idx_objects,
+				    nr_objects, &opts, pack_hash);
 	if (rev_index)
 		curr_rev_index = write_rev_file(rev_index_name, idx_objects,
 						nr_objects, pack_hash,
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index ffc62930b68c9b4152057572ede216381a4b0991..7b2dacda514c29f5393b604f15d379abb81546c0 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -1369,9 +1369,10 @@  static void write_pack_file(void)
 			if (cruft)
 				pack_idx_opts.flags |= WRITE_MTIMES;
 
-			stage_tmp_packfiles(&tmpname, pack_tmp_name,
-					    written_list, nr_written,
-					    &to_pack, &pack_idx_opts, hash,
+			stage_tmp_packfiles(the_hash_algo, &tmpname,
+					    pack_tmp_name, written_list,
+					    nr_written, &to_pack,
+					    &pack_idx_opts, hash,
 					    &idx_tmp_name);
 
 			if (write_bitmap_index) {
diff --git a/bulk-checkin.c b/bulk-checkin.c
index c4b085f57f74fb8b998576ac9d84fed9e01ed0ed..0d49889bfbb233d58fc094f5e2c2d2433dad9851 100644
--- a/bulk-checkin.c
+++ b/bulk-checkin.c
@@ -44,8 +44,9 @@  static void finish_tmp_packfile(struct strbuf *basename,
 {
 	char *idx_tmp_name = NULL;
 
-	stage_tmp_packfiles(basename, pack_tmp_name, written_list, nr_written,
-			    NULL, pack_idx_opts, hash, &idx_tmp_name);
+	stage_tmp_packfiles(the_hash_algo, basename, pack_tmp_name,
+			    written_list, nr_written, NULL, pack_idx_opts, hash,
+			    &idx_tmp_name);
 	rename_tmp_packfile_idx(basename, &idx_tmp_name);
 
 	free(idx_tmp_name);
diff --git a/pack-write.c b/pack-write.c
index 0cd75d2e55419362a61cf981fc11117ea7a1d88a..f344e78a9ec20cea9812a5eaffc72ae0b7e7424d 100644
--- a/pack-write.c
+++ b/pack-write.c
@@ -56,7 +56,8 @@  static int need_large_offset(off_t offset, const struct pack_idx_option *opts)
  * The *sha1 contains the pack content SHA1 hash.
  * The objects array passed in will be sorted by SHA1 on exit.
  */
-const char *write_idx_file(const char *index_name, struct pack_idx_entry **objects,
+const char *write_idx_file(const struct git_hash_algo *hash_algo,
+			   const char *index_name, struct pack_idx_entry **objects,
 			   int nr_objects, const struct pack_idx_option *opts,
 			   const unsigned char *sha1)
 {
@@ -130,7 +131,7 @@  const char *write_idx_file(const char *index_name, struct pack_idx_entry **objec
 		struct pack_idx_entry *obj = *list++;
 		if (index_version < 2)
 			hashwrite_be32(f, obj->offset);
-		hashwrite(f, obj->oid.hash, the_hash_algo->rawsz);
+		hashwrite(f, obj->oid.hash, hash_algo->rawsz);
 		if ((opts->flags & WRITE_IDX_STRICT) &&
 		    (i && oideq(&list[-2]->oid, &obj->oid)))
 			die("The same object %s appears twice in the pack",
@@ -172,7 +173,7 @@  const char *write_idx_file(const char *index_name, struct pack_idx_entry **objec
 		}
 	}
 
-	hashwrite(f, sha1, the_hash_algo->rawsz);
+	hashwrite(f, sha1, hash_algo->rawsz);
 	finalize_hashfile(f, NULL, FSYNC_COMPONENT_PACK_METADATA,
 			  CSUM_HASH_IN_STREAM | CSUM_CLOSE |
 			  ((opts->flags & WRITE_IDX_VERIFY) ? 0 : CSUM_FSYNC));
@@ -546,7 +547,8 @@  void rename_tmp_packfile_idx(struct strbuf *name_buffer,
 	rename_tmp_packfile(name_buffer, *idx_tmp_name, "idx");
 }
 
-void stage_tmp_packfiles(struct strbuf *name_buffer,
+void stage_tmp_packfiles(const struct git_hash_algo *hash_algo,
+			 struct strbuf *name_buffer,
 			 const char *pack_tmp_name,
 			 struct pack_idx_entry **written_list,
 			 uint32_t nr_written,
@@ -561,8 +563,8 @@  void stage_tmp_packfiles(struct strbuf *name_buffer,
 	if (adjust_shared_perm(pack_tmp_name))
 		die_errno("unable to make temporary pack file readable");
 
-	*idx_tmp_name = (char *)write_idx_file(NULL, written_list, nr_written,
-					       pack_idx_opts, hash);
+	*idx_tmp_name = (char *)write_idx_file(hash_algo, NULL, written_list,
+					       nr_written, pack_idx_opts, hash);
 	if (adjust_shared_perm(*idx_tmp_name))
 		die_errno("unable to make temporary index file readable");
 
diff --git a/pack.h b/pack.h
index 46d85e5bec787c90af69700fd4b328b1ebf1d606..c650fdbe2dcde8055ad0efe55646338cd0f81df5 100644
--- a/pack.h
+++ b/pack.h
@@ -86,7 +86,12 @@  struct progress;
 /* Note, the data argument could be NULL if object type is blob */
 typedef int (*verify_fn)(const struct object_id *, enum object_type, unsigned long, void*, int*);
 
-const char *write_idx_file(const char *index_name, struct pack_idx_entry **objects, int nr_objects, const struct pack_idx_option *, const unsigned char *sha1);
+const char *write_idx_file(const struct git_hash_algo *hash_algo,
+			   const char *index_name,
+			   struct pack_idx_entry **objects,
+			   int nr_objects,
+			   const struct pack_idx_option *,
+			   const unsigned char *sha1);
 int check_pack_crc(struct packed_git *p, struct pack_window **w_curs, off_t offset, off_t len, unsigned int nr);
 int verify_pack_index(struct packed_git *);
 int verify_pack(struct repository *, struct packed_git *, verify_fn fn, struct progress *, uint32_t);
@@ -119,7 +124,8 @@  int read_pack_header(int fd, struct pack_header *);
 struct packing_data;
 
 struct hashfile *create_tmp_packfile(char **pack_tmp_name);
-void stage_tmp_packfiles(struct strbuf *name_buffer,
+void stage_tmp_packfiles(const struct git_hash_algo *hash_algo,
+			 struct strbuf *name_buffer,
 			 const char *pack_tmp_name,
 			 struct pack_idx_entry **written_list,
 			 uint32_t nr_written,