diff mbox series

[2/8] midx-write.c: reduce argument count for `get_sorted_entries()`

Message ID 07dad5a5812794be6e355b1e0eb3722d452f292b.1716482279.git.me@ttaylorr.com (mailing list archive)
State Superseded
Headers show
Series midx-write: miscellaneous clean-ups for incremental MIDXs | expand

Commit Message

Taylor Blau May 23, 2024, 4:38 p.m. UTC
The function `midx-write.c::get_sorted_entries()` is responsible for
constructing the array of OIDs from a given list of packs which will
comprise the MIDX being written.

The singular call-site for this function looks something like:

    ctx.entries = get_sorted_entries(ctx.m, ctx.info, ctx.nr,
                                     &ctx.entries_nr,
                                     ctx.preferred_pack_idx);

This function has five formal arguments, all of which are members of the
shared `struct write_midx_context` used to track various pieces of
information about the MIDX being written.

The function `get_sorted_entries()` dates back to fe1ed56f5e4 (midx:
sort and deduplicate objects from packfiles, 2018-07-12), which came
shortly after 396f257018a (multi-pack-index: read packfile list,
2018-07-12). The latter patch introduced the `pack_list` structure,
which was a precursor to the structure we now know as
`write_midx_context` (c.f. 577dc49696a (midx: rename pack_info to
write_midx_context, 2021-02-18)).

At the time, `get_sorted_entries()` likely could have used the pack_list
structure introduced earlier in 396f257018a, but understandably did not
since the structure only contained three fields (only two of which were
relevant to `get_sorted_entries()`) at the time.

Simplify the declaration of this function by taking a single pointer to
the whole `struct write_midx_context` instead of various members within
it.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
 midx-write.c | 41 ++++++++++++++++++-----------------------
 1 file changed, 18 insertions(+), 23 deletions(-)

Comments

Patrick Steinhardt May 29, 2024, 7:47 a.m. UTC | #1
On Thu, May 23, 2024 at 12:38:06PM -0400, Taylor Blau wrote:
> diff --git a/midx-write.c b/midx-write.c
> index 03e95ae821..ad32e8953d 100644
> --- a/midx-write.c
> +++ b/midx-write.c
> @@ -299,21 +299,17 @@ static void midx_fanout_add_pack_fanout(struct midx_fanout *fanout,
>   * Copy only the de-duplicated entries (selected by most-recent modified time
>   * of a packfile containing the object).
>   */
> -static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m,
> -						  struct pack_info *info,
> -						  uint32_t nr_packs,
> -						  size_t *nr_objects,
> -						  int preferred_pack)
> +static struct pack_midx_entry *get_sorted_entries(struct write_midx_context *ctx)
>  {
>  	uint32_t cur_fanout, cur_pack, cur_object;
>  	size_t alloc_objects, total_objects = 0;
>  	struct midx_fanout fanout = { 0 };
>  	struct pack_midx_entry *deduplicated_entries = NULL;
> -	uint32_t start_pack = m ? m->num_packs : 0;
> +	uint32_t start_pack = ctx->m ? ctx->m->num_packs : 0;
>  
> -	for (cur_pack = start_pack; cur_pack < nr_packs; cur_pack++)
> +	for (cur_pack = start_pack; cur_pack < ctx->nr; cur_pack++)
>  		total_objects = st_add(total_objects,
> -				       info[cur_pack].p->num_objects);
> +				       ctx->info[cur_pack].p->num_objects);
>  
>  	/*
>  	 * As we de-duplicate by fanout value, we expect the fanout
> @@ -324,25 +320,25 @@ static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m,
>  
>  	ALLOC_ARRAY(fanout.entries, fanout.alloc);
>  	ALLOC_ARRAY(deduplicated_entries, alloc_objects);
> -	*nr_objects = 0;
> +	ctx->entries_nr = 0;

Nit: I think it's a bit surprising that a getter function would modify
the passed in structure. It's also a bit puzzling that we assign
`entries_nr` in here, but rely on the caller to set the corresponding
`entries` field. I think we should either have the caller assign both
fields, or we should rename the function and assign both of these fields
in the function.

Patrick
Taylor Blau May 29, 2024, 10:35 p.m. UTC | #2
On Wed, May 29, 2024 at 09:47:57AM +0200, Patrick Steinhardt wrote:
> Nit: I think it's a bit surprising that a getter function would modify
> the passed in structure. It's also a bit puzzling that we assign
> `entries_nr` in here, but rely on the caller to set the corresponding
> `entries` field. I think we should either have the caller assign both
> fields, or we should rename the function and assign both of these fields
> in the function.

Thanks, I agree and should have changed the name of this function in the
existing round.

I renamed it to "compute_sorted_entries()" which takes only a single
"struct write_midx_context *", and assigns both "ctx->entries" and
"ctx->entries_nr".

Thanks,
Taylor
diff mbox series

Patch

diff --git a/midx-write.c b/midx-write.c
index 03e95ae821..ad32e8953d 100644
--- a/midx-write.c
+++ b/midx-write.c
@@ -299,21 +299,17 @@  static void midx_fanout_add_pack_fanout(struct midx_fanout *fanout,
  * Copy only the de-duplicated entries (selected by most-recent modified time
  * of a packfile containing the object).
  */
-static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m,
-						  struct pack_info *info,
-						  uint32_t nr_packs,
-						  size_t *nr_objects,
-						  int preferred_pack)
+static struct pack_midx_entry *get_sorted_entries(struct write_midx_context *ctx)
 {
 	uint32_t cur_fanout, cur_pack, cur_object;
 	size_t alloc_objects, total_objects = 0;
 	struct midx_fanout fanout = { 0 };
 	struct pack_midx_entry *deduplicated_entries = NULL;
-	uint32_t start_pack = m ? m->num_packs : 0;
+	uint32_t start_pack = ctx->m ? ctx->m->num_packs : 0;
 
-	for (cur_pack = start_pack; cur_pack < nr_packs; cur_pack++)
+	for (cur_pack = start_pack; cur_pack < ctx->nr; cur_pack++)
 		total_objects = st_add(total_objects,
-				       info[cur_pack].p->num_objects);
+				       ctx->info[cur_pack].p->num_objects);
 
 	/*
 	 * As we de-duplicate by fanout value, we expect the fanout
@@ -324,25 +320,25 @@  static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m,
 
 	ALLOC_ARRAY(fanout.entries, fanout.alloc);
 	ALLOC_ARRAY(deduplicated_entries, alloc_objects);
-	*nr_objects = 0;
+	ctx->entries_nr = 0;
 
 	for (cur_fanout = 0; cur_fanout < 256; cur_fanout++) {
 		fanout.nr = 0;
 
-		if (m)
-			midx_fanout_add_midx_fanout(&fanout, m, cur_fanout,
-						    preferred_pack);
+		if (ctx->m)
+			midx_fanout_add_midx_fanout(&fanout, ctx->m, cur_fanout,
+						    ctx->preferred_pack_idx);
 
-		for (cur_pack = start_pack; cur_pack < nr_packs; cur_pack++) {
-			int preferred = cur_pack == preferred_pack;
+		for (cur_pack = start_pack; cur_pack < ctx->nr; cur_pack++) {
+			int preferred = cur_pack == ctx->preferred_pack_idx;
 			midx_fanout_add_pack_fanout(&fanout,
-						    info, cur_pack,
+						    ctx->info, cur_pack,
 						    preferred, cur_fanout);
 		}
 
-		if (-1 < preferred_pack && preferred_pack < start_pack)
-			midx_fanout_add_pack_fanout(&fanout, info,
-						    preferred_pack, 1,
+		if (-1 < ctx->preferred_pack_idx && ctx->preferred_pack_idx < start_pack)
+			midx_fanout_add_pack_fanout(&fanout, ctx->info,
+						    ctx->preferred_pack_idx, 1,
 						    cur_fanout);
 
 		midx_fanout_sort(&fanout);
@@ -356,12 +352,12 @@  static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m,
 						&fanout.entries[cur_object].oid))
 				continue;
 
-			ALLOC_GROW(deduplicated_entries, st_add(*nr_objects, 1),
+			ALLOC_GROW(deduplicated_entries, st_add(ctx->entries_nr, 1),
 				   alloc_objects);
-			memcpy(&deduplicated_entries[*nr_objects],
+			memcpy(&deduplicated_entries[ctx->entries_nr],
 			       &fanout.entries[cur_object],
 			       sizeof(struct pack_midx_entry));
-			(*nr_objects)++;
+			ctx->entries_nr++;
 		}
 	}
 
@@ -1055,8 +1051,7 @@  static int write_midx_internal(const char *object_dir,
 		}
 	}
 
-	ctx.entries = get_sorted_entries(ctx.m, ctx.info, ctx.nr, &ctx.entries_nr,
-					 ctx.preferred_pack_idx);
+	ctx.entries = get_sorted_entries(&ctx);
 
 	ctx.large_offsets_needed = 0;
 	for (i = 0; i < ctx.entries_nr; i++) {