diff mbox series

[v2,2/4] midx-write.c: factor out common want_included_pack() routine

Message ID 0064e363c0cc3288346585a6b4340444ce7b863c.1712006190.git.me@ttaylorr.com (mailing list archive)
State Accepted
Commit e94be606f3153b486a4dd761762353658f72c978
Headers show
Series midx: split MIDX writing routines into midx-write.c, cleanup | expand

Commit Message

Taylor Blau April 1, 2024, 9:16 p.m. UTC
When performing a 'git multi-pack-index repack', the MIDX machinery
tries to aggregate MIDX'd packs together either to (a) fill the given
`--batch-size` argument, or (b) combine all packs together.

In either case (using the `midx-write.c::fill_included_packs_batch()` or
`midx-write.c::fill_included_packs_all()` function, respectively), we
evaluate whether or not we want to repack each MIDX'd pack, according to
whether or it is loadable, kept, cruft, or non-empty.

Between the two `fill_included_packs_` callers, they both care about the
same conditions, except for `fill_included_packs_batch()` which also
cares that the pack is non-empty.

We could extract two functions (say, `want_included_pack()` and a
`_nonempty()` variant), but this is not necessary. For the case in
`fill_included_packs_all()` which does not check the pack size, we add
all of the pack's objects assuming that the pack meets all other
criteria. But if the pack is empty in the first place, we add all of its
zero objects, so whether or not we "accept" or "reject" it in the first
place is irrelevant.

This change improves the readability in both `fill_included_packs_`
functions.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
 midx-write.c | 32 ++++++++++++++++++++------------
 1 file changed, 20 insertions(+), 12 deletions(-)

Comments

Patrick Steinhardt April 2, 2024, 11:47 a.m. UTC | #1
On Mon, Apr 01, 2024 at 05:16:38PM -0400, Taylor Blau wrote:
> When performing a 'git multi-pack-index repack', the MIDX machinery
> tries to aggregate MIDX'd packs together either to (a) fill the given
> `--batch-size` argument, or (b) combine all packs together.
> 
> In either case (using the `midx-write.c::fill_included_packs_batch()` or
> `midx-write.c::fill_included_packs_all()` function, respectively), we
> evaluate whether or not we want to repack each MIDX'd pack, according to
> whether or it is loadable, kept, cruft, or non-empty.
> 
> Between the two `fill_included_packs_` callers, they both care about the
> same conditions, except for `fill_included_packs_batch()` which also
> cares that the pack is non-empty.
> 
> We could extract two functions (say, `want_included_pack()` and a
> `_nonempty()` variant), but this is not necessary. For the case in
> `fill_included_packs_all()` which does not check the pack size, we add
> all of the pack's objects assuming that the pack meets all other
> criteria. But if the pack is empty in the first place, we add all of its
> zero objects, so whether or not we "accept" or "reject" it in the first
> place is irrelevant.
> 
> This change improves the readability in both `fill_included_packs_`
> functions.
> 
> Signed-off-by: Taylor Blau <me@ttaylorr.com>
> ---
>  midx-write.c | 32 ++++++++++++++++++++------------
>  1 file changed, 20 insertions(+), 12 deletions(-)
> 
> diff --git a/midx-write.c b/midx-write.c
> index 5242d2a724..906efa2169 100644
> --- a/midx-write.c
> +++ b/midx-write.c
> @@ -1349,6 +1349,24 @@ static int compare_by_mtime(const void *a_, const void *b_)
>  	return 0;
>  }
>  
> +static int want_included_pack(struct repository *r,
> +			      struct multi_pack_index *m,
> +			      int pack_kept_objects,
> +			      uint32_t pack_int_id)
> +{
> +	struct packed_git *p;
> +	if (prepare_midx_pack(r, m, pack_int_id))
> +		return 0;
> +	p = m->packs[pack_int_id];
> +	if (!pack_kept_objects && p->pack_keep)
> +		return 0;
> +	if (p->is_cruft)
> +		return 0;
> +	if (open_pack_index(p) || !p->num_objects)
> +		return 0;
> +	return 1;
> +}
> +
>  static int fill_included_packs_all(struct repository *r,
>  				   struct multi_pack_index *m,
>  				   unsigned char *include_pack)
> @@ -1359,11 +1377,7 @@ static int fill_included_packs_all(struct repository *r,
>  	repo_config_get_bool(r, "repack.packkeptobjects", &pack_kept_objects);
>  
>  	for (i = 0; i < m->num_packs; i++) {
> -		if (prepare_midx_pack(r, m, i))
> -			continue;
> -		if (!pack_kept_objects && m->packs[i]->pack_keep)
> -			continue;
> -		if (m->packs[i]->is_cruft)
> +		if (!want_included_pack(r, m, pack_kept_objects, i))
>  			continue;
>  
>  		include_pack[i] = 1;
> @@ -1410,13 +1424,7 @@ static int fill_included_packs_batch(struct repository *r,
>  		struct packed_git *p = m->packs[pack_int_id];
>  		size_t expected_size;

I was briefly wondering whether `m->packs[pack_int_id]` could change
after the above assignment. But there's a loop a bit further up here
that already calls `prepare_midx_pack()`, so this shouldn't happen.

> -		if (!p)
> -			continue;
> -		if (!pack_kept_objects && p->pack_keep)
> -			continue;
> -		if (p->is_cruft)
> -			continue;
> -		if (open_pack_index(p) || !p->num_objects)
> +		if (!want_included_pack(r, m, pack_kept_objects, pack_int_id))
>  			continue;

Another thing I wondered was whether it hurts performance that we now
call `prepare_midx_pack()` twice. But this shouldn't matter either as we
exit early from that function in case `m->packs[pack_int_id]` is already
populated.

So this patch looks good to me.

Patrick

>  		expected_size = st_mult(p->pack_size,
> -- 
> 2.44.0.330.g158d2a670b4
> 
>
diff mbox series

Patch

diff --git a/midx-write.c b/midx-write.c
index 5242d2a724..906efa2169 100644
--- a/midx-write.c
+++ b/midx-write.c
@@ -1349,6 +1349,24 @@  static int compare_by_mtime(const void *a_, const void *b_)
 	return 0;
 }
 
+static int want_included_pack(struct repository *r,
+			      struct multi_pack_index *m,
+			      int pack_kept_objects,
+			      uint32_t pack_int_id)
+{
+	struct packed_git *p;
+	if (prepare_midx_pack(r, m, pack_int_id))
+		return 0;
+	p = m->packs[pack_int_id];
+	if (!pack_kept_objects && p->pack_keep)
+		return 0;
+	if (p->is_cruft)
+		return 0;
+	if (open_pack_index(p) || !p->num_objects)
+		return 0;
+	return 1;
+}
+
 static int fill_included_packs_all(struct repository *r,
 				   struct multi_pack_index *m,
 				   unsigned char *include_pack)
@@ -1359,11 +1377,7 @@  static int fill_included_packs_all(struct repository *r,
 	repo_config_get_bool(r, "repack.packkeptobjects", &pack_kept_objects);
 
 	for (i = 0; i < m->num_packs; i++) {
-		if (prepare_midx_pack(r, m, i))
-			continue;
-		if (!pack_kept_objects && m->packs[i]->pack_keep)
-			continue;
-		if (m->packs[i]->is_cruft)
+		if (!want_included_pack(r, m, pack_kept_objects, i))
 			continue;
 
 		include_pack[i] = 1;
@@ -1410,13 +1424,7 @@  static int fill_included_packs_batch(struct repository *r,
 		struct packed_git *p = m->packs[pack_int_id];
 		size_t expected_size;
 
-		if (!p)
-			continue;
-		if (!pack_kept_objects && p->pack_keep)
-			continue;
-		if (p->is_cruft)
-			continue;
-		if (open_pack_index(p) || !p->num_objects)
+		if (!want_included_pack(r, m, pack_kept_objects, pack_int_id))
 			continue;
 
 		expected_size = st_mult(p->pack_size,