diff mbox series

[v4,07/13] pack-bitmap.c: teach `rev-list --test-bitmap` about incremental MIDXs

Message ID b45a9ccbc20180e3358e314b4fd5e46bfa566241.1741983492.git.me@ttaylorr.com (mailing list archive)
State Superseded
Headers show
Series midx: incremental multi-pack indexes, part two | expand

Commit Message

Taylor Blau March 14, 2025, 8:18 p.m. UTC
Implement support for the special `--test-bitmap` mode of `git rev-list`
when using incremental MIDXs.

The bitmap_test_data structure is extended to contain a "base" pointer
that mirrors the structure of the bitmap chain that it is being used to
test.

When we find a commit to test, we first chase down the ->base pointer to
find the appropriate bitmap_test_data for the bitmap layer that the
given commit is contained within, and then perform the test on that
bitmap.

In order to implement this, light modifications are made to
bitmap_for_commit() to reimplement it in terms of a new function,
find_bitmap_for_commit(), which fills out a pointer which indicates the
bitmap layer which contains the given commit.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
 pack-bitmap.c | 107 ++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 86 insertions(+), 21 deletions(-)

Comments

Elijah Newren March 18, 2025, 5:31 a.m. UTC | #1
On Fri, Mar 14, 2025 at 1:18 PM Taylor Blau <me@ttaylorr.com> wrote:
>
> Implement support for the special `--test-bitmap` mode of `git rev-list`
> when using incremental MIDXs.
>
> The bitmap_test_data structure is extended to contain a "base" pointer
> that mirrors the structure of the bitmap chain that it is being used to
> test.
>
> When we find a commit to test, we first chase down the ->base pointer to
> find the appropriate bitmap_test_data for the bitmap layer that the
> given commit is contained within, and then perform the test on that
> bitmap.
>
> In order to implement this, light modifications are made to
> bitmap_for_commit() to reimplement it in terms of a new function,
> find_bitmap_for_commit(), which fills out a pointer which indicates the
> bitmap layer which contains the given commit.
>
> Signed-off-by: Taylor Blau <me@ttaylorr.com>
> ---
>  pack-bitmap.c | 107 ++++++++++++++++++++++++++++++++++++++++----------
>  1 file changed, 86 insertions(+), 21 deletions(-)
>
> diff --git a/pack-bitmap.c b/pack-bitmap.c
> index 7a41535425..bb09ce3cf5 100644
> --- a/pack-bitmap.c
> +++ b/pack-bitmap.c
[...]
> +
> +static void bitmap_test_data_prepare(struct bitmap_test_data *tdata,
> +                                    struct bitmap_index *bitmap_git)
> +{
> +       memset(tdata, 0, sizeof(struct bitmap_test_data));

So, the first thing this function does is 0 out tdata.

> +
> +       tdata->bitmap_git = bitmap_git;
> +       tdata->base = bitmap_new();
> +       tdata->commits = ewah_to_bitmap(bitmap_git->commits);
> +       tdata->trees = ewah_to_bitmap(bitmap_git->trees);
> +       tdata->blobs = ewah_to_bitmap(bitmap_git->blobs);
> +       tdata->tags = ewah_to_bitmap(bitmap_git->tags);
> +
> +       if (bitmap_git->base) {
> +               CALLOC_ARRAY(tdata->base_tdata, 1);

We use CALLOC to both allocate the array and set it all to 0...

> +               bitmap_test_data_prepare(tdata->base_tdata, bitmap_git->base);

and then call bitmap_test_data_prepare() which will re-zero it all out.

Should we either ditch the zeroing at the beginning of the function,
or use xmalloc instead of CALLOC_ARRAY, to avoid duplicate zeroing?

> +       }
> +}

Didn't spot anything else.
Taylor Blau March 19, 2025, 12:30 a.m. UTC | #2
On Mon, Mar 17, 2025 at 10:31:06PM -0700, Elijah Newren wrote:
> > +static void bitmap_test_data_prepare(struct bitmap_test_data *tdata,
> > +                                    struct bitmap_index *bitmap_git)
> > +{
> > +       memset(tdata, 0, sizeof(struct bitmap_test_data));
>
> So, the first thing this function does is 0 out tdata.
>
> > +
> > +       tdata->bitmap_git = bitmap_git;
> > +       tdata->base = bitmap_new();
> > +       tdata->commits = ewah_to_bitmap(bitmap_git->commits);
> > +       tdata->trees = ewah_to_bitmap(bitmap_git->trees);
> > +       tdata->blobs = ewah_to_bitmap(bitmap_git->blobs);
> > +       tdata->tags = ewah_to_bitmap(bitmap_git->tags);
> > +
> > +       if (bitmap_git->base) {
> > +               CALLOC_ARRAY(tdata->base_tdata, 1);
>
> We use CALLOC to both allocate the array and set it all to 0...
>
> > +               bitmap_test_data_prepare(tdata->base_tdata, bitmap_git->base);
>
> and then call bitmap_test_data_prepare() which will re-zero it all out.
>
> Should we either ditch the zeroing at the beginning of the function,
> or use xmalloc instead of CALLOC_ARRAY, to avoid duplicate zeroing?

Ah... good point. I think between the two we should drop the
CALLOC_ARRAY() and just xmalloc() it.

Thanks,
Taylor
diff mbox series

Patch

diff --git a/pack-bitmap.c b/pack-bitmap.c
index 7a41535425..bb09ce3cf5 100644
--- a/pack-bitmap.c
+++ b/pack-bitmap.c
@@ -938,8 +938,9 @@  static struct stored_bitmap *lazy_bitmap_for_commit(struct bitmap_index *bitmap_
 	return NULL;
 }
 
-struct ewah_bitmap *bitmap_for_commit(struct bitmap_index *bitmap_git,
-				      struct commit *commit)
+static struct ewah_bitmap *find_bitmap_for_commit(struct bitmap_index *bitmap_git,
+						  struct commit *commit,
+						  struct bitmap_index **found)
 {
 	khiter_t hash_pos;
 	if (!bitmap_git)
@@ -949,18 +950,30 @@  struct ewah_bitmap *bitmap_for_commit(struct bitmap_index *bitmap_git,
 	if (hash_pos >= kh_end(bitmap_git->bitmaps)) {
 		struct stored_bitmap *bitmap = NULL;
 		if (!bitmap_git->table_lookup)
-			return bitmap_for_commit(bitmap_git->base, commit);
+			return find_bitmap_for_commit(bitmap_git->base, commit,
+						      found);
 
 		/* this is a fairly hot codepath - no trace2_region please */
 		/* NEEDSWORK: cache misses aren't recorded */
 		bitmap = lazy_bitmap_for_commit(bitmap_git, commit);
 		if (!bitmap)
-			return bitmap_for_commit(bitmap_git->base, commit);
+			return find_bitmap_for_commit(bitmap_git->base, commit,
+						      found);
+		if (found)
+			*found = bitmap_git;
 		return lookup_stored_bitmap(bitmap);
 	}
+	if (found)
+		*found = bitmap_git;
 	return lookup_stored_bitmap(kh_value(bitmap_git->bitmaps, hash_pos));
 }
 
+struct ewah_bitmap *bitmap_for_commit(struct bitmap_index *bitmap_git,
+				      struct commit *commit)
+{
+	return find_bitmap_for_commit(bitmap_git, commit, NULL);
+}
+
 static inline int bitmap_position_extended(struct bitmap_index *bitmap_git,
 					   const struct object_id *oid)
 {
@@ -2511,6 +2524,8 @@  struct bitmap_test_data {
 	struct bitmap *tags;
 	struct progress *prg;
 	size_t seen;
+
+	struct bitmap_test_data *base_tdata;
 };
 
 static void test_bitmap_type(struct bitmap_test_data *tdata,
@@ -2519,6 +2534,11 @@  static void test_bitmap_type(struct bitmap_test_data *tdata,
 	enum object_type bitmap_type = OBJ_NONE;
 	int bitmaps_nr = 0;
 
+	if (bitmap_is_midx(tdata->bitmap_git)) {
+		while (pos < tdata->bitmap_git->midx->num_objects_in_base)
+			tdata = tdata->base_tdata;
+	}
+
 	if (bitmap_get(tdata->commits, pos)) {
 		bitmap_type = OBJ_COMMIT;
 		bitmaps_nr++;
@@ -2582,13 +2602,57 @@  static void test_show_commit(struct commit *commit, void *data)
 	display_progress(tdata->prg, ++tdata->seen);
 }
 
+static uint32_t bitmap_total_entry_count(struct bitmap_index *bitmap_git)
+{
+	uint32_t total = 0;
+	do {
+		total = st_add(total, bitmap_git->entry_count);
+		bitmap_git = bitmap_git->base;
+	} while (bitmap_git);
+
+	return total;
+}
+
+static void bitmap_test_data_prepare(struct bitmap_test_data *tdata,
+				     struct bitmap_index *bitmap_git)
+{
+	memset(tdata, 0, sizeof(struct bitmap_test_data));
+
+	tdata->bitmap_git = bitmap_git;
+	tdata->base = bitmap_new();
+	tdata->commits = ewah_to_bitmap(bitmap_git->commits);
+	tdata->trees = ewah_to_bitmap(bitmap_git->trees);
+	tdata->blobs = ewah_to_bitmap(bitmap_git->blobs);
+	tdata->tags = ewah_to_bitmap(bitmap_git->tags);
+
+	if (bitmap_git->base) {
+		CALLOC_ARRAY(tdata->base_tdata, 1);
+		bitmap_test_data_prepare(tdata->base_tdata, bitmap_git->base);
+	}
+}
+
+static void bitmap_test_data_release(struct bitmap_test_data *tdata)
+{
+	if (!tdata)
+		return;
+
+	bitmap_test_data_release(tdata->base_tdata);
+	free(tdata->base_tdata);
+
+	bitmap_free(tdata->base);
+	bitmap_free(tdata->commits);
+	bitmap_free(tdata->trees);
+	bitmap_free(tdata->blobs);
+	bitmap_free(tdata->tags);
+}
+
 void test_bitmap_walk(struct rev_info *revs)
 {
 	struct object *root;
 	struct bitmap *result = NULL;
 	size_t result_popcnt;
 	struct bitmap_test_data tdata;
-	struct bitmap_index *bitmap_git;
+	struct bitmap_index *bitmap_git, *found;
 	struct ewah_bitmap *bm;
 
 	if (!(bitmap_git = prepare_bitmap_git(revs->repo)))
@@ -2597,17 +2661,28 @@  void test_bitmap_walk(struct rev_info *revs)
 	if (revs->pending.nr != 1)
 		die(_("you must specify exactly one commit to test"));
 
-	fprintf_ln(stderr, "Bitmap v%d test (%d entries%s)",
+	fprintf_ln(stderr, "Bitmap v%d test (%d entries%s, %d total)",
 		bitmap_git->version,
 		bitmap_git->entry_count,
-		bitmap_git->table_lookup ? "" : " loaded");
+		bitmap_git->table_lookup ? "" : " loaded",
+		bitmap_total_entry_count(bitmap_git));
 
 	root = revs->pending.objects[0].item;
-	bm = bitmap_for_commit(bitmap_git, (struct commit *)root);
+	bm = find_bitmap_for_commit(bitmap_git, (struct commit *)root, &found);
 
 	if (bm) {
 		fprintf_ln(stderr, "Found bitmap for '%s'. %d bits / %08x checksum",
-			oid_to_hex(&root->oid), (int)bm->bit_size, ewah_checksum(bm));
+			oid_to_hex(&root->oid),
+			(int)bm->bit_size, ewah_checksum(bm));
+
+		if (bitmap_is_midx(found))
+			fprintf_ln(stderr, "Located via MIDX '%s'.",
+				   hash_to_hex_algop(get_midx_checksum(found->midx),
+						     revs->repo->hash_algo));
+		else
+			fprintf_ln(stderr, "Located via pack '%s'.",
+				   hash_to_hex_algop(found->pack->hash,
+						     revs->repo->hash_algo));
 
 		result = ewah_to_bitmap(bm);
 	}
@@ -2624,16 +2699,10 @@  void test_bitmap_walk(struct rev_info *revs)
 	if (prepare_revision_walk(revs))
 		die(_("revision walk setup failed"));
 
-	tdata.bitmap_git = bitmap_git;
-	tdata.base = bitmap_new();
-	tdata.commits = ewah_to_bitmap(bitmap_git->commits);
-	tdata.trees = ewah_to_bitmap(bitmap_git->trees);
-	tdata.blobs = ewah_to_bitmap(bitmap_git->blobs);
-	tdata.tags = ewah_to_bitmap(bitmap_git->tags);
+	bitmap_test_data_prepare(&tdata, bitmap_git);
 	tdata.prg = start_progress(revs->repo,
 				   "Verifying bitmap entries",
 				   result_popcnt);
-	tdata.seen = 0;
 
 	traverse_commit_list(revs, &test_show_commit, &test_show_object, &tdata);
 
@@ -2645,11 +2714,7 @@  void test_bitmap_walk(struct rev_info *revs)
 		die(_("mismatch in bitmap results"));
 
 	bitmap_free(result);
-	bitmap_free(tdata.base);
-	bitmap_free(tdata.commits);
-	bitmap_free(tdata.trees);
-	bitmap_free(tdata.blobs);
-	bitmap_free(tdata.tags);
+	bitmap_test_data_release(&tdata);
 	free_bitmap_index(bitmap_git);
 }