diff mbox series

[05/24] pack-bitmap: move some initialization to `bitmap_writer_init()`

Message ID a65316567391160dddae70fb036f03858772014f.1710972293.git.me@ttaylorr.com (mailing list archive)
State New
Headers show
Series pack-bitmap: pseudo-merge reachability bitmaps | expand

Commit Message

Taylor Blau March 20, 2024, 10:05 p.m. UTC
The pack-bitmap-writer machinery uses a oidmap (backed by khash.h) to
map from commits selected for bitmaps (by OID) to a bitmapped_commit
structure (containing the bitmap itself, among other things like its XOR
offset, etc.)

This map was initialized at the end of `bitmap_writer_build()`. New
entries are added in `pack-bitmap-write.c::store_selected()`, which is
called by the bitmap_builder machinery (which is responsible for
traversing history and generating the actual bitmaps).

Reorganize when this field is initialized and when entries are added to
it so that we can quickly determine whether a commit is a candidate for
pseudo-merge selection, or not (since it was already selected to receive
a bitmap, and thus is ineligible for pseudo-merge inclusion).

The changes are as follows:

  - Introduce a new `bitmap_writer_init()` function which initializes
    the `writer.bitmaps` field (instead of waiting until the end of
    `bitmap_writer_build()`).

  - Add map entries in `push_bitmapped_commit()` (which is called via
    `bitmap_writer_select_commits()`) with OID keys and NULL values to
    track whether or not we *expect* to write a bitmap for some given
    commit.

  - Validate that a NULL entry is found matching the given key when we
    store a selected bitmap.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
 builtin/pack-objects.c |  1 +
 midx.c                 |  1 +
 pack-bitmap-write.c    | 23 ++++++++++++++++++-----
 pack-bitmap.h          |  1 +
 4 files changed, 21 insertions(+), 5 deletions(-)

Comments

Jeff King April 10, 2024, 6:10 p.m. UTC | #1
On Wed, Mar 20, 2024 at 06:05:13PM -0400, Taylor Blau wrote:

> The pack-bitmap-writer machinery uses a oidmap (backed by khash.h) to
> map from commits selected for bitmaps (by OID) to a bitmapped_commit
> structure (containing the bitmap itself, among other things like its XOR
> offset, etc.)
> 
> This map was initialized at the end of `bitmap_writer_build()`. New
> entries are added in `pack-bitmap-write.c::store_selected()`, which is
> called by the bitmap_builder machinery (which is responsible for
> traversing history and generating the actual bitmaps).
> 
> Reorganize when this field is initialized and when entries are added to
> it so that we can quickly determine whether a commit is a candidate for
> pseudo-merge selection, or not (since it was already selected to receive
> a bitmap, and thus is ineligible for pseudo-merge inclusion).

OK, makes sense. I don't think this should violate any assumptions in
the current bitmap code (and the sanity checks for duplicate/missing
entries in the hash seem right to me).

-Peff
diff mbox series

Patch

diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 41281cae91f..34a431e3856 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -1339,6 +1339,7 @@  static void write_pack_file(void)
 				    hash_to_hex(hash));
 
 			if (write_bitmap_index) {
+				bitmap_writer_init(the_repository);
 				bitmap_writer_set_checksum(hash);
 				bitmap_writer_build_type_index(
 					&to_pack, written_list, nr_written);
diff --git a/midx.c b/midx.c
index 366bfbe18c8..24d98120852 100644
--- a/midx.c
+++ b/midx.c
@@ -1311,6 +1311,7 @@  static int write_midx_bitmap(const char *midx_name,
 	for (i = 0; i < pdata->nr_objects; i++)
 		index[i] = &pdata->objects[i].idx;
 
+	bitmap_writer_init(the_repository);
 	bitmap_writer_show_progress(flags & MIDX_PROGRESS);
 	bitmap_writer_build_type_index(pdata, index, pdata->nr_objects);
 
diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c
index 3dc2408eca7..ad768959633 100644
--- a/pack-bitmap-write.c
+++ b/pack-bitmap-write.c
@@ -46,6 +46,11 @@  struct bitmap_writer {
 
 static struct bitmap_writer writer;
 
+void bitmap_writer_init(struct repository *r)
+{
+	writer.bitmaps = kh_init_oid_map();
+}
+
 void bitmap_writer_show_progress(int show)
 {
 	writer.show_progress = show;
@@ -117,11 +122,20 @@  void bitmap_writer_build_type_index(struct packing_data *to_pack,
 
 static inline void push_bitmapped_commit(struct commit *commit)
 {
+	int hash_ret;
+	khiter_t hash_pos;
+
 	if (writer.selected_nr >= writer.selected_alloc) {
 		writer.selected_alloc = (writer.selected_alloc + 32) * 2;
 		REALLOC_ARRAY(writer.selected, writer.selected_alloc);
 	}
 
+	hash_pos = kh_put_oid_map(writer.bitmaps, commit->object.oid, &hash_ret);
+	if (!hash_ret)
+		die(_("duplicate entry when writing bitmap index: %s"),
+		    oid_to_hex(&commit->object.oid));
+	kh_value(writer.bitmaps, hash_pos) = NULL;
+
 	writer.selected[writer.selected_nr].commit = commit;
 	writer.selected[writer.selected_nr].bitmap = NULL;
 	writer.selected[writer.selected_nr].flags = 0;
@@ -466,14 +480,14 @@  static void store_selected(struct bb_commit *ent, struct commit *commit)
 {
 	struct bitmapped_commit *stored = &writer.selected[ent->idx];
 	khiter_t hash_pos;
-	int hash_ret;
 
 	stored->bitmap = bitmap_to_ewah(ent->bitmap);
 
-	hash_pos = kh_put_oid_map(writer.bitmaps, commit->object.oid, &hash_ret);
-	if (hash_ret == 0)
-		die("Duplicate entry when writing index: %s",
+	hash_pos = kh_get_oid_map(writer.bitmaps, commit->object.oid);
+	if (hash_pos == kh_end(writer.bitmaps))
+		die(_("attempted to store non-selected commit: '%s'"),
 		    oid_to_hex(&commit->object.oid));
+
 	kh_value(writer.bitmaps, hash_pos) = stored;
 }
 
@@ -488,7 +502,6 @@  int bitmap_writer_build(struct packing_data *to_pack)
 	uint32_t *mapping;
 	int closed = 1; /* until proven otherwise */
 
-	writer.bitmaps = kh_init_oid_map();
 	writer.to_pack = to_pack;
 
 	if (writer.show_progress)
diff --git a/pack-bitmap.h b/pack-bitmap.h
index 3f96608d5c1..dae2d68a338 100644
--- a/pack-bitmap.h
+++ b/pack-bitmap.h
@@ -97,6 +97,7 @@  int bitmap_has_oid_in_uninteresting(struct bitmap_index *, const struct object_i
 
 off_t get_disk_usage_from_bitmap(struct bitmap_index *, struct rev_info *);
 
+void bitmap_writer_init(struct repository *r);
 void bitmap_writer_show_progress(int show);
 void bitmap_writer_set_checksum(const unsigned char *sha1);
 void bitmap_writer_build_type_index(struct packing_data *to_pack,