diff mbox series

[v4,23/24] pack-bitmap-write: relax unique revwalk condition

Message ID 8f9fdb0f43ef6aa6796ef50d39a4c911259aa3db.1607464775.git.me@ttaylorr.com (mailing list archive)
State New, archived
Headers show
Series pack-bitmap: bitmap generation improvements | expand

Commit Message

Taylor Blau Dec. 8, 2020, 10:05 p.m. UTC
From: Derrick Stolee <dstolee@microsoft.com>

The previous commits improved the bitmap computation process for very
long, linear histories with many refs by removing quadratic growth in
how many objects were walked. The strategy of computing "intermediate
commits" using bitmasks for which refs can reach those commits
partitioned the poset of reachable objects so each part could be walked
exactly once. This was effective for linear histories.

However, there was a (significant) drawback: wide histories with many
refs had an explosion of memory costs to compute the commit bitmasks
during the exploration that discovers these intermediate commits. Since
these wide histories are unlikely to repeat walking objects, the benefit
of walking objects multiple times was not expensive before. But now, the
commit walk *before computing bitmaps* is incredibly expensive.

In an effort to discover a happy medium, this change reduces the walk
for intermediate commits to only the first-parent history. This focuses
the walk on how the histories converge, which still has significant
reduction in repeat object walks. It is still possible to create
quadratic behavior in this version, but it is probably less likely in
realistic data shapes.

Here is some data taken on a fresh clone of the kernel:

             |   runtime (sec)    |   peak heap (GB)   |
             |                    |                    |
             |   from  |   with   |   from  |   with   |
             | scratch | existing | scratch | existing |
  -----------+---------+----------+---------+-----------
    original |  64.044 |   83.241 |   2.088 |    2.194 |
  last patch |  45.049 |   37.624 |   2.267 |    2.334 |
  this patch |  88.478 |   53.218 |   2.157 |    2.224 |

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
Helped-by: Johannes Schindelin <Johannes.Schindelin@gmx.de>
Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
 pack-bitmap-write.c     | 14 +++++---------
 t/t5310-pack-bitmaps.sh | 33 +++++++++++++++++----------------
 2 files changed, 22 insertions(+), 25 deletions(-)
diff mbox series

Patch

diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c
index 76c8236f94..d2af4a974f 100644
--- a/pack-bitmap-write.c
+++ b/pack-bitmap-write.c
@@ -199,7 +199,7 @@  static void bitmap_builder_init(struct bitmap_builder *bb,
 {
 	struct rev_info revs;
 	struct commit *commit;
-	unsigned int i, num_maximal;
+	unsigned int i, num_maximal = 0;
 
 	memset(bb, 0, sizeof(*bb));
 	init_bb_data(&bb->data);
@@ -207,6 +207,7 @@  static void bitmap_builder_init(struct bitmap_builder *bb,
 	reset_revision_walk();
 	repo_init_revisions(writer->to_pack->repo, &revs, NULL);
 	revs.topo_order = 1;
+	revs.first_parent_only = 1;
 
 	for (i = 0; i < writer->selected_nr; i++) {
 		struct commit *c = writer->selected[i].commit;
@@ -221,13 +222,12 @@  static void bitmap_builder_init(struct bitmap_builder *bb,
 
 		add_pending_object(&revs, &c->object, "");
 	}
-	num_maximal = writer->selected_nr;
 
 	if (prepare_revision_walk(&revs))
 		die("revision walk setup failed");
 
 	while ((commit = get_revision(&revs))) {
-		struct commit_list *p;
+		struct commit_list *p = commit->parents;
 		struct bb_commit *c_ent;
 
 		parse_commit_or_die(commit);
@@ -235,16 +235,12 @@  static void bitmap_builder_init(struct bitmap_builder *bb,
 		c_ent = bb_data_at(&bb->data, commit);
 
 		if (c_ent->maximal) {
-			if (!c_ent->selected) {
-				bitmap_set(c_ent->commit_mask, num_maximal);
-				num_maximal++;
-			}
-
+			num_maximal++;
 			ALLOC_GROW(bb->commits, bb->commits_nr + 1, bb->commits_alloc);
 			bb->commits[bb->commits_nr++] = commit;
 		}
 
-		for (p = commit->parents; p; p = p->next) {
+		if (p) {
 			struct bb_commit *p_ent = bb_data_at(&bb->data, p->item);
 			int c_not_p, p_not_c;
 
diff --git a/t/t5310-pack-bitmaps.sh b/t/t5310-pack-bitmaps.sh
index 6815fb6a4e..3a2c9d2d8e 100755
--- a/t/t5310-pack-bitmaps.sh
+++ b/t/t5310-pack-bitmaps.sh
@@ -23,12 +23,12 @@  has_any () {
 # To ensure the logic for "maximal commits" is exercised, make
 # the repository a bit more complicated.
 #
-#    other                         master
+#    other                         second
 #      *                             *
 # (99 commits)                  (99 commits)
 #      *                             *
 #      |\                           /|
-#      | * octo-other  octo-master * |
+#      | * octo-other  octo-second * |
 #      |/|\_________  ____________/|\|
 #      | \          \/  __________/  |
 #      |  | ________/\ /             |
@@ -43,23 +43,24 @@  has_any () {
 #                                   \|
 #                                    * (base)
 #
+# We only push bits down the first-parent history, which
+# makes some of these commits unimportant!
+#
 # The important part for the maximal commit algorithm is how
 # the bitmasks are extended. Assuming starting bit positions
-# for master (bit 0) and other (bit 1), and some flexibility
-# in the order that merge bases are visited, the bitmasks at
-# the end should be:
+# for second (bit 0) and other (bit 1), the bitmasks at the
+# end should be:
 #
-#      master: 1       (maximal, selected)
+#      second: 1       (maximal, selected)
 #       other: 01      (maximal, selected)
-# octo-master: 1
-#  octo-other: 01
-# merge-right: 111     (maximal)
-#        (l1): 111
-#        (r1): 111
-#  merge-left: 1101    (maximal)
-#        (l2): 11111   (maximal)
-#        (r2): 111101  (maximal)
-#      (base): 1111111 (maximal)
+#      (base): 11 (maximal)
+#
+# This complicated history was important for a previous
+# version of the walk that guarantees never walking a
+# commit multiple times. That goal might be important
+# again, so preserve this complicated case. For now, this
+# test will guarantee that the bitmaps are computed
+# correctly, even with the repeat calculations.
 
 test_expect_success 'setup repo with moderate-sized history' '
 	test_commit_bulk --id=file 10 &&
@@ -114,7 +115,7 @@  test_expect_success 'full repack creates bitmaps' '
 	ls .git/objects/pack/ | grep bitmap >output &&
 	test_line_count = 1 output &&
 	grep "\"key\":\"num_selected_commits\",\"value\":\"106\"" trace &&
-	grep "\"key\":\"num_maximal_commits\",\"value\":\"111\"" trace
+	grep "\"key\":\"num_maximal_commits\",\"value\":\"107\"" trace
 '
 
 test_expect_success 'rev-list --test-bitmap verifies bitmaps' '