diff mbox series

[3/3] commit-reach: use heuristic in remove_redundant()

Message ID fbe7bdc1ec2ba6014dacc3a7ffa626b24c9aa7a5.1611851095.git.gitgitgadget@gmail.com (mailing list archive)
State New, archived
Headers show
Series Speed up remove_redundant() | expand

Commit Message

Derrick Stolee Jan. 28, 2021, 4:24 p.m. UTC
From: Derrick Stolee <dstolee@microsoft.com>

Reachability algorithms in commit-reach.c frequently benefit from using
the first-parent history as a heuristic for satisfying reachability
queries. The most obvious example was implemented in 4fbcca4e
(commit-reach: make can_all_from_reach... linear, 2018-07-20).

Update the walk in remove_redundant() to use this same heuristic. Here,
we are walking starting at the parents of the input commits. Sort those
parents and walk from the highest generation to lower. Each time, use
the heuristic of searching the first parent history before continuing to
expand the walk.

The important piece is to ensure we short-circuit the walk when we find
that there is a single non-redundant commit. This happens frequently
when looking for merge-bases or comparing several tags with 'git
merge-base --independent'. Use a new count 'count_still_independent' and
if that hits 1 we can stop walking.

To update 'count_still_independent' properly, we add use of the RESULT
flag on the input commits. Then we can detect when we reach one of these
commits and decrease the count. We need to remove the RESULT flag at
that moment because we might re-visit that commit when popping the
stack.

We use the STALE flag to mark parents that have been added to the new
walk_start list, but we need to clear that flag before we start walking
so those flags don't halt our depth-first-search walk.

On my copy of the Linux kernel repository, the performance of 'git
merge-base --independent <all-tags>' goes from 1.1 seconds to 0.11
seconds.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 commit-reach.c | 72 +++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 56 insertions(+), 16 deletions(-)
diff mbox series

Patch

diff --git a/commit-reach.c b/commit-reach.c
index 783c604a405..6032624282e 100644
--- a/commit-reach.c
+++ b/commit-reach.c
@@ -179,10 +179,13 @@  static int remove_redundant(struct repository *r, struct commit **array, int cnt
 	 * the array, and return the number of commits that
 	 * are independent from each other.
 	 */
-	int i, count_non_stale = 0;
+	int i, count_non_stale = 0, count_still_independent = cnt;
 	timestamp_t min_generation = GENERATION_NUMBER_INFINITY;
 	struct commit **dup;
-	struct prio_queue queue = { compare_commits_by_gen_then_commit_date };
+	struct commit **walk_start;
+	size_t walk_start_nr = 0, walk_start_alloc = cnt;
+
+	ALLOC_ARRAY(walk_start, walk_start_alloc);
 
 	/* Mark all parents of the input as STALE */
 	for (i = 0; i < cnt; i++) {
@@ -190,13 +193,15 @@  static int remove_redundant(struct repository *r, struct commit **array, int cnt
 		timestamp_t generation;
 
 		repo_parse_commit(r, array[i]);
+		array[i]->object.flags |= RESULT;
 		parents = array[i]->parents;
 
 		while (parents) {
 			repo_parse_commit(r, parents->item);
 			if (!(parents->item->object.flags & STALE)) {
 				parents->item->object.flags |= STALE;
-				prio_queue_put(&queue, parents->item);
+				ALLOC_GROW(walk_start, walk_start_nr + 1, walk_start_alloc);
+				walk_start[walk_start_nr++] = parents->item;
 			}
 			parents = parents->next;
 		}
@@ -207,30 +212,65 @@  static int remove_redundant(struct repository *r, struct commit **array, int cnt
 			min_generation = generation;
 	}
 
-	/* push the STALE bits up to min generation */
-	while (queue.nr) {
-		struct commit_list *parents;
-		struct commit *c = prio_queue_get(&queue);
+	QSORT(walk_start, walk_start_nr, compare_commits_by_gen);
 
-		repo_parse_commit(r, c);
+	/* remove STALE bit for now to allow walking through parents */
+	for (i = 0; i < walk_start_nr; i++)
+		walk_start[i]->object.flags &= ~STALE;
 
-		if (commit_graph_generation(c) < min_generation)
-			continue;
+	/*
+	 * Start walking from the highest generation. Hopefully, it will
+	 * find all other items during the first-parent walk, and we can
+	 * terminate early. Otherwise, we will do the same amount of work
+	 * as before.
+	 */
+	for (i = walk_start_nr - 1; i >= 0 && count_still_independent > 1; i--) {
+		/* push the STALE bits up to min generation */
+		struct commit_list *stack = NULL;
 
-		parents = c->parents;
-		while (parents) {
-			if (!(parents->item->object.flags & STALE)) {
-				parents->item->object.flags |= STALE;
-				prio_queue_put(&queue, parents->item);
+		commit_list_insert(walk_start[i], &stack);
+		walk_start[i]->object.flags |= STALE;
+
+		while (stack) {
+			struct commit_list *parents;
+			struct commit *c = stack->item;
+
+			repo_parse_commit(r, c);
+
+			if (c->object.flags & RESULT) {
+				c->object.flags &= ~RESULT;
+				if (--count_still_independent <= 1)
+					break;
 			}
-			parents = parents->next;
+
+			if (commit_graph_generation(c) < min_generation) {
+				pop_commit(&stack);
+				continue;
+			}
+
+			parents = c->parents;
+			while (parents) {
+				if (!(parents->item->object.flags & STALE)) {
+					parents->item->object.flags |= STALE;
+					commit_list_insert(parents->item, &stack);
+					break;
+				}
+				parents = parents->next;
+			}
+
+			/* pop if all parents have been visited already */
+			if (!parents)
+				pop_commit(&stack);
 		}
+		free_commit_list(stack);
 	}
+	free(walk_start);
 
 	/* rearrange array */
 	dup = xcalloc(cnt, sizeof(struct commit *));
 	COPY_ARRAY(dup, array, cnt);
 	for (i = 0; i < cnt; i++) {
+		dup[i]->object.flags &= ~RESULT;
 		if (dup[i]->object.flags & STALE) {
 			int insert = cnt - 1 - (i - count_non_stale);
 			array[insert] = dup[i];