[v4,07/15] commit-graph: examine changed-path objects in pack order
diff mbox series

Message ID d24c85c54ef841eb2a62d95937c1bf9884ba690a.1586192395.git.gitgitgadget@gmail.com
State New
Headers show
  • Changed Paths Bloom Filters
Related show

Commit Message

Johannes Schindelin via GitGitGadget April 6, 2020, 4:59 p.m. UTC
From: Jeff King <peff@peff.net>

Looking at the diff of commit objects in pack order is much faster than
in sha1 order, as it gives locality to the access of tree deltas
(whereas sha1 order is effectively random). Unfortunately the
commit-graph code sorts the commits (several times, sometimes as an oid
and sometimes a pointer-to-commit), and we ultimately traverse in sha1

Instead, let's remember the position at which we see each commit, and
traverse in that order when looking at bloom filters. This drops my time
for "git commit-graph write --changed-paths" in linux.git from ~4
minutes to ~1.5 minutes.

Probably the "--reachable" code path would want something similar.

Or alternatively, we could use a different data structure (either a
hash, or maybe even just a bit in "struct commit") to keep track of
which oids we've seen, etc instead of sorting. And then we could keep
the original order.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Garima Singh <garima.singh@microsoft.com>
 commit-graph.c | 38 +++++++++++++++++++++++++++++++++++---
 1 file changed, 35 insertions(+), 3 deletions(-)

diff mbox series

diff --git a/commit-graph.c b/commit-graph.c
index 862a00d67ed..31b06f878ce 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -17,6 +17,7 @@ 
 #include "replace-object.h"
 #include "progress.h"
 #include "bloom.h"
+#include "commit-slab.h"
 #define GRAPH_SIGNATURE 0x43475048 /* "CGPH" */
 #define GRAPH_CHUNKID_OIDFANOUT 0x4f494446 /* "OIDF" */
@@ -46,9 +47,32 @@ 
 /* Remember to update object flag allocation in object.h */
 #define REACHABLE       (1u<<15)
-char *get_commit_graph_filename(struct object_directory *odb)
+/* Keep track of the order in which commits are added to our list. */
+define_commit_slab(commit_pos, int);
+static struct commit_pos commit_pos = COMMIT_SLAB_INIT(1, commit_pos);
+static void set_commit_pos(struct repository *r, const struct object_id *oid)
+	static int32_t max_pos;
+	struct commit *commit = lookup_commit(r, oid);
+	if (!commit)
+		return; /* should never happen, but be lenient */
+	*commit_pos_at(&commit_pos, commit) = max_pos++;
+static int commit_pos_cmp(const void *va, const void *vb)
-	return xstrfmt("%s/info/commit-graph", odb->path);
+	const struct commit *a = *(const struct commit **)va;
+	const struct commit *b = *(const struct commit **)vb;
+	return commit_pos_at(&commit_pos, a) -
+	       commit_pos_at(&commit_pos, b);
+char *get_commit_graph_filename(struct object_directory *obj_dir)
+	return xstrfmt("%s/info/commit-graph", obj_dir->path);
 static char *get_split_graph_filename(struct object_directory *odb,
@@ -1021,6 +1045,8 @@  static int add_packed_commits(const struct object_id *oid,
 	oidcpy(&(ctx->oids.list[ctx->oids.nr]), oid);
+	set_commit_pos(ctx->r, oid);
 	return 0;
@@ -1141,6 +1167,7 @@  static void compute_bloom_filters(struct write_commit_graph_context *ctx)
 	int i;
 	struct progress *progress = NULL;
+	struct commit **sorted_commits;
@@ -1149,13 +1176,18 @@  static void compute_bloom_filters(struct write_commit_graph_context *ctx)
 			_("Computing commit changed paths Bloom filters"),
+	ALLOC_ARRAY(sorted_commits, ctx->commits.nr);
+	COPY_ARRAY(sorted_commits, ctx->commits.list, ctx->commits.nr);
+	QSORT(sorted_commits, ctx->commits.nr, commit_pos_cmp);
 	for (i = 0; i < ctx->commits.nr; i++) {
-		struct commit *c = ctx->commits.list[i];
+		struct commit *c = sorted_commits[i];
 		struct bloom_filter *filter = get_bloom_filter(ctx->r, c);
 		ctx->total_bloom_filter_data_size += sizeof(unsigned char) * filter->len;
 		display_progress(progress, i + 1);
+	free(sorted_commits);