@@ -40,6 +40,8 @@
#define GRAPH_MIN_SIZE (GRAPH_HEADER_SIZE + 4 * GRAPH_CHUNKLOOKUP_WIDTH \
+ GRAPH_FANOUT_SIZE + the_hash_algo->rawsz)
+#define MAX_SPLIT_COMMITS 64000
+
char *get_commit_graph_filename(const char *obj_dir)
{
return xstrfmt("%s/info/commit-graph", obj_dir);
@@ -619,9 +621,14 @@ struct write_commit_graph_context {
unsigned long approx_nr_objects;
struct progress *progress;
int progress_done;
+ int num_commit_graphs_before;
+ int num_commit_graphs_after;
+ uint32_t new_num_commits_in_base;
+ struct commit_graph *new_base_graph;
uint64_t progress_cnt;
unsigned append:1,
- report_progress:1;
+ report_progress:1,
+ split:1;
};
static void write_graph_chunk_fanout(struct hashfile *f,
@@ -691,6 +698,16 @@ static void write_graph_chunk_data(struct hashfile *f, int hash_len,
ctx->commits.nr,
commit_to_sha1);
+ if (edge_value >= 0)
+ edge_value += ctx->new_num_commits_in_base;
+ else {
+ uint32_t pos;
+ if (find_commit_in_graph(parent->item,
+ ctx->new_base_graph,
+ &pos))
+ edge_value = pos;
+ }
+
if (edge_value < 0)
BUG("missing parent %s for commit %s",
oid_to_hex(&parent->item->object.oid),
@@ -711,6 +728,17 @@ static void write_graph_chunk_data(struct hashfile *f, int hash_len,
ctx->commits.list,
ctx->commits.nr,
commit_to_sha1);
+
+ if (edge_value >= 0)
+ edge_value += ctx->new_num_commits_in_base;
+ else {
+ uint32_t pos;
+ if (find_commit_in_graph(parent->item,
+ ctx->new_base_graph,
+ &pos))
+ edge_value = pos;
+ }
+
if (edge_value < 0)
BUG("missing parent %s for commit %s",
oid_to_hex(&parent->item->object.oid),
@@ -768,6 +796,16 @@ static void write_graph_chunk_extra_edges(struct hashfile *f,
ctx->commits.nr,
commit_to_sha1);
+ if (edge_value >= 0)
+ edge_value += ctx->new_num_commits_in_base;
+ else {
+ uint32_t pos;
+ if (find_commit_in_graph(parent->item,
+ ctx->new_base_graph,
+ &pos))
+ edge_value = pos;
+ }
+
if (edge_value < 0)
BUG("missing parent %s for commit %s",
oid_to_hex(&parent->item->object.oid),
@@ -782,7 +820,7 @@ static void write_graph_chunk_extra_edges(struct hashfile *f,
}
}
-static int commit_compare(const void *_a, const void *_b)
+static int oid_compare(const void *_a, const void *_b)
{
const struct object_id *a = (const struct object_id *)_a;
const struct object_id *b = (const struct object_id *)_b;
@@ -859,7 +897,13 @@ static void close_reachable(struct write_commit_graph_context *ctx)
display_progress(ctx->progress, i + 1);
commit = lookup_commit(ctx->r, &ctx->oids.list[i]);
- if (commit && !parse_commit_no_graph(commit))
+ if (!commit)
+ continue;
+ if (ctx->split) {
+ if (!parse_commit(commit) &&
+ commit->graph_pos == COMMIT_NOT_FROM_GRAPH)
+ add_missing_parents(ctx, commit);
+ } else if (!parse_commit_no_graph(commit))
add_missing_parents(ctx, commit);
}
stop_progress(&ctx->progress);
@@ -1051,12 +1095,20 @@ static uint32_t count_distinct_commits(struct write_commit_graph_context *ctx)
_("Counting distinct commits in commit graph"),
ctx->oids.nr);
display_progress(ctx->progress, 0); /* TODO: Measure QSORT() progress */
- QSORT(ctx->oids.list, ctx->oids.nr, commit_compare);
+ QSORT(ctx->oids.list, ctx->oids.nr, oid_compare);
for (i = 1; i < ctx->oids.nr; i++) {
display_progress(ctx->progress, i + 1);
- if (!oideq(&ctx->oids.list[i - 1], &ctx->oids.list[i]))
+ if (!oideq(&ctx->oids.list[i - 1], &ctx->oids.list[i])) {
+ if (ctx->split) {
+ struct commit *c = lookup_commit(ctx->r, &ctx->oids.list[i]);
+
+ if (!c || c->graph_pos != COMMIT_NOT_FROM_GRAPH)
+ continue;
+ }
+
count_distinct++;
+ }
}
stop_progress(&ctx->progress);
@@ -1079,7 +1131,13 @@ static void copy_oids_to_commits(struct write_commit_graph_context *ctx)
if (i > 0 && oideq(&ctx->oids.list[i - 1], &ctx->oids.list[i]))
continue;
+ ALLOC_GROW(ctx->commits.list, ctx->commits.nr + 1, ctx->commits.alloc);
ctx->commits.list[ctx->commits.nr] = lookup_commit(ctx->r, &ctx->oids.list[i]);
+
+ if (ctx->split &&
+ ctx->commits.list[ctx->commits.nr]->graph_pos != COMMIT_NOT_FROM_GRAPH)
+ continue;
+
parse_commit_no_graph(ctx->commits.list[ctx->commits.nr]);
for (parent = ctx->commits.list[ctx->commits.nr]->parents;
@@ -1105,7 +1163,13 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx)
struct strbuf progress_title = STRBUF_INIT;
int num_chunks = ctx->num_extra_edges ? 4 : 3;
- ctx->graph_name = get_commit_graph_filename(ctx->obj_dir);
+ if (ctx->num_commit_graphs_after > 1)
+ ctx->graph_name = get_split_graph_filename(
+ ctx->obj_dir,
+ ctx->num_commit_graphs_after - 1);
+ else
+ ctx->graph_name = get_commit_graph_filename(ctx->obj_dir);
+
if (safe_create_leading_directories(ctx->graph_name)) {
UNLEAK(ctx->graph_name);
error(_("unable to create leading directories of %s"),
@@ -1167,11 +1231,166 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx)
close_commit_graph(ctx->r);
finalize_hashfile(f, NULL, CSUM_HASH_IN_STREAM | CSUM_FSYNC);
+
+ while (ctx->num_commit_graphs_before > ctx->num_commit_graphs_after) {
+ char *graph_name = get_split_graph_filename(
+ ctx->obj_dir,
+ --ctx->num_commit_graphs_before);
+ unlink(graph_name);
+ free(graph_name);
+ }
+
commit_lock_file(&lk);
return 0;
}
+static size_t expected_commit_graph_size(size_t num_commits)
+{
+ return GRAPH_HEADER_SIZE + GRAPH_FANOUT_SIZE + 6 * GRAPH_CHUNKLOOKUP_WIDTH +
+ (num_commits + 1) * (GRAPH_DATA_WIDTH + the_hash_algo->rawsz);
+}
+
+static void split_graph_merge_strategy(struct write_commit_graph_context *ctx)
+{
+ struct commit_graph *g = ctx->r->objects->commit_graph;
+ uint32_t num_commits = ctx->commits.nr;
+ size_t expected_size = expected_commit_graph_size(num_commits);
+
+ ctx->num_commit_graphs_before = 0;
+ while (g) {
+ ctx->num_commit_graphs_before++;
+ g = g->base_graph;
+ }
+
+ g = ctx->r->objects->commit_graph;
+ ctx->num_commit_graphs_after = ctx->num_commit_graphs_before + 1;
+
+ while (g && (g->data_len <= 2 * expected_size || num_commits > MAX_SPLIT_COMMITS)) {
+ num_commits += g->num_commits;
+ expected_size = expected_commit_graph_size(num_commits);
+ g = g->base_graph;
+ ctx->num_commit_graphs_after--;
+ }
+}
+
+static void merge_commit_graph(struct write_commit_graph_context *ctx,
+ struct commit_graph *g)
+{
+ uint32_t i;
+ uint32_t offset = g->num_commits_in_base;
+
+ for (i = 0; i < g->num_commits; i++) {
+ struct object_id oid;
+ struct commit *result;
+
+ display_progress(ctx->progress, i + 1);
+
+ load_oid_from_graph(g, i + offset, &oid);
+
+ /* only add commits if they still exist in the repo */
+ result = lookup_commit_reference_gently(ctx->r, &oid, 1);
+
+ if (result) {
+ ALLOC_GROW(ctx->commits.list, ctx->commits.nr + 1, ctx->commits.alloc);
+ ctx->commits.list[ctx->commits.nr] = result;
+ ctx->commits.nr++;
+ }
+ }
+}
+
+static int commit_compare(const void *_a, const void *_b)
+{
+ const struct commit *a = *(const struct commit **)_a;
+ const struct commit *b = *(const struct commit **)_b;
+ return oidcmp(&a->object.oid, &b->object.oid);
+}
+
+static void deduplicate_commits(struct write_commit_graph_context *ctx)
+{
+ uint32_t i, num_parents, last_distinct = 0, duplicates = 0;
+ struct commit_list *parent;
+
+ if (ctx->report_progress)
+ ctx->progress = start_delayed_progress(
+ _("De-duplicating merged commits"),
+ ctx->commits.nr);
+
+ QSORT(ctx->commits.list, ctx->commits.nr, commit_compare);
+
+ ctx->num_extra_edges = 0;
+ for (i = 1; i < ctx->commits.nr; i++) {
+ display_progress(ctx->progress, i);
+
+ if (oideq(&ctx->commits.list[last_distinct]->object.oid,
+ &ctx->commits.list[i]->object.oid)) {
+ duplicates++;
+ } else {
+ if (duplicates)
+ ctx->commits.list[last_distinct + 1] = ctx->commits.list[i];
+ last_distinct++;
+
+ num_parents = 0;
+ for (parent = ctx->commits.list[i]->parents; parent; parent = parent->next)
+ num_parents++;
+
+ if (num_parents > 2)
+ ctx->num_extra_edges += num_parents - 2;
+ }
+ }
+
+ ctx->commits.nr -= duplicates;
+ stop_progress(&ctx->progress);
+}
+
+static void merge_commit_graphs(struct write_commit_graph_context *ctx)
+{
+ struct commit_graph *g = ctx->r->objects->commit_graph;
+ uint32_t current_graph_number = ctx->num_commit_graphs_before;
+ struct strbuf progress_title = STRBUF_INIT;
+
+ while (g && current_graph_number >= ctx->num_commit_graphs_after) {
+ current_graph_number--;
+
+ if (ctx->report_progress) {
+ if (current_graph_number)
+ strbuf_addf(&progress_title,
+ _("Merging commit-graph-%d"),
+ current_graph_number);
+ else
+ strbuf_addstr(&progress_title,
+ _("Merging commit-graph"));
+ ctx->progress = start_delayed_progress(progress_title.buf, 0);
+ }
+
+ merge_commit_graph(ctx, g);
+ stop_progress(&ctx->progress);
+ strbuf_release(&progress_title);
+
+ g = g->base_graph;
+ }
+
+ if (g) {
+ ctx->new_base_graph = g;
+ ctx->new_num_commits_in_base = g->num_commits + g->num_commits_in_base;
+ }
+
+ deduplicate_commits(ctx);
+}
+
+static void collapse_all_commit_graphs(struct write_commit_graph_context *ctx)
+{
+ struct commit_graph *g = ctx->r->objects->commit_graph;
+
+ ctx->num_commit_graphs_after = 1;
+ ctx->num_commit_graphs_before = 0;
+
+ while (g) {
+ ctx->num_commit_graphs_before++;
+ g = g->base_graph;
+ }
+}
+
int write_commit_graph(const char *obj_dir,
struct string_list *pack_indexes,
struct string_list *commit_hex,
@@ -1189,10 +1408,17 @@ int write_commit_graph(const char *obj_dir,
ctx->obj_dir = obj_dir;
ctx->append = flags & COMMIT_GRAPH_APPEND ? 1 : 0;
ctx->report_progress = flags & COMMIT_GRAPH_PROGRESS ? 1 : 0;
+ ctx->split = flags & COMMIT_GRAPH_SPLIT ? 1 : 0;
+
+ if (ctx->split)
+ prepare_commit_graph(ctx->r);
ctx->approx_nr_objects = approximate_object_count();
ctx->oids.alloc = ctx->approx_nr_objects / 32;
+ if (ctx->split && ctx->oids.alloc > MAX_SPLIT_COMMITS)
+ ctx->oids.alloc = MAX_SPLIT_COMMITS;
+
if (ctx->append) {
prepare_commit_graph_one(ctx->r, ctx->obj_dir);
if (ctx->r->objects->commit_graph)
@@ -1243,6 +1469,16 @@ int write_commit_graph(const char *obj_dir,
goto cleanup;
}
+ if (!ctx->commits.nr)
+ goto cleanup;
+
+ if (ctx->split) {
+ split_graph_merge_strategy(ctx);
+
+ merge_commit_graphs(ctx);
+ } else
+ collapse_all_commit_graphs(ctx);
+
compute_generation_numbers(ctx);
res = write_commit_graph_file(ctx);
@@ -71,6 +71,7 @@ int generation_numbers_enabled(struct repository *r);
#define COMMIT_GRAPH_APPEND (1 << 0)
#define COMMIT_GRAPH_PROGRESS (1 << 1)
+#define COMMIT_GRAPH_SPLIT (1 << 2)
int write_commit_graph_reachable(const char *obj_dir, unsigned int flags);
int write_commit_graph(const char *obj_dir,
@@ -20,7 +20,7 @@ test_expect_success 'verify graph with no graph file' '
test_expect_success 'write graph with no packs' '
cd "$TRASH_DIRECTORY/full" &&
git commit-graph write --object-dir . &&
- test_path_is_file info/commit-graph
+ test_path_is_missing info/commit-graph
'
test_expect_success 'create commits and repack' '