diff mbox series

[v3,3/8] commit-graph: combine generation computations

Message ID 3b15e9df770a118331a1b25f51de8ce97c1b7cab.1678902343.git.gitgitgadget@gmail.com (mailing list archive)
State New, archived
Headers show
Series ref-filter: ahead/behind counting, faster --merged option | expand

Commit Message

Derrick Stolee March 15, 2023, 5:45 p.m. UTC
From: Derrick Stolee <derrickstolee@github.com>

This patch extracts the common code used to compute topological levels
and corrected committer dates into a common routine,
compute_reachable_generation_numbers_1().

This new routine dispatches to call the necessary functions to get and
set the generation number for a given commit through a vtable (the
compute_generation_info struct).

Computing the generation number itself is done in
compute_generation_from_max(), which dispatches its implementation based
on the generation version requested, or issuing a BUG() for unrecognized
generation versions.

This patch cleans up the two places that currently compute topological
levels and corrected commit dates by reducing the amount of duplicated
code. It also makes it possible to introduce a function which
dynamically computes those values for commits that aren't stored in a
commit-graph, which will be required for the forthcoming ahead-behind
rewrite.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Derrick Stolee <derrickstolee@github.com>
---
 commit-graph.c | 171 +++++++++++++++++++++++++++++++------------------
 1 file changed, 107 insertions(+), 64 deletions(-)

Comments

Jonathan Tan March 15, 2023, 10:49 p.m. UTC | #1
"Derrick Stolee via GitGitGadget" <gitgitgadget@gmail.com> writes:
> +static void compute_reachable_generation_numbers_1(
> +			struct compute_generation_info *info,
> +			int generation_version)
>  {
>  	int i;
>  	struct commit_list *list = NULL;
>  
> -	if (ctx->report_progress)
> -		ctx->progress = start_delayed_progress(
> -					_("Computing commit graph topological levels"),
> -					ctx->commits.nr);
> -	for (i = 0; i < ctx->commits.nr; i++) {
> -		struct commit *c = ctx->commits.list[i];
> -		uint32_t level;
> +	for (i = 0; i < info->commits->nr; i++) {
> +		struct commit *c = info->commits->list[i];
> +		timestamp_t gen;
> +		repo_parse_commit(info->r, c);
> +		gen = info->get_generation(c, info->data);
>  
> -		repo_parse_commit(ctx->r, c);
> -		level = *topo_level_slab_at(ctx->topo_levels, c);
> +		display_progress(info->progress, info->progress_cnt + 1);
>  
> -		display_progress(ctx->progress, i + 1);
> -		if (level != GENERATION_NUMBER_ZERO)
> +		if (gen != GENERATION_NUMBER_ZERO && gen != GENERATION_NUMBER_INFINITY)
>  			continue;
>  
>  		commit_list_insert(c, &list);

So this replaces a call to display_progress with another...

>  			if (all_parents_computed) {
>  				pop_commit(&list);
> -
> -				if (max_level > GENERATION_NUMBER_V1_MAX - 1)
> -					max_level = GENERATION_NUMBER_V1_MAX - 1;
> -				*topo_level_slab_at(ctx->topo_levels, current) = max_level + 1;
> +				gen = compute_generation_from_max(
> +						current, max_gen,
> +						generation_version);
> +				info->set_generation(current, gen, info->data);
>  			}

...here is where set_generation is called...

> +static void set_topo_level(struct commit *c, timestamp_t t, void *data)
> +{
> +	struct write_commit_graph_context *ctx = data;
> +	*topo_level_slab_at(ctx->topo_levels, c) = (uint32_t)t;
> +	display_progress(ctx->progress, ctx->progress_cnt + 1);
> +}

...is this display_progress() redundant? (set_topo_level() is one of the
possibilities that set_generation could be assigned to.) There already
seems to be one at the top. Further supporting my query is the fact that
in the hunk containing set_generation, there is no progress report on
the LHS of the diff.

> +static void set_generation_v2(struct commit *c, timestamp_t t, void *data)
> +{
> +	struct write_commit_graph_context *ctx = data;
> +	struct commit_graph_data *g = commit_graph_data_at(c);
> +	g->generation = (uint32_t)t;
> +	display_progress(ctx->progress, ctx->progress_cnt + 1);
> +}

Likewise for this function.

Everything else up to and including this patch looks good.
Derrick Stolee March 17, 2023, 6:30 p.m. UTC | #2
On 3/15/2023 6:49 PM, Jonathan Tan wrote:
> "Derrick Stolee via GitGitGadget" <gitgitgadget@gmail.com> writes:
>> +static void compute_reachable_generation_numbers_1(
>> +			struct compute_generation_info *info,
>> +			int generation_version)
>>  {
>>  	int i;
>>  	struct commit_list *list = NULL;
>>  
>> -	if (ctx->report_progress)
>> -		ctx->progress = start_delayed_progress(
>> -					_("Computing commit graph topological levels"),
>> -					ctx->commits.nr);
>> -	for (i = 0; i < ctx->commits.nr; i++) {
>> -		struct commit *c = ctx->commits.list[i];
>> -		uint32_t level;
>> +	for (i = 0; i < info->commits->nr; i++) {
>> +		struct commit *c = info->commits->list[i];
>> +		timestamp_t gen;
>> +		repo_parse_commit(info->r, c);
>> +		gen = info->get_generation(c, info->data);
>>  
>> -		repo_parse_commit(ctx->r, c);
>> -		level = *topo_level_slab_at(ctx->topo_levels, c);
>> +		display_progress(info->progress, info->progress_cnt + 1);
>>  
>> -		display_progress(ctx->progress, i + 1);
>> -		if (level != GENERATION_NUMBER_ZERO)
>> +		if (gen != GENERATION_NUMBER_ZERO && gen != GENERATION_NUMBER_INFINITY)
>>  			continue;
>>  
>>  		commit_list_insert(c, &list);
> 
> So this replaces a call to display_progress with another...
> 
>>  			if (all_parents_computed) {
>>  				pop_commit(&list);
>> -
>> -				if (max_level > GENERATION_NUMBER_V1_MAX - 1)
>> -					max_level = GENERATION_NUMBER_V1_MAX - 1;
>> -				*topo_level_slab_at(ctx->topo_levels, current) = max_level + 1;
>> +				gen = compute_generation_from_max(
>> +						current, max_gen,
>> +						generation_version);
>> +				info->set_generation(current, gen, info->data);
>>  			}
> 
> ...here is where set_generation is called...
> 
>> +static void set_topo_level(struct commit *c, timestamp_t t, void *data)
>> +{
>> +	struct write_commit_graph_context *ctx = data;
>> +	*topo_level_slab_at(ctx->topo_levels, c) = (uint32_t)t;
>> +	display_progress(ctx->progress, ctx->progress_cnt + 1);
>> +}
> 
> ...is this display_progress() redundant? (set_topo_level() is one of the
> possibilities that set_generation could be assigned to.) There already
> seems to be one at the top. Further supporting my query is the fact that
> in the hunk containing set_generation, there is no progress report on
> the LHS of the diff.

It turns out the progress is a bit redundant here, but not entirely in
the case of ensure_generations_valid() (if progress was enabled).

Let's break down the iteration, which has nested loops:

 1. for all commits in the initial list.
   2. perform DFS until generation can be computed. (while loop)

When writing a commit-graph file, that initial list is _every commit
in the commit-graph_, so having a display_progress() in the for loop
is sufficient to get the exact number.

In the case of ensure_generations_valid(), the number of assignments
in the while loop can be much larger than the initial input list.

However, ensure_generations_valid() does not use progress _and_ even
if it did, it would make sense to signal progress based on the number
of tips that need to be computed. I'll remove these progress counts
inside the mutators.

Thanks,
-Stolee
diff mbox series

Patch

diff --git a/commit-graph.c b/commit-graph.c
index c11b59f28b3..deccf984a0d 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -1446,24 +1446,53 @@  static void close_reachable(struct write_commit_graph_context *ctx)
 	stop_progress(&ctx->progress);
 }
 
-static void compute_topological_levels(struct write_commit_graph_context *ctx)
+struct compute_generation_info {
+	struct repository *r;
+	struct packed_commit_list *commits;
+	struct progress *progress;
+	int progress_cnt;
+
+	timestamp_t (*get_generation)(struct commit *c, void *data);
+	void (*set_generation)(struct commit *c, timestamp_t gen, void *data);
+	void *data;
+};
+
+static timestamp_t compute_generation_from_max(struct commit *c,
+					       timestamp_t max_gen,
+					       int generation_version)
+{
+	switch (generation_version) {
+	case 1: /* topological levels */
+		if (max_gen > GENERATION_NUMBER_V1_MAX - 1)
+			max_gen = GENERATION_NUMBER_V1_MAX - 1;
+		return max_gen + 1;
+
+	case 2: /* corrected commit date */
+		if (c->date && c->date > max_gen)
+			max_gen = c->date - 1;
+		return max_gen + 1;
+
+	default:
+		BUG("attempting unimplemented version");
+	}
+}
+
+static void compute_reachable_generation_numbers_1(
+			struct compute_generation_info *info,
+			int generation_version)
 {
 	int i;
 	struct commit_list *list = NULL;
 
-	if (ctx->report_progress)
-		ctx->progress = start_delayed_progress(
-					_("Computing commit graph topological levels"),
-					ctx->commits.nr);
-	for (i = 0; i < ctx->commits.nr; i++) {
-		struct commit *c = ctx->commits.list[i];
-		uint32_t level;
+	for (i = 0; i < info->commits->nr; i++) {
+		struct commit *c = info->commits->list[i];
+		timestamp_t gen;
+		repo_parse_commit(info->r, c);
+		gen = info->get_generation(c, info->data);
 
-		repo_parse_commit(ctx->r, c);
-		level = *topo_level_slab_at(ctx->topo_levels, c);
+		display_progress(info->progress, info->progress_cnt + 1);
 
-		display_progress(ctx->progress, i + 1);
-		if (level != GENERATION_NUMBER_ZERO)
+		if (gen != GENERATION_NUMBER_ZERO && gen != GENERATION_NUMBER_INFINITY)
 			continue;
 
 		commit_list_insert(c, &list);
@@ -1471,38 +1500,91 @@  static void compute_topological_levels(struct write_commit_graph_context *ctx)
 			struct commit *current = list->item;
 			struct commit_list *parent;
 			int all_parents_computed = 1;
-			uint32_t max_level = 0;
+			uint32_t max_gen = 0;
 
 			for (parent = current->parents; parent; parent = parent->next) {
-				repo_parse_commit(ctx->r, parent->item);
-				level = *topo_level_slab_at(ctx->topo_levels, parent->item);
+				repo_parse_commit(info->r, parent->item);
+				gen = info->get_generation(parent->item, info->data);
 
-				if (level == GENERATION_NUMBER_ZERO) {
+				if (gen == GENERATION_NUMBER_ZERO) {
 					all_parents_computed = 0;
 					commit_list_insert(parent->item, &list);
 					break;
 				}
 
-				if (level > max_level)
-					max_level = level;
+				if (gen > max_gen)
+					max_gen = gen;
 			}
 
 			if (all_parents_computed) {
 				pop_commit(&list);
-
-				if (max_level > GENERATION_NUMBER_V1_MAX - 1)
-					max_level = GENERATION_NUMBER_V1_MAX - 1;
-				*topo_level_slab_at(ctx->topo_levels, current) = max_level + 1;
+				gen = compute_generation_from_max(
+						current, max_gen,
+						generation_version);
+				info->set_generation(current, gen, info->data);
 			}
 		}
 	}
+}
+
+static timestamp_t get_topo_level(struct commit *c, void *data)
+{
+	struct write_commit_graph_context *ctx = data;
+	return *topo_level_slab_at(ctx->topo_levels, c);
+}
+
+static void set_topo_level(struct commit *c, timestamp_t t, void *data)
+{
+	struct write_commit_graph_context *ctx = data;
+	*topo_level_slab_at(ctx->topo_levels, c) = (uint32_t)t;
+	display_progress(ctx->progress, ctx->progress_cnt + 1);
+}
+
+static void compute_topological_levels(struct write_commit_graph_context *ctx)
+{
+	struct compute_generation_info info = {
+		.r = ctx->r,
+		.progress = ctx->progress,
+		.commits = &ctx->commits,
+		.get_generation = get_topo_level,
+		.set_generation = set_topo_level,
+		.data = ctx,
+	};
+
+	if (ctx->report_progress)
+		ctx->progress = start_delayed_progress(
+					_("Computing commit graph topological levels"),
+					ctx->commits.nr);
+
+	compute_reachable_generation_numbers_1(&info, 1);
+
 	stop_progress(&ctx->progress);
 }
 
+static timestamp_t get_generation_from_graph_data(struct commit *c, void *data)
+{
+	return commit_graph_data_at(c)->generation;
+}
+
+static void set_generation_v2(struct commit *c, timestamp_t t, void *data)
+{
+	struct write_commit_graph_context *ctx = data;
+	struct commit_graph_data *g = commit_graph_data_at(c);
+	g->generation = (uint32_t)t;
+	display_progress(ctx->progress, ctx->progress_cnt + 1);
+}
+
 static void compute_generation_numbers(struct write_commit_graph_context *ctx)
 {
 	int i;
-	struct commit_list *list = NULL;
+	struct compute_generation_info info = {
+		.r = ctx->r,
+		.progress = ctx->progress,
+		.commits = &ctx->commits,
+		.get_generation = get_generation_from_graph_data,
+		.set_generation = set_generation_v2,
+		.data = ctx,
+	};
 
 	if (ctx->report_progress)
 		ctx->progress = start_delayed_progress(
@@ -1517,47 +1599,7 @@  static void compute_generation_numbers(struct write_commit_graph_context *ctx)
 		}
 	}
 
-	for (i = 0; i < ctx->commits.nr; i++) {
-		struct commit *c = ctx->commits.list[i];
-		timestamp_t corrected_commit_date;
-
-		repo_parse_commit(ctx->r, c);
-		corrected_commit_date = commit_graph_data_at(c)->generation;
-
-		display_progress(ctx->progress, i + 1);
-		if (corrected_commit_date != GENERATION_NUMBER_ZERO)
-			continue;
-
-		commit_list_insert(c, &list);
-		while (list) {
-			struct commit *current = list->item;
-			struct commit_list *parent;
-			int all_parents_computed = 1;
-			timestamp_t max_corrected_commit_date = 0;
-
-			for (parent = current->parents; parent; parent = parent->next) {
-				repo_parse_commit(ctx->r, parent->item);
-				corrected_commit_date = commit_graph_data_at(parent->item)->generation;
-
-				if (corrected_commit_date == GENERATION_NUMBER_ZERO) {
-					all_parents_computed = 0;
-					commit_list_insert(parent->item, &list);
-					break;
-				}
-
-				if (corrected_commit_date > max_corrected_commit_date)
-					max_corrected_commit_date = corrected_commit_date;
-			}
-
-			if (all_parents_computed) {
-				pop_commit(&list);
-
-				if (current->date && current->date > max_corrected_commit_date)
-					max_corrected_commit_date = current->date - 1;
-				commit_graph_data_at(current)->generation = max_corrected_commit_date + 1;
-			}
-		}
-	}
+	compute_reachable_generation_numbers_1(&info, 2);
 
 	for (i = 0; i < ctx->commits.nr; i++) {
 		struct commit *c = ctx->commits.list[i];
@@ -1565,6 +1607,7 @@  static void compute_generation_numbers(struct write_commit_graph_context *ctx)
 		if (offset > GENERATION_NUMBER_V2_OFFSET_MAX)
 			ctx->num_generation_data_overflows++;
 	}
+
 	stop_progress(&ctx->progress);
 }