diff mbox series

[v2,06/11] commit-graph: simplify chunk writes into loop

Message ID 8b959f2f374654aeb87b847560761890c2f9aa2c.1592934430.git.gitgitgadget@gmail.com (mailing list archive)
State New, archived
Headers show
Series More commit-graph/Bloom filter improvements | expand

Commit Message

Linus Arver via GitGitGadget June 23, 2020, 5:47 p.m. UTC
From: =?UTF-8?q?SZEDER=20G=C3=A1bor?= <szeder.dev@gmail.com>

In write_commit_graph_file() we now have one block of code filling the
array of 'struct chunk_info' with the IDs and sizes of chunks to be
written, and an other block of code calling the functions responsible
for writing individual chunks.  In case of optional chunks like Extra
Edge List an Base Graphs List there is also a condition checking
whether that chunk is necessary/desired, and that same condition is
repeated in both blocks of code. Other, newer chunks have similar
optional conditions.

Eliminate these repeated conditions by storing the function pointers
responsible for writing individual chunks in the 'struct chunk_info'
array as well, and calling them in a loop to write the commit-graph
file.  This will open up the possibility for a bit of foolproofing in
the following patch.

Signed-off-by: SZEDER Gábor <szeder.dev@gmail.com>
Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 commit-graph.c | 31 +++++++++++++++++++------------
 1 file changed, 19 insertions(+), 12 deletions(-)

Comments

René Scharfe June 25, 2020, 7:25 a.m. UTC | #1
Am 23.06.20 um 19:47 schrieb SZEDER Gábor via GitGitGadget:
> From: =?UTF-8?q?SZEDER=20G=C3=A1bor?= <szeder.dev@gmail.com>
>
> In write_commit_graph_file() we now have one block of code filling the
> array of 'struct chunk_info' with the IDs and sizes of chunks to be
> written, and an other block of code calling the functions responsible
> for writing individual chunks.  In case of optional chunks like Extra
> Edge List an Base Graphs List there is also a condition checking
> whether that chunk is necessary/desired, and that same condition is
> repeated in both blocks of code. Other, newer chunks have similar
> optional conditions.
>
> Eliminate these repeated conditions by storing the function pointers
> responsible for writing individual chunks in the 'struct chunk_info'
> array as well, and calling them in a loop to write the commit-graph
> file.  This will open up the possibility for a bit of foolproofing in
> the following patch.

You can do that without storing function pointers by selecting the
function to use based on the chunk ID -- like parse_commit_graph() does
on the read side.  Advantage: You don't need to press all write
functions into the same mold and can keep their individual signatures.

>
> Signed-off-by: SZEDER Gábor <szeder.dev@gmail.com>
> Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
> ---
>  commit-graph.c | 31 +++++++++++++++++++------------
>  1 file changed, 19 insertions(+), 12 deletions(-)
>
> diff --git a/commit-graph.c b/commit-graph.c
> index f33bfe49b3..086fc2d070 100644
> --- a/commit-graph.c
> +++ b/commit-graph.c
> @@ -1555,9 +1555,13 @@ static int write_graph_chunk_base(struct hashfile *f,
>  	return 0;
>  }
>
> +typedef int (*chunk_write_fn)(struct hashfile *f,
> +			      struct write_commit_graph_context *ctx);
> +
>  struct chunk_info {
>  	uint32_t id;
>  	uint64_t size;
> +	chunk_write_fn write_fn;
>  };
>
>  static int write_commit_graph_file(struct write_commit_graph_context *ctx)
> @@ -1615,27 +1619,34 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx)
>
>  	chunks[0].id = GRAPH_CHUNKID_OIDFANOUT;
>  	chunks[0].size = GRAPH_FANOUT_SIZE;
> +	chunks[0].write_fn = write_graph_chunk_fanout;
>  	chunks[1].id = GRAPH_CHUNKID_OIDLOOKUP;
>  	chunks[1].size = hashsz * ctx->commits.nr;
> +	chunks[1].write_fn = write_graph_chunk_oids;
>  	chunks[2].id = GRAPH_CHUNKID_DATA;
>  	chunks[2].size = (hashsz + 16) * ctx->commits.nr;
> +	chunks[2].write_fn = write_graph_chunk_data;
>  	if (ctx->num_extra_edges) {
>  		chunks[num_chunks].id = GRAPH_CHUNKID_EXTRAEDGES;
>  		chunks[num_chunks].size = 4 * ctx->num_extra_edges;
> +		chunks[num_chunks].write_fn = write_graph_chunk_extra_edges;
>  		num_chunks++;
>  	}
>  	if (ctx->changed_paths) {
>  		chunks[num_chunks].id = GRAPH_CHUNKID_BLOOMINDEXES;
>  		chunks[num_chunks].size = sizeof(uint32_t) * ctx->commits.nr;
> +		chunks[num_chunks].write_fn = write_graph_chunk_bloom_indexes;
>  		num_chunks++;
>  		chunks[num_chunks].id = GRAPH_CHUNKID_BLOOMDATA;
>  		chunks[num_chunks].size = sizeof(uint32_t) * 3
>  					  + ctx->total_bloom_filter_data_size;
> +		chunks[num_chunks].write_fn = write_graph_chunk_bloom_data;
>  		num_chunks++;
>  	}
>  	if (ctx->num_commit_graphs_after > 1) {
>  		chunks[num_chunks].id = GRAPH_CHUNKID_BASE;
>  		chunks[num_chunks].size = hashsz * (ctx->num_commit_graphs_after - 1);
> +		chunks[num_chunks].write_fn = write_graph_chunk_base;
>  		num_chunks++;
>  	}
>
> @@ -1671,19 +1682,15 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx)
>  			progress_title.buf,
>  			num_chunks * ctx->commits.nr);
>  	}
> -	write_graph_chunk_fanout(f, ctx);
> -	write_graph_chunk_oids(f, ctx);
> -	write_graph_chunk_data(f, ctx);
> -	if (ctx->num_extra_edges)
> -		write_graph_chunk_extra_edges(f, ctx);
> -	if (ctx->changed_paths) {
> -		write_graph_chunk_bloom_indexes(f, ctx);
> -		write_graph_chunk_bloom_data(f, ctx);
> -	}
> -	if (ctx->num_commit_graphs_after > 1 &&
> -	    write_graph_chunk_base(f, ctx)) {
> -		return -1;
> +
> +	for (i = 0; i < num_chunks; i++) {
> +		if (chunks[i].write_fn(f, ctx)) {
> +			error(_("failed writing chunk with id %"PRIx32""),
> +			      chunks[i].id);

Of all the write functions only write_graph_chunk_base() can return
non-zero and it already prints an error message in that case ("failed to
write correct number of base graph ids").  Why add this one?

> +			return -1;
> +		}
>  	}
> +
>  	stop_progress(&ctx->progress);
>  	strbuf_release(&progress_title);
>
>
Derrick Stolee June 25, 2020, 2:59 p.m. UTC | #2
On 6/25/2020 3:25 AM, René Scharfe wrote:
> Am 23.06.20 um 19:47 schrieb SZEDER Gábor via GitGitGadget:
>> From: =?UTF-8?q?SZEDER=20G=C3=A1bor?= <szeder.dev@gmail.com>
>>
>> In write_commit_graph_file() we now have one block of code filling the
>> array of 'struct chunk_info' with the IDs and sizes of chunks to be
>> written, and an other block of code calling the functions responsible
>> for writing individual chunks.  In case of optional chunks like Extra
>> Edge List an Base Graphs List there is also a condition checking
>> whether that chunk is necessary/desired, and that same condition is
>> repeated in both blocks of code. Other, newer chunks have similar
>> optional conditions.
>>
>> Eliminate these repeated conditions by storing the function pointers
>> responsible for writing individual chunks in the 'struct chunk_info'
>> array as well, and calling them in a loop to write the commit-graph
>> file.  This will open up the possibility for a bit of foolproofing in
>> the following patch.
> 
> You can do that without storing function pointers by selecting the
> function to use based on the chunk ID -- like parse_commit_graph() does
> on the read side.  Advantage: You don't need to press all write
> functions into the same mold and can keep their individual signatures.

I do think that the loop without a switch statement is valuable.
It focuses the updates for new chunks to be localized to the
section that calculates the offset values.

>>
>> Signed-off-by: SZEDER Gábor <szeder.dev@gmail.com>
>> Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
>> ---
>>  commit-graph.c | 31 +++++++++++++++++++------------
>>  1 file changed, 19 insertions(+), 12 deletions(-)
>>
>> diff --git a/commit-graph.c b/commit-graph.c
>> index f33bfe49b3..086fc2d070 100644
>> --- a/commit-graph.c
>> +++ b/commit-graph.c
>> @@ -1555,9 +1555,13 @@ static int write_graph_chunk_base(struct hashfile *f,
>>  	return 0;
>>  }
>>
>> +typedef int (*chunk_write_fn)(struct hashfile *f,
>> +			      struct write_commit_graph_context *ctx);
>> +
>>  struct chunk_info {
>>  	uint32_t id;
>>  	uint64_t size;
>> +	chunk_write_fn write_fn;
>>  };
>>
>>  static int write_commit_graph_file(struct write_commit_graph_context *ctx)
>> @@ -1615,27 +1619,34 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx)
>>
>>  	chunks[0].id = GRAPH_CHUNKID_OIDFANOUT;
>>  	chunks[0].size = GRAPH_FANOUT_SIZE;
>> +	chunks[0].write_fn = write_graph_chunk_fanout;
>>  	chunks[1].id = GRAPH_CHUNKID_OIDLOOKUP;
>>  	chunks[1].size = hashsz * ctx->commits.nr;
>> +	chunks[1].write_fn = write_graph_chunk_oids;
>>  	chunks[2].id = GRAPH_CHUNKID_DATA;
>>  	chunks[2].size = (hashsz + 16) * ctx->commits.nr;
>> +	chunks[2].write_fn = write_graph_chunk_data;
>>  	if (ctx->num_extra_edges) {
>>  		chunks[num_chunks].id = GRAPH_CHUNKID_EXTRAEDGES;
>>  		chunks[num_chunks].size = 4 * ctx->num_extra_edges;
>> +		chunks[num_chunks].write_fn = write_graph_chunk_extra_edges;
>>  		num_chunks++;
>>  	}
>>  	if (ctx->changed_paths) {
>>  		chunks[num_chunks].id = GRAPH_CHUNKID_BLOOMINDEXES;
>>  		chunks[num_chunks].size = sizeof(uint32_t) * ctx->commits.nr;
>> +		chunks[num_chunks].write_fn = write_graph_chunk_bloom_indexes;
>>  		num_chunks++;
>>  		chunks[num_chunks].id = GRAPH_CHUNKID_BLOOMDATA;
>>  		chunks[num_chunks].size = sizeof(uint32_t) * 3
>>  					  + ctx->total_bloom_filter_data_size;
>> +		chunks[num_chunks].write_fn = write_graph_chunk_bloom_data;
>>  		num_chunks++;
>>  	}
>>  	if (ctx->num_commit_graphs_after > 1) {
>>  		chunks[num_chunks].id = GRAPH_CHUNKID_BASE;
>>  		chunks[num_chunks].size = hashsz * (ctx->num_commit_graphs_after - 1);
>> +		chunks[num_chunks].write_fn = write_graph_chunk_base;
>>  		num_chunks++;
>>  	}
>>
>> @@ -1671,19 +1682,15 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx)
>>  			progress_title.buf,
>>  			num_chunks * ctx->commits.nr);
>>  	}
>> -	write_graph_chunk_fanout(f, ctx);
>> -	write_graph_chunk_oids(f, ctx);
>> -	write_graph_chunk_data(f, ctx);
>> -	if (ctx->num_extra_edges)
>> -		write_graph_chunk_extra_edges(f, ctx);
>> -	if (ctx->changed_paths) {
>> -		write_graph_chunk_bloom_indexes(f, ctx);
>> -		write_graph_chunk_bloom_data(f, ctx);
>> -	}
>> -	if (ctx->num_commit_graphs_after > 1 &&
>> -	    write_graph_chunk_base(f, ctx)) {
>> -		return -1;
>> +
>> +	for (i = 0; i < num_chunks; i++) {
>> +		if (chunks[i].write_fn(f, ctx)) {
>> +			error(_("failed writing chunk with id %"PRIx32""),
>> +			      chunks[i].id);
> 
> Of all the write functions only write_graph_chunk_base() can return
> non-zero and it already prints an error message in that case ("failed to
> write correct number of base graph ids").  Why add this one?

Ok, we can require the chunk methods to write an error() message with
appropriate context and simply return -1 here.

Thanks,
-Stolee
diff mbox series

Patch

diff --git a/commit-graph.c b/commit-graph.c
index f33bfe49b3..086fc2d070 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -1555,9 +1555,13 @@  static int write_graph_chunk_base(struct hashfile *f,
 	return 0;
 }
 
+typedef int (*chunk_write_fn)(struct hashfile *f,
+			      struct write_commit_graph_context *ctx);
+
 struct chunk_info {
 	uint32_t id;
 	uint64_t size;
+	chunk_write_fn write_fn;
 };
 
 static int write_commit_graph_file(struct write_commit_graph_context *ctx)
@@ -1615,27 +1619,34 @@  static int write_commit_graph_file(struct write_commit_graph_context *ctx)
 
 	chunks[0].id = GRAPH_CHUNKID_OIDFANOUT;
 	chunks[0].size = GRAPH_FANOUT_SIZE;
+	chunks[0].write_fn = write_graph_chunk_fanout;
 	chunks[1].id = GRAPH_CHUNKID_OIDLOOKUP;
 	chunks[1].size = hashsz * ctx->commits.nr;
+	chunks[1].write_fn = write_graph_chunk_oids;
 	chunks[2].id = GRAPH_CHUNKID_DATA;
 	chunks[2].size = (hashsz + 16) * ctx->commits.nr;
+	chunks[2].write_fn = write_graph_chunk_data;
 	if (ctx->num_extra_edges) {
 		chunks[num_chunks].id = GRAPH_CHUNKID_EXTRAEDGES;
 		chunks[num_chunks].size = 4 * ctx->num_extra_edges;
+		chunks[num_chunks].write_fn = write_graph_chunk_extra_edges;
 		num_chunks++;
 	}
 	if (ctx->changed_paths) {
 		chunks[num_chunks].id = GRAPH_CHUNKID_BLOOMINDEXES;
 		chunks[num_chunks].size = sizeof(uint32_t) * ctx->commits.nr;
+		chunks[num_chunks].write_fn = write_graph_chunk_bloom_indexes;
 		num_chunks++;
 		chunks[num_chunks].id = GRAPH_CHUNKID_BLOOMDATA;
 		chunks[num_chunks].size = sizeof(uint32_t) * 3
 					  + ctx->total_bloom_filter_data_size;
+		chunks[num_chunks].write_fn = write_graph_chunk_bloom_data;
 		num_chunks++;
 	}
 	if (ctx->num_commit_graphs_after > 1) {
 		chunks[num_chunks].id = GRAPH_CHUNKID_BASE;
 		chunks[num_chunks].size = hashsz * (ctx->num_commit_graphs_after - 1);
+		chunks[num_chunks].write_fn = write_graph_chunk_base;
 		num_chunks++;
 	}
 
@@ -1671,19 +1682,15 @@  static int write_commit_graph_file(struct write_commit_graph_context *ctx)
 			progress_title.buf,
 			num_chunks * ctx->commits.nr);
 	}
-	write_graph_chunk_fanout(f, ctx);
-	write_graph_chunk_oids(f, ctx);
-	write_graph_chunk_data(f, ctx);
-	if (ctx->num_extra_edges)
-		write_graph_chunk_extra_edges(f, ctx);
-	if (ctx->changed_paths) {
-		write_graph_chunk_bloom_indexes(f, ctx);
-		write_graph_chunk_bloom_data(f, ctx);
-	}
-	if (ctx->num_commit_graphs_after > 1 &&
-	    write_graph_chunk_base(f, ctx)) {
-		return -1;
+
+	for (i = 0; i < num_chunks; i++) {
+		if (chunks[i].write_fn(f, ctx)) {
+			error(_("failed writing chunk with id %"PRIx32""),
+			      chunks[i].id);
+			return -1;
+		}
 	}
+
 	stop_progress(&ctx->progress);
 	strbuf_release(&progress_title);