diff mbox series

[1/8] commit-graph: place bloom_settings in context

Message ID c966969071bf0ba135d304bbc0d5c16fbcbedc1e.1592252093.git.gitgitgadget@gmail.com (mailing list archive)
State New, archived
Headers show
Series More commit-graph/Bloom filter improvements | expand

Commit Message

John Passaro via GitGitGadget June 15, 2020, 8:14 p.m. UTC
From: Derrick Stolee <dstolee@microsoft.com>

Place an instance of struct bloom_settings into the struct
write_commit_graph_context. This allows simplifying the function
prototype of write_graph_chunk_bloom_data(). This will allow us
to combine the function prototypes and use function pointers to
simplify write_commit_graph_file().

Reported-by: SZEDER Gábor <szeder.dev@gmail.com>
Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 commit-graph.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

Comments

René Scharfe June 18, 2020, 8:30 p.m. UTC | #1
Am 15.06.20 um 22:14 schrieb Derrick Stolee via GitGitGadget:
> From: Derrick Stolee <dstolee@microsoft.com>
>
> Place an instance of struct bloom_settings into the struct
> write_commit_graph_context. This allows simplifying the function
> prototype of write_graph_chunk_bloom_data(). This will allow us
> to combine the function prototypes and use function pointers to
> simplify write_commit_graph_file().
>
> Reported-by: SZEDER Gábor <szeder.dev@gmail.com>
> Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
> ---
>  commit-graph.c | 14 ++++++++------
>  1 file changed, 8 insertions(+), 6 deletions(-)
>
> diff --git a/commit-graph.c b/commit-graph.c
> index 887837e8826..05b7035d8d5 100644
> --- a/commit-graph.c
> +++ b/commit-graph.c
> @@ -882,6 +882,7 @@ struct write_commit_graph_context {
>
>  	const struct split_commit_graph_opts *split_opts;
>  	size_t total_bloom_filter_data_size;
> +	struct bloom_filter_settings bloom_settings;

That structure is quite busy already, so adding one more member wouldn't
matter much.

Passing so many things to lots of functions makes it harder to argue
about them, though, as all of them effectively become part of their
signature, and you have to look at their implementation to see which
pseudo-parameters they actually use.  It's like a God object.

>  };
>
>  static void write_graph_chunk_fanout(struct hashfile *f,
> @@ -1103,8 +1104,7 @@ static void write_graph_chunk_bloom_indexes(struct hashfile *f,
>  }
>
>  static void write_graph_chunk_bloom_data(struct hashfile *f,
> -					 struct write_commit_graph_context *ctx,
> -					 const struct bloom_filter_settings *settings)
> +					 struct write_commit_graph_context *ctx)
>  {
>  	struct commit **list = ctx->commits.list;
>  	struct commit **last = ctx->commits.list + ctx->commits.nr;
> @@ -1116,9 +1116,9 @@ static void write_graph_chunk_bloom_data(struct hashfile *f,
>  			_("Writing changed paths Bloom filters data"),
>  			ctx->commits.nr);
>
> -	hashwrite_be32(f, settings->hash_version);
> -	hashwrite_be32(f, settings->num_hashes);
> -	hashwrite_be32(f, settings->bits_per_entry);
> +	hashwrite_be32(f, ctx->bloom_settings.hash_version);
> +	hashwrite_be32(f, ctx->bloom_settings.num_hashes);
> +	hashwrite_be32(f, ctx->bloom_settings.bits_per_entry);
>
>  	while (list < last) {
>  		struct bloom_filter *filter = get_bloom_filter(ctx->r, *list, 0);
> @@ -1541,6 +1541,8 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx)
>  	struct object_id file_hash;
>  	const struct bloom_filter_settings bloom_settings = DEFAULT_BLOOM_FILTER_SETTINGS;
>
> +	ctx->bloom_settings = bloom_settings;

So we use the defaults, no customization?  Then you could simply move
the declaration of bloom_settings from write_commit_graph_file() to
write_graph_chunk_bloom_data().  Glancing at pu I don't see additional
uses there, so no need to put it into the context (yet?).

René
Derrick Stolee June 19, 2020, 12:58 p.m. UTC | #2
On 6/18/2020 4:30 PM, René Scharfe wrote:
> Am 15.06.20 um 22:14 schrieb Derrick Stolee via GitGitGadget:
>> From: Derrick Stolee <dstolee@microsoft.com>
>>
>> Place an instance of struct bloom_settings into the struct
>> write_commit_graph_context. This allows simplifying the function
>> prototype of write_graph_chunk_bloom_data(). This will allow us
>> to combine the function prototypes and use function pointers to
>> simplify write_commit_graph_file().
>>
>> Reported-by: SZEDER Gábor <szeder.dev@gmail.com>
>> Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
>> ---
>>  commit-graph.c | 14 ++++++++------
>>  1 file changed, 8 insertions(+), 6 deletions(-)
>>
>> diff --git a/commit-graph.c b/commit-graph.c
>> index 887837e8826..05b7035d8d5 100644
>> --- a/commit-graph.c
>> +++ b/commit-graph.c
>> @@ -882,6 +882,7 @@ struct write_commit_graph_context {
>>
>>  	const struct split_commit_graph_opts *split_opts;
>>  	size_t total_bloom_filter_data_size;
>> +	struct bloom_filter_settings bloom_settings;
> 
> That structure is quite busy already, so adding one more member wouldn't
> matter much.
> 
> Passing so many things to lots of functions makes it harder to argue
> about them, though, as all of them effectively become part of their
> signature, and you have to look at their implementation to see which
> pseudo-parameters they actually use.  It's like a God object.

Correct. The write_commit_graph_context _is_ a God object for the
commit-graph write. The good news is that it is limited only to
commit-graph.c and the write operations therein. Hopefully, the
code organization benefits enough from this structure to justify
the massive struct.

In contrast, it's still smaller and more contained than
"struct rev_info"!

>>  };
>>
>>  static void write_graph_chunk_fanout(struct hashfile *f,
>> @@ -1103,8 +1104,7 @@ static void write_graph_chunk_bloom_indexes(struct hashfile *f,
>>  }
>>
>>  static void write_graph_chunk_bloom_data(struct hashfile *f,
>> -					 struct write_commit_graph_context *ctx,
>> -					 const struct bloom_filter_settings *settings)
>> +					 struct write_commit_graph_context *ctx)
>>  {
>>  	struct commit **list = ctx->commits.list;
>>  	struct commit **last = ctx->commits.list + ctx->commits.nr;
>> @@ -1116,9 +1116,9 @@ static void write_graph_chunk_bloom_data(struct hashfile *f,
>>  			_("Writing changed paths Bloom filters data"),
>>  			ctx->commits.nr);
>>
>> -	hashwrite_be32(f, settings->hash_version);
>> -	hashwrite_be32(f, settings->num_hashes);
>> -	hashwrite_be32(f, settings->bits_per_entry);
>> +	hashwrite_be32(f, ctx->bloom_settings.hash_version);
>> +	hashwrite_be32(f, ctx->bloom_settings.num_hashes);
>> +	hashwrite_be32(f, ctx->bloom_settings.bits_per_entry);
>>
>>  	while (list < last) {
>>  		struct bloom_filter *filter = get_bloom_filter(ctx->r, *list, 0);
>> @@ -1541,6 +1541,8 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx)
>>  	struct object_id file_hash;
>>  	const struct bloom_filter_settings bloom_settings = DEFAULT_BLOOM_FILTER_SETTINGS;
>>
>> +	ctx->bloom_settings = bloom_settings;
> 
> So we use the defaults, no customization?  Then you could simply move
> the declaration of bloom_settings from write_commit_graph_file() to
> write_graph_chunk_bloom_data().  Glancing at pu I don't see additional
> uses there, so no need to put it into the context (yet?).

It certainly is not customized by a user (yet). However, you do make an
excellent point that I need to be more careful here! Patch 8
(commit-graph: persist existence of changed-paths) needs to load the
bloom_filter_settings from the existing commit-graph so we can be
future-proof from a future version customizing the settings inside the
commit-graph file!

This means that in v2 I'll move patches 7 & 8 to be after patch 1 and
add a test to verify the filter settings are preserved (after manually
changing the data in the file).

Thanks!
-Stolee
diff mbox series

Patch

diff --git a/commit-graph.c b/commit-graph.c
index 887837e8826..05b7035d8d5 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -882,6 +882,7 @@  struct write_commit_graph_context {
 
 	const struct split_commit_graph_opts *split_opts;
 	size_t total_bloom_filter_data_size;
+	struct bloom_filter_settings bloom_settings;
 };
 
 static void write_graph_chunk_fanout(struct hashfile *f,
@@ -1103,8 +1104,7 @@  static void write_graph_chunk_bloom_indexes(struct hashfile *f,
 }
 
 static void write_graph_chunk_bloom_data(struct hashfile *f,
-					 struct write_commit_graph_context *ctx,
-					 const struct bloom_filter_settings *settings)
+					 struct write_commit_graph_context *ctx)
 {
 	struct commit **list = ctx->commits.list;
 	struct commit **last = ctx->commits.list + ctx->commits.nr;
@@ -1116,9 +1116,9 @@  static void write_graph_chunk_bloom_data(struct hashfile *f,
 			_("Writing changed paths Bloom filters data"),
 			ctx->commits.nr);
 
-	hashwrite_be32(f, settings->hash_version);
-	hashwrite_be32(f, settings->num_hashes);
-	hashwrite_be32(f, settings->bits_per_entry);
+	hashwrite_be32(f, ctx->bloom_settings.hash_version);
+	hashwrite_be32(f, ctx->bloom_settings.num_hashes);
+	hashwrite_be32(f, ctx->bloom_settings.bits_per_entry);
 
 	while (list < last) {
 		struct bloom_filter *filter = get_bloom_filter(ctx->r, *list, 0);
@@ -1541,6 +1541,8 @@  static int write_commit_graph_file(struct write_commit_graph_context *ctx)
 	struct object_id file_hash;
 	const struct bloom_filter_settings bloom_settings = DEFAULT_BLOOM_FILTER_SETTINGS;
 
+	ctx->bloom_settings = bloom_settings;
+
 	if (ctx->split) {
 		struct strbuf tmp_file = STRBUF_INIT;
 
@@ -1642,7 +1644,7 @@  static int write_commit_graph_file(struct write_commit_graph_context *ctx)
 		write_graph_chunk_extra_edges(f, ctx);
 	if (ctx->changed_paths) {
 		write_graph_chunk_bloom_indexes(f, ctx);
-		write_graph_chunk_bloom_data(f, ctx, &bloom_settings);
+		write_graph_chunk_bloom_data(f, ctx);
 	}
 	if (ctx->num_commit_graphs_after > 1 &&
 	    write_graph_chunk_base(f, ctx)) {