diff mbox series

[10/10] builtin/commit-graph.c: introduce '--max-new-filters=<n>'

Message ID 8b670571dc24973cc5e894b866a68236d8fbfa4f.1596480582.git.me@ttaylorr.com (mailing list archive)
State Superseded
Headers show
Series more miscellaneous Bloom filter improvements | expand

Commit Message

Taylor Blau Aug. 3, 2020, 6:57 p.m. UTC
Introduce a command-line flag and configuration variable to fill in the
'max_new_filters' variable introduced by the previous patch.

The command-line option '--max-new-filters' takes precedence over
'graph.maxNewFilters', which is the default value.
'--no-max-new-filters' can also be provided, which sets the value back
to '-1', indicating that an unlimited number of new Bloom filters may be
generated. (OPT_INTEGER only allows setting the '--no-' variant back to
'0', hence a custom callback was used instead).

Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
 Documentation/config/commitgraph.txt |  4 +++
 Documentation/git-commit-graph.txt   |  4 +++
 bloom.c                              | 15 +++++++++++
 builtin/commit-graph.c               | 39 +++++++++++++++++++++++++---
 commit-graph.c                       | 22 +++++++++++-----
 commit-graph.h                       |  1 +
 t/t4216-log-bloom.sh                 | 19 ++++++++++++++
 7 files changed, 95 insertions(+), 9 deletions(-)

Comments

Derrick Stolee Aug. 4, 2020, 1:03 p.m. UTC | #1
On 8/3/2020 2:57 PM, Taylor Blau wrote:
> Introduce a command-line flag and configuration variable to fill in the
> 'max_new_filters' variable introduced by the previous patch.
> 
> The command-line option '--max-new-filters' takes precedence over
> 'graph.maxNewFilters', which is the default value.

commitGraph.maxNewFilters?

> '--no-max-new-filters' can also be provided, which sets the value back
> to '-1', indicating that an unlimited number of new Bloom filters may be
> generated. (OPT_INTEGER only allows setting the '--no-' variant back to
> '0', hence a custom callback was used instead).

Other than the commit message typo above, this is a good patch.

Thanks,
-Stolee
Taylor Blau Aug. 4, 2020, 8:14 p.m. UTC | #2
On Tue, Aug 04, 2020 at 09:03:36AM -0400, Derrick Stolee wrote:
> On 8/3/2020 2:57 PM, Taylor Blau wrote:
> > Introduce a command-line flag and configuration variable to fill in the
> > 'max_new_filters' variable introduced by the previous patch.
> >
> > The command-line option '--max-new-filters' takes precedence over
> > 'graph.maxNewFilters', which is the default value.
>
> commitGraph.maxNewFilters?

Nice catch. There are some other places that I made this typo, but I
fixed those up locally, too.

> > '--no-max-new-filters' can also be provided, which sets the value back
> > to '-1', indicating that an unlimited number of new Bloom filters may be
> > generated. (OPT_INTEGER only allows setting the '--no-' variant back to
> > '0', hence a custom callback was used instead).
>
> Other than the commit message typo above, this is a good patch.

Thanks, and thanks for the good suggestion to write it in the first
place ;).

> Thanks,
> -Stolee

Thanks,
Taylor
diff mbox series

Patch

diff --git a/Documentation/config/commitgraph.txt b/Documentation/config/commitgraph.txt
index bb78e72f1b..46f48d1aa8 100644
--- a/Documentation/config/commitgraph.txt
+++ b/Documentation/config/commitgraph.txt
@@ -1,3 +1,7 @@ 
+commitgraph.maxNewFilters::
+	Specifies the default value for the `--max-new-filters` option of `git
+	commit-graph write` (c.f., linkgit:git-commit-graph[1]).
+
 commitgraph.readChangedPaths::
 	If true, then git will use the changed-path Bloom filters in the
 	commit-graph file (if it exists, and they are present). Defaults to
diff --git a/Documentation/git-commit-graph.txt b/Documentation/git-commit-graph.txt
index 17405c73a9..e8034ff0ac 100644
--- a/Documentation/git-commit-graph.txt
+++ b/Documentation/git-commit-graph.txt
@@ -67,6 +67,10 @@  this option is given, future commit-graph writes will automatically assume
 that this option was intended. Use `--no-changed-paths` to stop storing this
 data.
 +
+With the `--max-new-filters=<n>` option, generate at most `n` new Bloom
+filters (if `--changed-paths` is specified). If `n` is `-1`, no limit is
+enforced. Overrides the `graph.maxNewFilters` configuration.
++
 With the `--split[=<strategy>]` option, write the commit-graph as a
 chain of multiple commit-graph files stored in
 `<dir>/info/commit-graphs`. Commit-graph layers are merged based on the
diff --git a/bloom.c b/bloom.c
index a8a21762f4..1742beac9e 100644
--- a/bloom.c
+++ b/bloom.c
@@ -51,6 +51,21 @@  static int load_bloom_filter_from_graph(struct commit_graph *g,
 	else
 		start_index = 0;
 
+	if ((start_index == end_index) &&
+	    (g->bloom_large.word_alloc && !bitmap_get(&g->bloom_large, lex_pos))) {
+		/*
+		 * If the filter is zero-length, either (1) the filter has no
+		 * changes, (2) the filter has too many changes, or (3) it
+		 * wasn't computed (eg., due to '--max-new-filters').
+		 *
+		 * If either (1) or (2) is the case, the 'large' bit will be set
+		 * for this Bloom filter. If it is unset, then it wasn't
+		 * computed. In that case, return nothing, since we don't have
+		 * that filter in the graph.
+		 */
+		return 0;
+	}
+
 	filter->len = end_index - start_index;
 	filter->data = (unsigned char *)(g->chunk_bloom_data +
 					sizeof(unsigned char) * start_index +
diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c
index 38f5f57d15..aaf54a76db 100644
--- a/builtin/commit-graph.c
+++ b/builtin/commit-graph.c
@@ -13,7 +13,8 @@  static char const * const builtin_commit_graph_usage[] = {
 	N_("git commit-graph verify [--object-dir <objdir>] [--shallow] [--[no-]progress]"),
 	N_("git commit-graph write [--object-dir <objdir>] [--append] "
 	   "[--split[=<strategy>]] [--reachable|--stdin-packs|--stdin-commits] "
-	   "[--changed-paths] [--[no-]progress] <split options>"),
+	   "[--changed-paths] [--[no-]max-new-filters <n>] [--[no-]progress] "
+	   "<split options>"),
 	NULL
 };
 
@@ -25,7 +26,8 @@  static const char * const builtin_commit_graph_verify_usage[] = {
 static const char * const builtin_commit_graph_write_usage[] = {
 	N_("git commit-graph write [--object-dir <objdir>] [--append] "
 	   "[--split[=<strategy>]] [--reachable|--stdin-packs|--stdin-commits] "
-	   "[--changed-paths] [--[no-]progress] <split options>"),
+	   "[--changed-paths] [--[no-]max-new-filters <n>] [--[no-]progress] "
+	   "<split options>"),
 	NULL
 };
 
@@ -162,6 +164,23 @@  static int read_one_commit(struct oidset *commits, struct progress *progress,
 	return 0;
 }
 
+static int write_option_max_new_filters(const struct option *opt,
+					const char *arg,
+					int unset)
+{
+	int *to = opt->value;
+	if (unset)
+		*to = -1;
+	else {
+		const char *s;
+		*to = strtol(arg, (char **)&s, 10);
+		if (*s)
+			return error(_("%s expects a numerical value"),
+				     optname(opt, opt->flags));
+	}
+	return 0;
+}
+
 static int graph_write(int argc, const char **argv)
 {
 	struct string_list pack_indexes = STRING_LIST_INIT_NODUP;
@@ -197,6 +216,9 @@  static int graph_write(int argc, const char **argv)
 			N_("maximum ratio between two levels of a split commit-graph")),
 		OPT_EXPIRY_DATE(0, "expire-time", &write_opts.expire_time,
 			N_("only expire files older than a given date-time")),
+		OPT_CALLBACK_F(0, "max-new-filters", &write_opts.max_new_filters,
+			NULL, N_("maximum number of Bloom filters to compute"),
+			0, write_option_max_new_filters),
 		OPT_END(),
 	};
 
@@ -205,6 +227,7 @@  static int graph_write(int argc, const char **argv)
 	write_opts.size_multiple = 2;
 	write_opts.max_commits = 0;
 	write_opts.expire_time = 0;
+	write_opts.max_new_filters = -1;
 
 	trace2_cmd_mode("write");
 
@@ -270,6 +293,16 @@  static int graph_write(int argc, const char **argv)
 	return result;
 }
 
+static int git_commit_graph_config(const char *var, const char *value, void *cb)
+{
+	if (!strcmp(var, "graph.maxnewfilters")) {
+		write_opts.max_new_filters = git_config_int(var, value);
+		return 0;
+	}
+
+	return git_default_config(var, value, cb);
+}
+
 int cmd_commit_graph(int argc, const char **argv, const char *prefix)
 {
 	static struct option builtin_commit_graph_options[] = {
@@ -283,7 +316,7 @@  int cmd_commit_graph(int argc, const char **argv, const char *prefix)
 		usage_with_options(builtin_commit_graph_usage,
 				   builtin_commit_graph_options);
 
-	git_config(git_default_config, NULL);
+	git_config(git_commit_graph_config, &opts);
 	argc = parse_options(argc, argv, prefix,
 			     builtin_commit_graph_options,
 			     builtin_commit_graph_usage,
diff --git a/commit-graph.c b/commit-graph.c
index 8626024faa..87ea0437e2 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -1455,6 +1455,7 @@  static void compute_bloom_filters(struct write_commit_graph_context *ctx)
 	int i;
 	struct progress *progress = NULL;
 	int *sorted_commits;
+	int max_new_filters;
 
 	init_bloom_filters();
 	ctx->bloom_large = bitmap_word_alloc(ctx->commits.nr / BITS_IN_EWORD);
@@ -1471,6 +1472,9 @@  static void compute_bloom_filters(struct write_commit_graph_context *ctx)
 		ctx->order_by_pack ? commit_pos_cmp : commit_gen_cmp,
 		&ctx->commits);
 
+	max_new_filters = ctx->opts->max_new_filters >= 0 ?
+		ctx->opts->max_new_filters : ctx->commits.nr;
+
 	for (i = 0; i < ctx->commits.nr; i++) {
 		int pos = sorted_commits[i];
 		struct commit *c = ctx->commits.list[pos];
@@ -1478,13 +1482,19 @@  static void compute_bloom_filters(struct write_commit_graph_context *ctx)
 			bitmap_set(ctx->bloom_large, pos);
 			ctx->count_bloom_filter_known_large++;
 		} else {
-			struct bloom_filter *filter = get_or_compute_bloom_filter(ctx->r, c, 1, NULL);
-			if (!filter->len) {
-				bitmap_set(ctx->bloom_large, pos);
-				ctx->count_bloom_filter_found_large++;
+			int computed = 0;
+			struct bloom_filter *filter = get_or_compute_bloom_filter(
+				ctx->r, c,
+				ctx->count_bloom_filter_computed < max_new_filters,
+				&computed);
+			if (computed) {
+				ctx->count_bloom_filter_computed++;
+				if (filter && !filter->len) {
+					bitmap_set(ctx->bloom_large, pos);
+					ctx->count_bloom_filter_found_large++;
+				}
 			}
-			ctx->total_bloom_filter_data_size += sizeof(unsigned char) * filter->len;
-			ctx->count_bloom_filter_computed++;
+			ctx->total_bloom_filter_data_size += sizeof(unsigned char) * (filter ? filter->len : 0);
 		}
 		display_progress(progress, i + 1);
 	}
diff --git a/commit-graph.h b/commit-graph.h
index 4cd991cf26..0842d13744 100644
--- a/commit-graph.h
+++ b/commit-graph.h
@@ -112,6 +112,7 @@  struct commit_graph_opts {
 	int max_commits;
 	timestamp_t expire_time;
 	enum commit_graph_split_flags flags;
+	int max_new_filters;
 };
 
 /*
diff --git a/t/t4216-log-bloom.sh b/t/t4216-log-bloom.sh
index 231dc8a3a7..985bea23b2 100755
--- a/t/t4216-log-bloom.sh
+++ b/t/t4216-log-bloom.sh
@@ -214,4 +214,23 @@  test_expect_success 'Bloom generation does not recompute too-large filters' '
 	)
 '
 
+test_expect_success 'Bloom generation is limited by --max-new-filters' '
+	(
+		cd 513changes &&
+		test_commit c2 filter &&
+		test_commit c3 filter &&
+		test_commit c4 no-filter &&
+		test_bloom_filters_computed "--reachable --changed-paths --split=replace --max-new-filters=2" \
+			1 0 2
+	)
+'
+
+test_expect_success 'Bloom generation backfills previously-skipped filters' '
+	(
+		cd 513changes &&
+		test_bloom_filters_computed "--reachable --changed-paths --split=replace --max-new-filters=1" \
+			1 0 1
+	)
+'
+
 test_done