diff mbox series

[06/11] refs/reftable: allow configuring block size

Message ID b4e4db5735beb106511980adca48dc416c4b0d95.1714630191.git.ps@pks.im (mailing list archive)
State Superseded
Headers show
Series reftable: expose write options as config | expand

Commit Message

Patrick Steinhardt May 2, 2024, 6:51 a.m. UTC
Add a new option `reftable.blockSize` that allows the user to control
the block size used by the reftable library.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 Documentation/config.txt          |  2 +
 Documentation/config/reftable.txt | 14 ++++++
 refs/reftable-backend.c           | 32 +++++++++++++-
 t/t0613-reftable-write-options.sh | 72 +++++++++++++++++++++++++++++++
 4 files changed, 119 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/config/reftable.txt

Comments

Karthik Nayak May 10, 2024, 9:29 a.m. UTC | #1
Patrick Steinhardt <ps@pks.im> writes:

[snip]

> @@ -230,6 +231,23 @@ static int read_ref_without_reload(struct reftable_stack *stack,
>  	return ret;
>  }
>
> +static int reftable_be_config(const char *var, const char *value,
> +			      const struct config_context *ctx,
> +			      void *_opts)
> +{
> +	struct reftable_write_options *opts = _opts;
> +
> +	if (!strcmp(var, "reftable.blocksize")) {
> +		unsigned long block_size = git_config_ulong(var, value, ctx->kvi);
> +		if (block_size > 16777215)
> +			die("reftable block size cannot exceed 16MB");
> +		opts->block_size = block_size;
> +		return 0;

nit: unnecessary return

> +	}
> +
> +	return 0;
> +}
> +
>  static struct ref_store *reftable_be_init(struct repository *repo,
>  					  const char *gitdir,
>  					  unsigned int store_flags)
> @@ -245,12 +263,24 @@ static struct ref_store *reftable_be_init(struct repository *repo,
>  	base_ref_store_init(&refs->base, repo, gitdir, &refs_be_reftable);
>  	strmap_init(&refs->worktree_stacks);
>  	refs->store_flags = store_flags;
> -	refs->write_options.block_size = 4096;
> +

Nit: do we need this newline?

>  	refs->write_options.hash_id = repo->hash_algo->format_id;
>  	refs->write_options.default_permissions = calc_shared_perm(0666 & ~mask);
>  	refs->write_options.disable_auto_compact =
>  		!git_env_bool("GIT_TEST_REFTABLE_AUTOCOMPACTION", 1);
>
> +	git_config(reftable_be_config, &refs->write_options);
> +
> +	/*
> +	 * It is somewhat unfortunate that we have to mirror the default block
> +	 * size of the reftable library here. But given that the write options
> +	 * wouldn't be updated by the library here, and given that we require
> +	 * the proper block size to trim reflog messages so that they fit, we
> +	 * must set up a proper value here.
> +	 */
> +	if (!refs->write_options.block_size)
> +		refs->write_options.block_size = 4096;
> +

Wouldn't it be better to import and use `reftable/constants.h` here?

[snip]
Patrick Steinhardt May 10, 2024, 10:13 a.m. UTC | #2
On Fri, May 10, 2024 at 02:29:19AM -0700, Karthik Nayak wrote:
> Patrick Steinhardt <ps@pks.im> writes:
> 
> [snip]
> 
> > @@ -230,6 +231,23 @@ static int read_ref_without_reload(struct reftable_stack *stack,
> >  	return ret;
> >  }
> >
> > +static int reftable_be_config(const char *var, const char *value,
> > +			      const struct config_context *ctx,
> > +			      void *_opts)
> > +{
> > +	struct reftable_write_options *opts = _opts;
> > +
> > +	if (!strcmp(var, "reftable.blocksize")) {
> > +		unsigned long block_size = git_config_ulong(var, value, ctx->kvi);
> > +		if (block_size > 16777215)
> > +			die("reftable block size cannot exceed 16MB");
> > +		opts->block_size = block_size;
> > +		return 0;
> 
> > nit: unnecessary return

It's unnecessary indeed. I first wanted to defend this, but then I
noticed that I'm also being inconsistent here: by the end of this
series, the last branch won't have a `return 0;`.

Will remove.

> > +	}
> > +
> > +	return 0;
> > +}
> > +
> >  static struct ref_store *reftable_be_init(struct repository *repo,
> >  					  const char *gitdir,
> >  					  unsigned int store_flags)
> > @@ -245,12 +263,24 @@ static struct ref_store *reftable_be_init(struct repository *repo,
> >  	base_ref_store_init(&refs->base, repo, gitdir, &refs_be_reftable);
> >  	strmap_init(&refs->worktree_stacks);
> >  	refs->store_flags = store_flags;
> > -	refs->write_options.block_size = 4096;
> > +
> 
> Nit: do we need this newline?

I think it's easier to read that way.

> >  	refs->write_options.hash_id = repo->hash_algo->format_id;
> >  	refs->write_options.default_permissions = calc_shared_perm(0666 & ~mask);
> >  	refs->write_options.disable_auto_compact =
> >  		!git_env_bool("GIT_TEST_REFTABLE_AUTOCOMPACTION", 1);
> >
> > +	git_config(reftable_be_config, &refs->write_options);
> > +
> > +	/*
> > +	 * It is somewhat unfortunate that we have to mirror the default block
> > +	 * size of the reftable library here. But given that the write options
> > +	 * wouldn't be updated by the library here, and given that we require
> > +	 * the proper block size to trim reflog messages so that they fit, we
> > +	 * must set up a proper value here.
> > +	 */
> > +	if (!refs->write_options.block_size)
> > +		refs->write_options.block_size = 4096;
> > +
> 
> > Wouldn't it be better to import and use `reftable/constants.h` here?

Headers in the "reftable/" directory which do not have a "reftable-"
prefix are considered to be private. So those shouldn't be used.

Patrick
diff mbox series

Patch

diff --git a/Documentation/config.txt b/Documentation/config.txt
index 70b448b132..fa1469e5e7 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -497,6 +497,8 @@  include::config/rebase.txt[]
 
 include::config/receive.txt[]
 
+include::config/reftable.txt[]
+
 include::config/remote.txt[]
 
 include::config/remotes.txt[]
diff --git a/Documentation/config/reftable.txt b/Documentation/config/reftable.txt
new file mode 100644
index 0000000000..fa7c4be014
--- /dev/null
+++ b/Documentation/config/reftable.txt
@@ -0,0 +1,14 @@ 
+reftable.blockSize::
+	The size in bytes used by the reftable backend when writing blocks.
+	The block size is determined by the writer, and does not have to be a
+	power of 2. The block size must be larger than the longest reference
+	name or log entry used in the repository, as references cannot span
+	blocks.
++
+Powers of two that are friendly to the virtual memory system or
+filesystem (such as 4kB or 8kB) are recommended. Larger sizes (64kB) can
+yield better compression, with a possible increased cost incurred by
+readers during access.
++
+The largest block size is `16777215` bytes (15.99 MiB). The default value is
+`4096` bytes (4kB). A value of `0` will use the default value.
diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c
index 1cda48c504..c2c47a3bc1 100644
--- a/refs/reftable-backend.c
+++ b/refs/reftable-backend.c
@@ -1,6 +1,7 @@ 
 #include "../git-compat-util.h"
 #include "../abspath.h"
 #include "../chdir-notify.h"
+#include "../config.h"
 #include "../environment.h"
 #include "../gettext.h"
 #include "../hash.h"
@@ -230,6 +231,23 @@  static int read_ref_without_reload(struct reftable_stack *stack,
 	return ret;
 }
 
+static int reftable_be_config(const char *var, const char *value,
+			      const struct config_context *ctx,
+			      void *_opts)
+{
+	struct reftable_write_options *opts = _opts;
+
+	if (!strcmp(var, "reftable.blocksize")) {
+		unsigned long block_size = git_config_ulong(var, value, ctx->kvi);
+		if (block_size > 16777215)
+			die("reftable block size cannot exceed 16MB");
+		opts->block_size = block_size;
+		return 0;
+	}
+
+	return 0;
+}
+
 static struct ref_store *reftable_be_init(struct repository *repo,
 					  const char *gitdir,
 					  unsigned int store_flags)
@@ -245,12 +263,24 @@  static struct ref_store *reftable_be_init(struct repository *repo,
 	base_ref_store_init(&refs->base, repo, gitdir, &refs_be_reftable);
 	strmap_init(&refs->worktree_stacks);
 	refs->store_flags = store_flags;
-	refs->write_options.block_size = 4096;
+
 	refs->write_options.hash_id = repo->hash_algo->format_id;
 	refs->write_options.default_permissions = calc_shared_perm(0666 & ~mask);
 	refs->write_options.disable_auto_compact =
 		!git_env_bool("GIT_TEST_REFTABLE_AUTOCOMPACTION", 1);
 
+	git_config(reftable_be_config, &refs->write_options);
+
+	/*
+	 * It is somewhat unfortunate that we have to mirror the default block
+	 * size of the reftable library here. But given that the write options
+	 * wouldn't be updated by the library here, and given that we require
+	 * the proper block size to trim reflog messages so that they fit, we
+	 * must set up a proper value here.
+	 */
+	if (!refs->write_options.block_size)
+		refs->write_options.block_size = 4096;
+
 	/*
 	 * Set up the main reftable stack that is hosted in GIT_COMMON_DIR.
 	 * This stack contains both the shared and the main worktree refs.
diff --git a/t/t0613-reftable-write-options.sh b/t/t0613-reftable-write-options.sh
index 462980c37c..8bdbc6ec70 100755
--- a/t/t0613-reftable-write-options.sh
+++ b/t/t0613-reftable-write-options.sh
@@ -99,4 +99,76 @@  test_expect_success 'many refs results in multiple blocks' '
 	)
 '
 
+test_expect_success 'tiny block size leads to error' '
+	test_when_finished "rm -rf repo" &&
+	git init repo &&
+	(
+		cd repo &&
+		test_commit initial &&
+		cat >expect <<-EOF &&
+		error: unable to compact stack: entry too large
+		EOF
+		test_must_fail git -c reftable.blockSize=50 pack-refs 2>err &&
+		test_cmp expect err
+	)
+'
+
+test_expect_success 'small block size leads to multiple ref blocks' '
+	test_config_global core.logAllRefUpdates false &&
+	test_when_finished "rm -rf repo" &&
+	git init repo &&
+	(
+		cd repo &&
+		test_commit A &&
+		test_commit B &&
+		git -c reftable.blockSize=100 pack-refs &&
+
+		cat >expect <<-EOF &&
+		header:
+		  block_size: 100
+		ref:
+		  - length: 53
+		    restarts: 1
+		  - length: 74
+		    restarts: 1
+		  - length: 38
+		    restarts: 1
+		EOF
+		test-tool dump-reftable -b .git/reftable/*.ref >actual &&
+		test_cmp expect actual
+	)
+'
+
+test_expect_success 'small block size fails with large reflog message' '
+	test_when_finished "rm -rf repo" &&
+	git init repo &&
+	(
+		cd repo &&
+		test_commit A &&
+		perl -e "print \"a\" x 500" >logmsg &&
+		cat >expect <<-EOF &&
+		fatal: update_ref failed for ref ${SQ}refs/heads/logme${SQ}: reftable: transaction failure: entry too large
+		EOF
+		test_must_fail git -c reftable.blockSize=100 \
+			update-ref -m "$(cat logmsg)" refs/heads/logme HEAD 2>err &&
+		test_cmp expect err
+	)
+'
+
+test_expect_success 'block size exceeding maximum supported size' '
+	test_config_global core.logAllRefUpdates false &&
+	test_when_finished "rm -rf repo" &&
+	git init repo &&
+	(
+		cd repo &&
+		test_commit A &&
+		test_commit B &&
+		cat >expect <<-EOF &&
+		fatal: reftable block size cannot exceed 16MB
+		EOF
+		test_must_fail git -c reftable.blockSize=16777216 pack-refs 2>err &&
+		test_cmp expect err
+	)
+'
+
 test_done