diff mbox series

[v2,7/8] fetch: after repair, encourage auto gc repacking

Message ID 2338c15249a3a58032bc1f8b0cd029f3897b4e88.1645719219.git.gitgitgadget@gmail.com (mailing list archive)
State Superseded
Headers show
Series fetch: add repair: full refetch without negotiation (was: "refiltering") | expand

Commit Message

Robert Coup Feb. 24, 2022, 4:13 p.m. UTC
From: Robert Coup <robert@coup.net.nz>

After invoking `fetch --repair`, the object db will likely contain many
duplicate objects. If auto-maintenance is enabled, invoke it with
appropriate settings to encourage repacking/consolidation.

* gc.autoPackLimit: unless this is set to 0 (disabled), override the
  value to 1 to force pack consolidation.
* maintenance.incremental-repack.auto: unless this is set to 0, override
  the value to -1 to force incremental repacking.

Signed-off-by: Robert Coup <robert@coup.net.nz>
---
 Documentation/fetch-options.txt |  3 ++-
 builtin/fetch.c                 | 23 +++++++++++++++++++++--
 t/t5616-partial-clone.sh        |  6 ++++--
 3 files changed, 27 insertions(+), 5 deletions(-)

Comments

Ævar Arnfjörð Bjarmason Feb. 28, 2022, 4:40 p.m. UTC | #1
On Thu, Feb 24 2022, Robert Coup via GitGitGadget wrote:

> From: Robert Coup <robert@coup.net.nz>
>
> After invoking `fetch --repair`, the object db will likely contain many
> duplicate objects. If auto-maintenance is enabled, invoke it with
> appropriate settings to encourage repacking/consolidation.
>
> * gc.autoPackLimit: unless this is set to 0 (disabled), override the
>   value to 1 to force pack consolidation.
> * maintenance.incremental-repack.auto: unless this is set to 0, override
>   the value to -1 to force incremental repacking.
>
> Signed-off-by: Robert Coup <robert@coup.net.nz>
> ---
>  Documentation/fetch-options.txt |  3 ++-
>  builtin/fetch.c                 | 23 +++++++++++++++++++++--
>  t/t5616-partial-clone.sh        |  6 ++++--
>  3 files changed, 27 insertions(+), 5 deletions(-)
>
> diff --git a/Documentation/fetch-options.txt b/Documentation/fetch-options.txt
> index 1131aaad252..73abafdfc41 100644
> --- a/Documentation/fetch-options.txt
> +++ b/Documentation/fetch-options.txt
> @@ -169,7 +169,8 @@ ifndef::git-pull[]
>  	associated objects that are already present locally, this option fetches
>  	all objects as a fresh clone would. Use this to reapply a partial clone
>  	filter from configuration or using `--filter=` when the filter
> -	definition has changed.
> +	definition has changed. Automatic post-fetch maintenance will perform
> +	object database pack consolidation to remove any duplicate objects.
>  endif::git-pull[]
>  
>  --refmap=<refspec>::
> diff --git a/builtin/fetch.c b/builtin/fetch.c
> index f32b24d182b..7d023341ac0 100644
> --- a/builtin/fetch.c
> +++ b/builtin/fetch.c
> @@ -2020,6 +2020,8 @@ int cmd_fetch(int argc, const char **argv, const char *prefix)
>  	struct remote *remote = NULL;
>  	int result = 0;
>  	int prune_tags_ok = 1;
> +	struct strvec auto_maint_opts = STRVEC_INIT;

[Nits, but aside from earlier comments about config options vs. config]

this variable...

> +	int opt_val;

...and this...
>  
>  	packet_trace_identity("fetch");
>  
> @@ -2226,10 +2228,27 @@ int cmd_fetch(int argc, const char **argv, const char *prefix)
>  					     NULL);
>  	}
>  
> -	if (enable_auto_gc)
> -		run_auto_maintenance(verbosity < 0, NULL);
> +	if (enable_auto_gc) {

...can just be declared in this scope.

> +		if (repair) {

I think having:

    if (enable_auto_gc && repair)

might make this more readable without the extra indentation, but of
course then the variables would need to be declared at the top level... :)

> +			/*
> +			 * Hint auto-maintenance strongly to encourage repacking,
> +			 * but respect config settings disabling it.
> +			 */
> +			if (git_config_get_int("gc.autopacklimit", &opt_val))
> +				opt_val = -1;
> +			if (opt_val != 0)
> +				strvec_push(&auto_maint_opts, "gc.autoPackLimit=1");
> +
> +			if (git_config_get_int("maintenance.incremental-repack.auto", &opt_val))
> +				opt_val = -1;
> +			if (opt_val != 0)
> +				strvec_push(&auto_maint_opts, "maintenance.incremental-repack.auto=-1");
> +		}
> +		run_auto_maintenance(verbosity < 0, &auto_maint_opts);
> +	}
>  
>   cleanup:
>  	string_list_clear(&list, 0);
> +	strvec_clear(&auto_maint_opts);
>  	return result;
>  }
> diff --git a/t/t5616-partial-clone.sh b/t/t5616-partial-clone.sh
> index 230b2dcbc94..60f1817cda6 100755
> --- a/t/t5616-partial-clone.sh
> +++ b/t/t5616-partial-clone.sh
> @@ -187,7 +187,7 @@ test_expect_success 'push new commits to server for file.4.txt' '
>  # Do partial fetch to fetch smaller files; then verify that without --repair
>  # applying a new filter does not refetch missing large objects. Then use
>  # --repair to apply the new filter on existing commits. Test it under both
> -# protocol v2 & v0.
> +# protocol v2 & v0. Check repacking auto-maintenance is kicked off.
>  test_expect_success 'apply a different filter using --repair' '
>  	git -C pc1 fetch --filter=blob:limit=999 origin &&
>  	git -C pc1 rev-list --quiet --objects --missing=print \
> @@ -199,11 +199,13 @@ test_expect_success 'apply a different filter using --repair' '
>  		main..origin/main >observed &&
>  	test_line_count = 2 observed &&
>  
> +	GIT_TRACE2_EVENT="$(pwd)/trace.log" \

Nit: Better to use $PWD instead of $(pwd). It works here, but won't be
compatible with -x if we ever want to test stderr.

>  	git -c protocol.version=0 -C pc1 fetch --filter=blob:limit=29999 \
>  		--repair origin &&
>  	git -C pc1 rev-list --quiet --objects --missing=print \
>  		main..origin/main >observed &&
> -	test_line_count = 0 observed
> +	test_line_count = 0 observed &&
> +	test_subcommand git -c gc.autoPackLimit=1 -c maintenance.incremental-repack.auto=-1 maintenance run --auto --no-quiet <trace.log
>  '
>  
>  test_expect_success 'fetch --repair works with a shallow clone' '
diff mbox series

Patch

diff --git a/Documentation/fetch-options.txt b/Documentation/fetch-options.txt
index 1131aaad252..73abafdfc41 100644
--- a/Documentation/fetch-options.txt
+++ b/Documentation/fetch-options.txt
@@ -169,7 +169,8 @@  ifndef::git-pull[]
 	associated objects that are already present locally, this option fetches
 	all objects as a fresh clone would. Use this to reapply a partial clone
 	filter from configuration or using `--filter=` when the filter
-	definition has changed.
+	definition has changed. Automatic post-fetch maintenance will perform
+	object database pack consolidation to remove any duplicate objects.
 endif::git-pull[]
 
 --refmap=<refspec>::
diff --git a/builtin/fetch.c b/builtin/fetch.c
index f32b24d182b..7d023341ac0 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -2020,6 +2020,8 @@  int cmd_fetch(int argc, const char **argv, const char *prefix)
 	struct remote *remote = NULL;
 	int result = 0;
 	int prune_tags_ok = 1;
+	struct strvec auto_maint_opts = STRVEC_INIT;
+	int opt_val;
 
 	packet_trace_identity("fetch");
 
@@ -2226,10 +2228,27 @@  int cmd_fetch(int argc, const char **argv, const char *prefix)
 					     NULL);
 	}
 
-	if (enable_auto_gc)
-		run_auto_maintenance(verbosity < 0, NULL);
+	if (enable_auto_gc) {
+		if (repair) {
+			/*
+			 * Hint auto-maintenance strongly to encourage repacking,
+			 * but respect config settings disabling it.
+			 */
+			if (git_config_get_int("gc.autopacklimit", &opt_val))
+				opt_val = -1;
+			if (opt_val != 0)
+				strvec_push(&auto_maint_opts, "gc.autoPackLimit=1");
+
+			if (git_config_get_int("maintenance.incremental-repack.auto", &opt_val))
+				opt_val = -1;
+			if (opt_val != 0)
+				strvec_push(&auto_maint_opts, "maintenance.incremental-repack.auto=-1");
+		}
+		run_auto_maintenance(verbosity < 0, &auto_maint_opts);
+	}
 
  cleanup:
 	string_list_clear(&list, 0);
+	strvec_clear(&auto_maint_opts);
 	return result;
 }
diff --git a/t/t5616-partial-clone.sh b/t/t5616-partial-clone.sh
index 230b2dcbc94..60f1817cda6 100755
--- a/t/t5616-partial-clone.sh
+++ b/t/t5616-partial-clone.sh
@@ -187,7 +187,7 @@  test_expect_success 'push new commits to server for file.4.txt' '
 # Do partial fetch to fetch smaller files; then verify that without --repair
 # applying a new filter does not refetch missing large objects. Then use
 # --repair to apply the new filter on existing commits. Test it under both
-# protocol v2 & v0.
+# protocol v2 & v0. Check repacking auto-maintenance is kicked off.
 test_expect_success 'apply a different filter using --repair' '
 	git -C pc1 fetch --filter=blob:limit=999 origin &&
 	git -C pc1 rev-list --quiet --objects --missing=print \
@@ -199,11 +199,13 @@  test_expect_success 'apply a different filter using --repair' '
 		main..origin/main >observed &&
 	test_line_count = 2 observed &&
 
+	GIT_TRACE2_EVENT="$(pwd)/trace.log" \
 	git -c protocol.version=0 -C pc1 fetch --filter=blob:limit=29999 \
 		--repair origin &&
 	git -C pc1 rev-list --quiet --objects --missing=print \
 		main..origin/main >observed &&
-	test_line_count = 0 observed
+	test_line_count = 0 observed &&
+	test_subcommand git -c gc.autoPackLimit=1 -c maintenance.incremental-repack.auto=-1 maintenance run --auto --no-quiet <trace.log
 '
 
 test_expect_success 'fetch --repair works with a shallow clone' '