diff mbox series

[24/30] repack: add --path-walk option

Message ID d3284d090d36e3bff3816123e9939ef0128f323e.1725935335.git.gitgitgadget@gmail.com (mailing list archive)
State New
Headers show
Series Path-walk API and applications | expand

Commit Message

Derrick Stolee Sept. 10, 2024, 2:28 a.m. UTC
From: Derrick Stolee <stolee@gmail.com>

Since 'git pack-objects' supports a --path-walk option, allow passing it
through in 'git repack'. This presents interesting testing opportunities for
comparing the different repacking strategies against each other.

For the microsoft/fluentui repo [1], the results are very interesting:

Test                                            this tree
-------------------------------------------------------------------
5313.10: full repack                            97.91(663.47+2.83)
5313.11: full repack size                                449.1K
5313.12: full repack with --path-walk           105.42(120.49+0.95)
5313.13: full repack size with --path-walk               159.1K

[1] https://github.com/microsoft/fluentui

This repo suffers from having a lot of paths that collide in the name
hash, so examining them in groups by path leads to better deltas. Also,
in this case, the single-threaded --path-walk implementation is
competitive in elapsed time with the multi-threaded full repack, while
using far less CPU time. It saves that time by not diffing files that
have significant differences from each other.

Signed-off-by: Derrick Stolee <stolee@gmail.com>
---
 builtin/repack.c             |  5 +++++
 t/perf/p5313-pack-objects.sh | 20 ++++++++++++++++++++
 2 files changed, 25 insertions(+)
diff mbox series

Patch

diff --git a/builtin/repack.c b/builtin/repack.c
index 62cfa50c50f..9e39a1ea8f8 100644
--- a/builtin/repack.c
+++ b/builtin/repack.c
@@ -57,6 +57,7 @@  struct pack_objects_args {
 	int no_reuse_object;
 	int quiet;
 	int local;
+	int path_walk;
 	struct list_objects_filter_options filter_options;
 };
 
@@ -288,6 +289,8 @@  static void prepare_pack_objects(struct child_process *cmd,
 		strvec_pushf(&cmd->args, "--no-reuse-delta");
 	if (args->no_reuse_object)
 		strvec_pushf(&cmd->args, "--no-reuse-object");
+	if (args->path_walk)
+		strvec_pushf(&cmd->args, "--path-walk");
 	if (args->local)
 		strvec_push(&cmd->args,  "--local");
 	if (args->quiet)
@@ -1158,6 +1161,8 @@  int cmd_repack(int argc, const char **argv, const char *prefix)
 				N_("pass --no-reuse-delta to git-pack-objects")),
 		OPT_BOOL('F', NULL, &po_args.no_reuse_object,
 				N_("pass --no-reuse-object to git-pack-objects")),
+		OPT_BOOL(0, "path-walk", &po_args.path_walk,
+				N_("pass --path-walk to git-pack-objects")),
 		OPT_NEGBIT('n', NULL, &run_update_server_info,
 				N_("do not run git-update-server-info"), 1),
 		OPT__QUIET(&po_args.quiet, N_("be quiet")),
diff --git a/t/perf/p5313-pack-objects.sh b/t/perf/p5313-pack-objects.sh
index fdcdf188f95..48fc05bb6c6 100755
--- a/t/perf/p5313-pack-objects.sh
+++ b/t/perf/p5313-pack-objects.sh
@@ -52,4 +52,24 @@  test_size 'big recent pack size with --path-walk' '
 	wc -c <out
 '
 
+test_perf 'full repack' '
+	git repack -adf --no-write-bitmap-index
+'
+
+test_size 'full repack size' '
+	du -a .git/objects/pack | \
+	   awk "{ print \$1; }" | \
+		       sort -nr | head -n 1
+'
+
+test_perf 'full repack with --path-walk' '
+	git repack -adf --no-write-bitmap-index --path-walk
+'
+
+test_size 'full repack size with --path-walk' '
+	du -a .git/objects/pack | \
+	   awk "{ print \$1; }" | \
+		       sort -nr | head -n 1
+'
+
 test_done