diff mbox series

[v2,10/12] bundle: create filtered bundles

Message ID 82d93fc62e24ebf4a46eb195e5b5a2ed46ad9e2d.1646689840.git.gitgitgadget@gmail.com (mailing list archive)
State Superseded
Headers show
Series Partial bundles | expand

Commit Message

Derrick Stolee March 7, 2022, 9:50 p.m. UTC
From: Derrick Stolee <derrickstolee@github.com>

A previous change allowed Git to parse bundles with the 'filter'
capability. Now, teach Git to create bundles with this option.

Some rearranging of code is required to get the option parsing in the
correct spot. There are now two reasons why we might need capabilities
(a new hash algorithm or an object filter) so that is pulled out into a
place where we can check both at the same time.

The --filter option is parsed as part of setup_revisions(), but it
expected the --objects flag, too. That flag is somewhat implied by 'git
bundle' because it creates a pack-file walking objects, but there is
also a walk that walks the revision range expecting only commits. Make
this parsing work by setting 'revs.tree_objects' and 'revs.blob_objects'
before the call to setup_revisions().

Signed-off-by: Derrick Stolee <derrickstolee@github.com>
---
 Documentation/git-bundle.txt |  4 +--
 bundle.c                     | 55 ++++++++++++++++++++++++++++--------
 t/t6020-bundle-misc.sh       | 48 +++++++++++++++++++++++++++++++
 3 files changed, 94 insertions(+), 13 deletions(-)
diff mbox series

Patch

diff --git a/Documentation/git-bundle.txt b/Documentation/git-bundle.txt
index 72ab8139052..831c4788a94 100644
--- a/Documentation/git-bundle.txt
+++ b/Documentation/git-bundle.txt
@@ -75,8 +75,8 @@  verify <file>::
 	cleanly to the current repository.  This includes checks on the
 	bundle format itself as well as checking that the prerequisite
 	commits exist and are fully linked in the current repository.
-	'git bundle' prints a list of missing commits, if any, and exits
-	with a non-zero status.
+	'git bundle' prints the bundle's object filter and its list of
+	missing commits, if any, and exits with a non-zero status.
 
 list-heads <file>::
 	Lists the references defined in the bundle.  If followed by a
diff --git a/bundle.c b/bundle.c
index 2afced4d991..0016d70310c 100644
--- a/bundle.c
+++ b/bundle.c
@@ -334,6 +334,9 @@  static int write_pack_data(int bundle_fd, struct rev_info *revs, struct strvec *
 		     "--stdout", "--thin", "--delta-base-offset",
 		     NULL);
 	strvec_pushv(&pack_objects.args, pack_options->v);
+	if (revs->filter)
+		strvec_pushf(&pack_objects.args, "--filter=%s",
+			     list_objects_filter_spec(revs->filter));
 	pack_objects.in = -1;
 	pack_objects.out = bundle_fd;
 	pack_objects.git_cmd = 1;
@@ -507,10 +510,37 @@  int create_bundle(struct repository *r, const char *path,
 	int bundle_to_stdout;
 	int ref_count = 0;
 	struct rev_info revs, revs_copy;
-	int min_version = the_hash_algo == &hash_algos[GIT_HASH_SHA1] ? 2 : 3;
+	int min_version = 2;
 	struct bundle_prerequisites_info bpi;
 	int i;
 
+	/* init revs to list objects for pack-objects later */
+	save_commit_buffer = 0;
+	repo_init_revisions(r, &revs, NULL);
+
+	/*
+	 * Pre-initialize the '--objects' flag so we can parse a
+	 * --filter option successfully.
+	 */
+	revs.tree_objects = revs.blob_objects = 1;
+
+	argc = setup_revisions(argc, argv, &revs, NULL);
+
+	/*
+	 * Reasons to require version 3:
+	 *
+	 * 1. @object-format is required because our hash algorithm is not
+	 *    SHA1.
+	 * 2. @filter is required because we parsed an object filter.
+	 */
+	if (the_hash_algo != &hash_algos[GIT_HASH_SHA1] || revs.filter)
+		min_version = 3;
+
+	if (argc > 1) {
+		error(_("unrecognized argument: %s"), argv[1]);
+		goto err;
+	}
+
 	bundle_to_stdout = !strcmp(path, "-");
 	if (bundle_to_stdout)
 		bundle_fd = 1;
@@ -533,17 +563,14 @@  int create_bundle(struct repository *r, const char *path,
 		write_or_die(bundle_fd, capability, strlen(capability));
 		write_or_die(bundle_fd, the_hash_algo->name, strlen(the_hash_algo->name));
 		write_or_die(bundle_fd, "\n", 1);
-	}
-
-	/* init revs to list objects for pack-objects later */
-	save_commit_buffer = 0;
-	repo_init_revisions(r, &revs, NULL);
 
-	argc = setup_revisions(argc, argv, &revs, NULL);
-
-	if (argc > 1) {
-		error(_("unrecognized argument: %s"), argv[1]);
-		goto err;
+		if (revs.filter) {
+			const char *value = expand_list_objects_filter_spec(revs.filter);
+			capability = "@filter=";
+			write_or_die(bundle_fd, capability, strlen(capability));
+			write_or_die(bundle_fd, value, strlen(value));
+			write_or_die(bundle_fd, "\n", 1);
+		}
 	}
 
 	/* save revs.pending in revs_copy for later use */
@@ -566,6 +593,12 @@  int create_bundle(struct repository *r, const char *path,
 	bpi.fd = bundle_fd;
 	bpi.pending = &revs_copy.pending;
 
+	/*
+	 * Nullify the filter here, and any object walking. We only care
+	 * about commits and tags here. The revs_copy has the right
+	 * instances of these values.
+	 */
+	revs.filter = NULL;
 	revs.blob_objects = revs.tree_objects = 0;
 	traverse_commit_list(&revs, write_bundle_prerequisites, ignore_object, &bpi);
 	object_array_remove_duplicates(&revs_copy.pending);
diff --git a/t/t6020-bundle-misc.sh b/t/t6020-bundle-misc.sh
index 6522401617d..f10cf011519 100755
--- a/t/t6020-bundle-misc.sh
+++ b/t/t6020-bundle-misc.sh
@@ -487,4 +487,52 @@  test_expect_success 'unfiltered bundle with --objects' '
 	test_cmp expect actual
 '
 
+for filter in "blob:none" "tree:0" "tree:1" "blob:limit=100"
+do
+	test_expect_success "filtered bundle: $filter" '
+		test_when_finished rm -rf .git/objects/pack cloned unbundled &&
+		git bundle create partial.bdl \
+			--all \
+			--filter=$filter &&
+
+		git bundle verify partial.bdl >unfiltered &&
+		make_user_friendly_and_stable_output <unfiltered >actual &&
+
+		cat >expect <<-EOF &&
+		The bundle contains these 10 refs:
+		<COMMIT-P> refs/heads/main
+		<COMMIT-N> refs/heads/release
+		<COMMIT-D> refs/heads/topic/1
+		<COMMIT-H> refs/heads/topic/2
+		<COMMIT-D> refs/pull/1/head
+		<COMMIT-G> refs/pull/2/head
+		<TAG-1> refs/tags/v1
+		<TAG-2> refs/tags/v2
+		<TAG-3> refs/tags/v3
+		<COMMIT-P> HEAD
+		The bundle uses this filter: $filter
+		The bundle records a complete history.
+		EOF
+		test_cmp expect actual &&
+
+		test_config uploadpack.allowfilter 1 &&
+		test_config uploadpack.allowanysha1inwant 1 &&
+		git clone --no-local --filter=$filter --bare "file://$(pwd)" cloned &&
+
+		git init unbundled &&
+		git -C unbundled bundle unbundle ../partial.bdl >ref-list.txt &&
+
+		# Count the same number of reachable objects.
+		reflist=$(git for-each-ref --format="%(objectname)") &&
+		git rev-list --objects --filter=$filter --missing=allow-any \
+			$reflist >expect &&
+		for repo in cloned unbundled
+		do
+			git -C $repo rev-list --objects --missing=allow-any \
+				$reflist >actual &&
+			test_cmp expect actual || return 1
+		done
+	'
+done
+
 test_done