diff mbox series

[v4,2/8] revision: learn '--no-kept-objects'

Message ID c85a9155970aa3f59990d3678da21bb88a9cac08.1614047097.git.me@ttaylorr.com (mailing list archive)
State New, archived
Headers show
Series repack: support repacking into a geometric sequence | expand

Commit Message

Taylor Blau Feb. 23, 2021, 2:25 a.m. UTC
A future caller will want to be able to perform a reachability traversal
which terminates when visiting an object found in a kept pack. The
closest existing option is '--honor-pack-keep', but this isn't quite
what we want. Instead of halting the traversal midway through, a full
traversal is always performed, and the results are only trimmed
afterwords.

Besides needing to introduce a new flag (since culling results
post-facto can be different than halting the traversal as it's
happening), there is an additional wrinkle handling the distinction
in-core and on-disk kept packs. That is: what kinds of kept pack should
stop the traversal?

Introduce '--no-kept-objects[=<on-disk|in-core>]' to specify which kinds
of kept packs, if any, should stop a traversal. This can be useful for
callers that want to perform a reachability analysis, but want to leave
certain packs alone (for e.g., when doing a geometric repack that has
some "large" packs which are kept in-core that it wants to leave alone).

Note that this option is not guaranteed to produce exactly the set of
objects that aren't in kept packs, since it's possible the traversal
order may end up in a situation where a non-kept ancestor was "cut off"
by a kept object (at which point we would stop traversing). But, we
don't care about absolute correctness here, since this will eventually
be used as a purely additive guide in an upcoming new repack mode.

Explicitly avoid documenting this new flag, since it is only used
internally. In theory we could avoid even adding it rev-list, but being
able to spell this option out on the command-line makes some special
cases easier to test without promising to keep it behaving consistently
forever. Those tricky cases are exercised in t6114.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
 revision.c            | 15 ++++++++++
 revision.h            |  4 +++
 t/t6114-keep-packs.sh | 69 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 88 insertions(+)
 create mode 100755 t/t6114-keep-packs.sh
diff mbox series

Patch

diff --git a/revision.c b/revision.c
index b78733f508..dca2b8c801 100644
--- a/revision.c
+++ b/revision.c
@@ -2336,6 +2336,16 @@  static int handle_revision_opt(struct rev_info *revs, int argc, const char **arg
 		revs->unpacked = 1;
 	} else if (starts_with(arg, "--unpacked=")) {
 		die(_("--unpacked=<packfile> no longer supported"));
+	} else if (!strcmp(arg, "--no-kept-objects")) {
+		revs->no_kept_objects = 1;
+		revs->keep_pack_cache_flags |= IN_CORE_KEEP_PACKS;
+		revs->keep_pack_cache_flags |= ON_DISK_KEEP_PACKS;
+	} else if (skip_prefix(arg, "--no-kept-objects=", &optarg)) {
+		revs->no_kept_objects = 1;
+		if (!strcmp(optarg, "in-core"))
+			revs->keep_pack_cache_flags |= IN_CORE_KEEP_PACKS;
+		if (!strcmp(optarg, "on-disk"))
+			revs->keep_pack_cache_flags |= ON_DISK_KEEP_PACKS;
 	} else if (!strcmp(arg, "-r")) {
 		revs->diff = 1;
 		revs->diffopt.flags.recursive = 1;
@@ -3795,6 +3805,11 @@  enum commit_action get_commit_action(struct rev_info *revs, struct commit *commi
 		return commit_ignore;
 	if (revs->unpacked && has_object_pack(&commit->object.oid))
 		return commit_ignore;
+	if (revs->no_kept_objects) {
+		if (has_object_kept_pack(&commit->object.oid,
+					 revs->keep_pack_cache_flags))
+			return commit_ignore;
+	}
 	if (commit->object.flags & UNINTERESTING)
 		return commit_ignore;
 	if (revs->line_level_traverse && !want_ancestry(revs)) {
diff --git a/revision.h b/revision.h
index e6be3c845e..a20a530d52 100644
--- a/revision.h
+++ b/revision.h
@@ -148,6 +148,7 @@  struct rev_info {
 			edge_hint_aggressive:1,
 			limited:1,
 			unpacked:1,
+			no_kept_objects:1,
 			boundary:2,
 			count:1,
 			left_right:1,
@@ -317,6 +318,9 @@  struct rev_info {
 	 * This is loaded from the commit-graph being used.
 	 */
 	struct bloom_filter_settings *bloom_filter_settings;
+
+	/* misc. flags related to '--no-kept-objects' */
+	unsigned keep_pack_cache_flags;
 };
 
 int ref_excluded(struct string_list *, const char *path);
diff --git a/t/t6114-keep-packs.sh b/t/t6114-keep-packs.sh
new file mode 100755
index 0000000000..9239d8aa46
--- /dev/null
+++ b/t/t6114-keep-packs.sh
@@ -0,0 +1,69 @@ 
+#!/bin/sh
+
+test_description='rev-list with .keep packs'
+. ./test-lib.sh
+
+test_expect_success 'setup' '
+	test_commit loose &&
+	test_commit packed &&
+	test_commit kept &&
+
+	KEPT_PACK=$(git pack-objects --revs .git/objects/pack/pack <<-EOF
+	refs/tags/kept
+	^refs/tags/packed
+	EOF
+	) &&
+	MISC_PACK=$(git pack-objects --revs .git/objects/pack/pack <<-EOF
+	refs/tags/packed
+	^refs/tags/loose
+	EOF
+	) &&
+
+	touch .git/objects/pack/pack-$KEPT_PACK.keep
+'
+
+rev_list_objects () {
+	git rev-list "$@" >out &&
+	sort out
+}
+
+idx_objects () {
+	git show-index <$1 >expect-idx &&
+	cut -d" " -f2 <expect-idx | sort
+}
+
+test_expect_success '--no-kept-objects excludes trees and blobs in .keep packs' '
+	rev_list_objects --objects --all --no-object-names >kept &&
+	rev_list_objects --objects --all --no-object-names --no-kept-objects >no-kept &&
+
+	idx_objects .git/objects/pack/pack-$KEPT_PACK.idx >expect &&
+	comm -3 kept no-kept >actual &&
+
+	test_cmp expect actual
+'
+
+test_expect_success '--no-kept-objects excludes kept non-MIDX object' '
+	test_config core.multiPackIndex true &&
+
+	# Create a pack with just the commit object in pack, and do not mark it
+	# as kept (even though it appears in $KEPT_PACK, which does have a .keep
+	# file).
+	MIDX_PACK=$(git pack-objects .git/objects/pack/pack <<-EOF
+	$(git rev-parse kept)
+	EOF
+	) &&
+
+	# Write a MIDX containing all packs, but use the version of the commit
+	# at "kept" in a non-kept pack by touching $MIDX_PACK.
+	touch .git/objects/pack/pack-$MIDX_PACK.pack &&
+	git multi-pack-index write &&
+
+	rev_list_objects --objects --no-object-names --no-kept-objects HEAD >actual &&
+	(
+		idx_objects .git/objects/pack/pack-$MISC_PACK.idx &&
+		git rev-list --objects --no-object-names refs/tags/loose
+	) | sort >expect &&
+	test_cmp expect actual
+'
+
+test_done