diff mbox series

[v3,4/4] unpack-trees: unpack new trees as sparse directories

Message ID 010d5e51595bd3f77b6d3606298a2b436c66ecc6.1659985672.git.gitgitgadget@gmail.com (mailing list archive)
State Accepted
Commit b15207b8cf1a1930fe5eb076c08c6ddc92d9282d
Headers show
Series reset/checkout: fix miscellaneous sparse index bugs | expand

Commit Message

Victoria Dye Aug. 8, 2022, 7:07 p.m. UTC
From: Victoria Dye <vdye@github.com>

If 'unpack_single_entry()' is unpacking a new directory tree (that is, one
not already present in the index) into a sparse index, unpack the tree as a
sparse directory rather than traversing its contents and unpacking each file
individually. This helps keep the sparse index as collapsed as possible in
cases such as 'git reset --hard' restoring a outside-of-cone directory
removed with 'git rm -r --sparse'.

Without this patch, 'unpack_single_entry()' will only unpack a directory
into the index as a sparse directory (rather than traversing into it and
unpacking its files one-by-one) if an entry with the same name already
exists in the index. This patch allows sparse directory unpacking without a
matching index entry when the following conditions are met:

1. the directory's path is outside the sparse cone, and
2. there are no children of the directory in the index

If a directory meets these requirements (as determined by
'is_new_sparse_dir()'), 'unpack_single_entry()' unpacks the sparse directory
index entry and propagates the decision back up to 'unpack_callback()' to
prevent unnecessary tree traversal into the unpacked directory.

Reported-by: Shaoxuan Yuan <shaoxuan.yuan02@gmail.com>
Signed-off-by: Victoria Dye <vdye@github.com>
---
 t/t1092-sparse-checkout-compatibility.sh |  17 ++++
 unpack-trees.c                           | 106 ++++++++++++++++++++---
 2 files changed, 113 insertions(+), 10 deletions(-)
diff mbox series

Patch

diff --git a/t/t1092-sparse-checkout-compatibility.sh b/t/t1092-sparse-checkout-compatibility.sh
index 99a1425a929..3588dd7b102 100755
--- a/t/t1092-sparse-checkout-compatibility.sh
+++ b/t/t1092-sparse-checkout-compatibility.sh
@@ -695,6 +695,23 @@  test_expect_success 'reset with wildcard pathspec' '
 	test_all_match git ls-files -s -- folder1
 '
 
+test_expect_success 'reset hard with removed sparse dir' '
+	init_repos &&
+
+	run_on_all git rm -r --sparse folder1 &&
+	test_all_match git status --porcelain=v2 &&
+
+	test_all_match git reset --hard &&
+	test_all_match git status --porcelain=v2 &&
+
+	cat >expect <<-\EOF &&
+	folder1/
+	EOF
+
+	git -C sparse-index ls-files --sparse folder1 >out &&
+	test_cmp expect out
+'
+
 test_expect_success 'update-index modify outside sparse definition' '
 	init_repos &&
 
diff --git a/unpack-trees.c b/unpack-trees.c
index 8a454e03bff..90b92114be8 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -1069,6 +1069,67 @@  static struct cache_entry *create_ce_entry(const struct traverse_info *info,
 	return ce;
 }
 
+/*
+ * Determine whether the path specified by 'p' should be unpacked as a new
+ * sparse directory in a sparse index. A new sparse directory 'A/':
+ * - must be outside the sparse cone.
+ * - must not already be in the index (i.e., no index entry with name 'A/'
+ *   exists).
+ * - must not have any child entries in the index (i.e., no index entry
+ *   'A/<something>' exists).
+ * If 'p' meets the above requirements, return 1; otherwise, return 0.
+ */
+static int entry_is_new_sparse_dir(const struct traverse_info *info,
+				   const struct name_entry *p)
+{
+	int res, pos;
+	struct strbuf dirpath = STRBUF_INIT;
+	struct unpack_trees_options *o = info->data;
+
+	if (!S_ISDIR(p->mode))
+		return 0;
+
+	/*
+	 * If the path is inside the sparse cone, it can't be a sparse directory.
+	 */
+	strbuf_add(&dirpath, info->traverse_path, info->pathlen);
+	strbuf_add(&dirpath, p->path, p->pathlen);
+	strbuf_addch(&dirpath, '/');
+	if (path_in_cone_mode_sparse_checkout(dirpath.buf, o->src_index)) {
+		res = 0;
+		goto cleanup;
+	}
+
+	pos = index_name_pos_sparse(o->src_index, dirpath.buf, dirpath.len);
+	if (pos >= 0) {
+		/* Path is already in the index, not a new sparse dir */
+		res = 0;
+		goto cleanup;
+	}
+
+	/* Where would this sparse dir be inserted into the index? */
+	pos = -pos - 1;
+	if (pos >= o->src_index->cache_nr) {
+		/*
+		 * Sparse dir would be inserted at the end of the index, so we
+		 * know it has no child entries.
+		 */
+		res = 1;
+		goto cleanup;
+	}
+
+	/*
+	 * If the dir has child entries in the index, the first would be at the
+	 * position the sparse directory would be inserted. If the entry at this
+	 * position is inside the dir, not a new sparse dir.
+	 */
+	res = strncmp(o->src_index->cache[pos]->name, dirpath.buf, dirpath.len);
+
+cleanup:
+	strbuf_release(&dirpath);
+	return res;
+}
+
 /*
  * Note that traverse_by_cache_tree() duplicates some logic in this function
  * without actually calling it. If you change the logic here you may need to
@@ -1078,21 +1139,44 @@  static int unpack_single_entry(int n, unsigned long mask,
 			       unsigned long dirmask,
 			       struct cache_entry **src,
 			       const struct name_entry *names,
-			       const struct traverse_info *info)
+			       const struct traverse_info *info,
+			       int *is_new_sparse_dir)
 {
 	int i;
 	struct unpack_trees_options *o = info->data;
 	unsigned long conflicts = info->df_conflicts | dirmask;
+	const struct name_entry *p = names;
 
-	if (mask == dirmask && !src[0])
-		return 0;
+	*is_new_sparse_dir = 0;
+	if (mask == dirmask && !src[0]) {
+		/*
+		 * If we're not in a sparse index, we can't unpack a directory
+		 * without recursing into it, so we return.
+		 */
+		if (!o->src_index->sparse_index)
+			return 0;
+
+		/* Find first entry with a real name (we could use "mask" too) */
+		while (!p->mode)
+			p++;
+
+		/*
+		 * If the directory is completely missing from the index but
+		 * would otherwise be a sparse directory, we should unpack it.
+		 * If not, we'll return and continue recursively traversing the
+		 * tree.
+		 */
+		*is_new_sparse_dir = entry_is_new_sparse_dir(info, p);
+		if (!*is_new_sparse_dir)
+			return 0;
+	}
 
 	/*
-	 * When we have a sparse directory entry for src[0],
-	 * then this isn't necessarily a directory-file conflict.
+	 * When we are unpacking a sparse directory, then this isn't necessarily
+	 * a directory-file conflict.
 	 */
-	if (mask == dirmask && src[0] &&
-	    S_ISSPARSEDIR(src[0]->ce_mode))
+	if (mask == dirmask &&
+	    (*is_new_sparse_dir || (src[0] && S_ISSPARSEDIR(src[0]->ce_mode))))
 		conflicts = 0;
 
 	/*
@@ -1352,7 +1436,7 @@  static int unpack_sparse_callback(int n, unsigned long mask, unsigned long dirma
 {
 	struct cache_entry *src[MAX_UNPACK_TREES + 1] = { NULL, };
 	struct unpack_trees_options *o = info->data;
-	int ret;
+	int ret, is_new_sparse_dir;
 
 	assert(o->merge);
 
@@ -1376,7 +1460,7 @@  static int unpack_sparse_callback(int n, unsigned long mask, unsigned long dirma
 	 * "index" tree (i.e., names[0]) and adjust 'names', 'n', 'mask', and
 	 * 'dirmask' accordingly.
 	 */
-	ret = unpack_single_entry(n - 1, mask >> 1, dirmask >> 1, src, names + 1, info);
+	ret = unpack_single_entry(n - 1, mask >> 1, dirmask >> 1, src, names + 1, info, &is_new_sparse_dir);
 
 	if (src[0])
 		discard_cache_entry(src[0]);
@@ -1394,6 +1478,7 @@  static int unpack_callback(int n, unsigned long mask, unsigned long dirmask, str
 	struct cache_entry *src[MAX_UNPACK_TREES + 1] = { NULL, };
 	struct unpack_trees_options *o = info->data;
 	const struct name_entry *p = names;
+	int is_new_sparse_dir;
 
 	/* Find first entry with a real name (we could use "mask" too) */
 	while (!p->mode)
@@ -1440,7 +1525,7 @@  static int unpack_callback(int n, unsigned long mask, unsigned long dirmask, str
 		}
 	}
 
-	if (unpack_single_entry(n, mask, dirmask, src, names, info) < 0)
+	if (unpack_single_entry(n, mask, dirmask, src, names, info, &is_new_sparse_dir))
 		return -1;
 
 	if (o->merge && src[0]) {
@@ -1478,6 +1563,7 @@  static int unpack_callback(int n, unsigned long mask, unsigned long dirmask, str
 		}
 
 		if (!is_sparse_directory_entry(src[0], names, info) &&
+		    !is_new_sparse_dir &&
 		    traverse_trees_recursive(n, dirmask, mask & ~dirmask,
 						    names, info) < 0) {
 			return -1;