diff mbox series

[v3,05/20] sparse-index: implement ensure_full_index()

Message ID 99292cdbaae488101d1c247ab94dc4b3b04d0311.1615912983.git.gitgitgadget@gmail.com (mailing list archive)
State Superseded
Headers show
Series Sparse Index: Design, Format, Tests | expand

Commit Message

Derrick Stolee March 16, 2021, 4:42 p.m. UTC
From: Derrick Stolee <dstolee@microsoft.com>

We will mark an in-memory index_state as having sparse directory entries
with the sparse_index bit. These currently cannot exist, but we will add
a mechanism for collapsing a full index to a sparse one in a later
change. That will happen at write time, so we must first allow parsing
the format before writing it.

Commands or methods that require a full index in order to operate can
call ensure_full_index() to expand that index in-memory. This requires
parsing trees using that index's repository.

Sparse directory entries have a specific 'ce_mode' value. The macro
S_ISSPARSEDIR(ce->ce_mode) can check if a cache_entry 'ce' has this type.
This ce_mode is not possible with the existing index formats, so we do not
additionally verify every property of a sparse-directory entry, which are:

 1. ce->ce_mode == 0040000
 2. ce->ce_flags & CE_SKIP_WORKTREE is true
 3. ce->name[ce->namelen - 1] == '/' (ends in dir separator)
 4. ce->oid references a tree object.

These are only partially enforced in ensure_full_index(). Any
deviation will cause a warning at minimum or a failure in the worst
case.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 cache.h        | 13 ++++++-
 read-cache.c   |  9 +++++
 sparse-index.c | 98 +++++++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 118 insertions(+), 2 deletions(-)

Comments

Ævar Arnfjörð Bjarmason March 17, 2021, 1:03 p.m. UTC | #1
On Tue, Mar 16 2021, Derrick Stolee via GitGitGadget wrote:

> From: Derrick Stolee <dstolee@microsoft.com>
> [...]
> +static int add_path_to_index(const struct object_id *oid,
> +			     struct strbuf *base, const char *path,
> +			     unsigned int mode, void *context)
> +{
> +	struct index_state *istate = (struct index_state *)context;
> +	struct cache_entry *ce;
> +	size_t len = base->len;
> +
> +	if (S_ISDIR(mode))
> +		return READ_TREE_RECURSIVE;
> +
> +	strbuf_addstr(base, path);
> +
> +	ce = make_cache_entry(istate, mode, oid, base->buf, 0, 0);
> +	ce->ce_flags |= CE_SKIP_WORKTREE;
> +	set_index_entry(istate, istate->cache_nr++, ce);
> +
> +	strbuf_setlen(base, len);
> +	return 0;
> +}
>  
>  void ensure_full_index(struct index_state *istate)
>  {
> -	/* intentionally left blank */
> +	int i;
> +	struct index_state *full;
> +	struct strbuf base = STRBUF_INIT;
> +
> +	if (!istate || !istate->sparse_index)
> +		return;
> +
> +	if (!istate->repo)
> +		istate->repo = the_repository;
> +
> +	trace2_region_enter("index", "ensure_full_index", istate->repo);
> +
> +	/* initialize basics of new index */
> +	full = xcalloc(1, sizeof(struct index_state));
> +	memcpy(full, istate, sizeof(struct index_state));
> +
> +	/* then change the necessary things */
> +	full->sparse_index = 0;
> +	full->cache_alloc = (3 * istate->cache_alloc) / 2;
> +	full->cache_nr = 0;
> +	ALLOC_ARRAY(full->cache, full->cache_alloc);
> +
> +	for (i = 0; i < istate->cache_nr; i++) {
> +		struct cache_entry *ce = istate->cache[i];
> +		struct tree *tree;
> +		struct pathspec ps;
> +
> +		if (!S_ISSPARSEDIR(ce->ce_mode)) {
> +			set_index_entry(full, full->cache_nr++, ce);
> +			continue;
> +		}
> +		if (!(ce->ce_flags & CE_SKIP_WORKTREE))
> +			warning(_("index entry is a directory, but not sparse (%08x)"),
> +				ce->ce_flags);
> +
> +		/* recursively walk into ce->name */
> +		tree = lookup_tree(istate->repo, &ce->oid);
> +
> +		memset(&ps, 0, sizeof(ps));
> +		ps.recursive = 1;
> +		ps.has_wildcard = 1;
> +		ps.max_depth = -1;
> +
> +		strbuf_setlen(&base, 0);
> +		strbuf_add(&base, ce->name, strlen(ce->name));
> +
> +		read_tree_at(istate->repo, tree, &base, &ps,
> +			     add_path_to_index, full);
> +
> +		/* free directory entries. full entries are re-used */
> +		discard_cache_entry(ce);
> +	}
> +
> +	/* Copy back into original index. */
> +	memcpy(&istate->name_hash, &full->name_hash, sizeof(full->name_hash));
> +	istate->sparse_index = 0;
> +	free(istate->cache);
> +	istate->cache = full->cache;
> +	istate->cache_nr = full->cache_nr;
> +	istate->cache_alloc = full->cache_alloc;
> +
> +	strbuf_release(&base);
> +	free(full);
> +
> +	trace2_region_leave("index", "ensure_full_index", istate->repo);
>  }

Not that I mind having added the read_tree_at() again, but just thinking
aloud here.

So we need this loop here because there's nothing like a read_tree_at()
that knows how to start at the non-tree root of the index, and then for
each directory there we're going to perform the equivalent of a
read_tree() there, but we need to set the base for add_path_to_index()
since we started at subdirs, not the root.

That's fine, but grepping around a bit I wonder if we shouldn't
eventually have some slightly fancier API that just works like
read_tree() but takes an optional "start at the index's root" instead.

Well, things that want that usually care about the index-specific bits,
whereas this "I just care about the tree for these" is more of a special
case I guess.
diff mbox series

Patch

diff --git a/cache.h b/cache.h
index c2f8a8eadf67..abb00a068e5d 100644
--- a/cache.h
+++ b/cache.h
@@ -204,6 +204,8 @@  struct cache_entry {
 #error "CE_EXTENDED_FLAGS out of range"
 #endif
 
+#define S_ISSPARSEDIR(m) ((m) == S_IFDIR) /* mode is exactly S_IFDIR: marks a sparse-directory entry */
+
 /* Forward structure decls */
 struct pathspec;
 struct child_process;
@@ -319,7 +321,14 @@  struct index_state {
 		 drop_cache_tree : 1,
 		 updated_workdir : 1,
 		 updated_skipworktree : 1,
-		 fsmonitor_has_run_once : 1;
+		 fsmonitor_has_run_once : 1,
+
+		 /*
+		  * sparse_index == 1 when sparse-directory
+		  * entries exist. Requires sparse-checkout
+		  * in cone mode.
+		  */
+		 sparse_index : 1;
 	struct hashmap name_hash;
 	struct hashmap dir_hash;
 	struct object_id oid;
@@ -722,6 +731,8 @@  int read_index_from(struct index_state *, const char *path,
 		    const char *gitdir);
 int is_index_unborn(struct index_state *);
 
+void ensure_full_index(struct index_state *istate);
+
 /* For use with `write_locked_index()`. */
 #define COMMIT_LOCK		(1 << 0)
 #define SKIP_IF_UNCHANGED	(1 << 1)
diff --git a/read-cache.c b/read-cache.c
index 1e9a50c6c734..dd3980c12b53 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -101,6 +101,9 @@  static const char *alternate_index_output;
 
 static void set_index_entry(struct index_state *istate, int nr, struct cache_entry *ce)
 {
+	if (S_ISSPARSEDIR(ce->ce_mode))
+		istate->sparse_index = 1; /* one sparse-directory entry marks the whole index as sparse */
+
 	istate->cache[nr] = ce;
 	add_name_hash(istate, ce);
 }
@@ -2273,6 +2276,12 @@  int do_read_index(struct index_state *istate, const char *path, int must_exist)
 	trace2_data_intmax("index", the_repository, "read/cache_nr",
 			   istate->cache_nr);
 
+	if (!istate->repo)
+		istate->repo = the_repository;
+	prepare_repo_settings(istate->repo);
+	if (istate->repo->settings.command_requires_full_index)
+		ensure_full_index(istate);
+
 	return istate->cache_nr;
 
 unmap:
diff --git a/sparse-index.c b/sparse-index.c
index 82183ead563b..7095378a1b28 100644
--- a/sparse-index.c
+++ b/sparse-index.c
@@ -1,8 +1,104 @@ 
 #include "cache.h"
 #include "repository.h"
 #include "sparse-index.h"
+#include "tree.h"
+#include "pathspec.h"
+#include "trace2.h"
+
+static void set_index_entry(struct index_state *istate, int nr, struct cache_entry *ce)
+{	/* Store 'ce' in slot 'nr' of istate->cache; unlike the read-cache.c version, sparse_index is never set here. */
+	ALLOC_GROW(istate->cache, nr + 1, istate->cache_alloc); /* make sure slot 'nr' exists before writing it */
+
+	istate->cache[nr] = ce;
+	add_name_hash(istate, ce);
+}
+
+static int add_path_to_index(const struct object_id *oid,
+			     struct strbuf *base, const char *path,
+			     unsigned int mode, void *context)
+{	/* read_tree_at() callback: append one non-directory tree entry to the index passed as 'context'. */
+	struct index_state *istate = (struct index_state *)context; /* the index being filled */
+	struct cache_entry *ce;
+	size_t len = base->len; /* remembered so 'base' can be restored before returning */
+
+	if (S_ISDIR(mode))
+		return READ_TREE_RECURSIVE; /* directories get no entry of their own */
+
+	strbuf_addstr(base, path); /* base now holds the entry's full path */
+
+	ce = make_cache_entry(istate, mode, oid, base->buf, 0, 0); /* NOTE(review): result is dereferenced unchecked - confirm it cannot be NULL here */
+	ce->ce_flags |= CE_SKIP_WORKTREE; /* expanded entries carry the skip-worktree bit */
+	set_index_entry(istate, istate->cache_nr++, ce); /* append at the end of the cache array */
+
+	strbuf_setlen(base, len); /* drop 'path' again, leaving the caller's prefix */
+	return 0;
+}
 
 void ensure_full_index(struct index_state *istate)
 {
-	/* intentionally left blank */
+	int i;
+	struct index_state *full;
+	struct strbuf base = STRBUF_INIT;
+
+	if (!istate || !istate->sparse_index)
+		return; /* no index, or no sparse-directory entries: nothing to expand */
+
+	if (!istate->repo)
+		istate->repo = the_repository; /* fall back to the global repository for tree lookups */
+
+	trace2_region_enter("index", "ensure_full_index", istate->repo);
+
+	/* build the expanded entry list in a scratch copy of istate */
+	full = xcalloc(1, sizeof(struct index_state));
+	memcpy(full, istate, sizeof(struct index_state)); /* shallow copy: pointers and embedded hashmaps are shared */
+
+	/* then give the copy its own, initially empty, cache array */
+	full->sparse_index = 0;
+	full->cache_alloc = (3 * istate->cache_alloc) / 2; /* start at 1.5 times the old capacity; set_index_entry() grows it on demand */
+	full->cache_nr = 0;
+	ALLOC_ARRAY(full->cache, full->cache_alloc);
+
+	for (i = 0; i < istate->cache_nr; i++) {
+		struct cache_entry *ce = istate->cache[i];
+		struct tree *tree;
+		struct pathspec ps;
+
+		if (!S_ISSPARSEDIR(ce->ce_mode)) {
+			set_index_entry(full, full->cache_nr++, ce); /* ordinary entry: moved over as-is, not copied */
+			continue;
+		}
+		if (!(ce->ce_flags & CE_SKIP_WORKTREE))
+			warning(_("index entry is a directory, but not sparse (%08x)"),
+				ce->ce_flags); /* only a warning: the entry is still expanded below */
+
+		/* recursively walk the tree recorded for ce->name */
+		tree = lookup_tree(istate->repo, &ce->oid); /* NOTE(review): result is passed on unchecked - confirm read_tree_at() copes with NULL */
+
+		memset(&ps, 0, sizeof(ps)); /* start from an all-zero pathspec, then set only the fields below */
+		ps.recursive = 1;
+		ps.has_wildcard = 1;
+		ps.max_depth = -1;
+
+		strbuf_setlen(&base, 0);
+		strbuf_add(&base, ce->name, strlen(ce->name)); /* the walk's base path is the directory entry's own name */
+
+		read_tree_at(istate->repo, tree, &base, &ps,
+			     add_path_to_index, full);
+
+		/* free the sparse-directory entry; non-directory entries were moved into 'full' and are re-used */
+		discard_cache_entry(ce);
+	}
+
+	/* Copy back into original index. NOTE(review): name_hash is copied but dir_hash is not - confirm add_name_hash() on 'full' cannot leave istate->dir_hash stale. */
+	memcpy(&istate->name_hash, &full->name_hash, sizeof(full->name_hash));
+	istate->sparse_index = 0;
+	free(istate->cache); /* old array only; its entries were moved to 'full' or discarded above */
+	istate->cache = full->cache;
+	istate->cache_nr = full->cache_nr;
+	istate->cache_alloc = full->cache_alloc;
+
+	strbuf_release(&base);
+	free(full); /* frees the scratch struct only; its cache array now belongs to istate */
+
+	trace2_region_leave("index", "ensure_full_index", istate->repo);
 }