diff mbox series

[6/8] sparse-index: complete partial expansion

Message ID eba63cc12af4f60320b34a54eef691b9f59d86bc.1652724693.git.gitgitgadget@gmail.com (mailing list archive)
State Superseded
Headers show
Series Sparse index: integrate with sparse-checkout | expand

Commit Message

Derrick Stolee May 16, 2022, 6:11 p.m. UTC
From: Derrick Stolee <dstolee@microsoft.com>

To complete the implementation of expand_to_pattern_list(), we need to
detect when a sparse directory entry should remain sparse. This avoids a
full expansion, so we now need to use the PARTIALLY_SPARSE mode to
indicate this state.

There still are no callers to this method, but we will add one in the
next change.

Signed-off-by: Derrick Stolee <derrickstolee@github.com>
---
 sparse-index.c | 41 +++++++++++++++++++++++++++++++++++++----
 1 file changed, 37 insertions(+), 4 deletions(-)

Comments

Victoria Dye May 16, 2022, 8:38 p.m. UTC | #1
Derrick Stolee via GitGitGadget wrote:
> From: Derrick Stolee <dstolee@microsoft.com>
> 
> To complete the implementation of expand_to_pattern_list(), we need to
> detect when a sparse directory entry should remain sparse. This avoids a
> full expansion, so we now need to use the PARTIALLY_SPARSE mode to
> indicate this state.
> 
> There still are no callers to this method, but we will add one in the
> next change.
> 
> Signed-off-by: Derrick Stolee <derrickstolee@github.com>
> ---
>  sparse-index.c | 41 +++++++++++++++++++++++++++++++++++++----
>  1 file changed, 37 insertions(+), 4 deletions(-)
> 
> diff --git a/sparse-index.c b/sparse-index.c
> index 3d8eed585b5..0bad5503304 100644
> --- a/sparse-index.c
> +++ b/sparse-index.c
> @@ -297,8 +297,24 @@ void expand_to_pattern_list(struct index_state *istate,
>  	 * continue. A NULL pattern set indicates a full expansion to a
>  	 * full index.
>  	 */
> -	if (pl && !pl->use_cone_patterns)
> +	if (pl && !pl->use_cone_patterns) {
>  		pl = NULL;
> +	} else {
> +		/*
> +		 * We might contract file entries into sparse-directory
> +		 * entries, and for that we will need the cache tree to
> +		 * be recomputed.
> +		 */
> +		cache_tree_free(&istate->cache_tree);
> +
> +		/*
> +		 * If there is a problem creating the cache tree, then we
> +		 * need to expand to a full index since we cannot satisfy
> +		 * the current request as a sparse index.
> +		 */
> +		if (cache_tree_update(istate, WRITE_TREE_MISSING_OK))
> +			pl = NULL;
> +	}
>  
>  	if (!istate->repo)
>  		istate->repo = the_repository;
> @@ -317,8 +333,14 @@ void expand_to_pattern_list(struct index_state *istate,
>  	full = xcalloc(1, sizeof(struct index_state));
>  	memcpy(full, istate, sizeof(struct index_state));
>  
> +	/*
> +	 * This slightly-misnamed 'full' index might still be sparse if we
> +	 * are only modifying the list of sparse directories. This hinges
> +	 * on whether we have a non-NULL pattern list.
> +	 */
> +	full->sparse_index = pl ? PARTIALLY_SPARSE : COMPLETELY_FULL;
> +
>  	/* then change the necessary things */
> -	full->sparse_index = 0;
>  	full->cache_alloc = (3 * istate->cache_alloc) / 2;
>  	full->cache_nr = 0;
>  	ALLOC_ARRAY(full->cache, full->cache_alloc);
> @@ -330,11 +352,22 @@ void expand_to_pattern_list(struct index_state *istate,
>  		struct cache_entry *ce = istate->cache[i];
>  		struct tree *tree;
>  		struct pathspec ps;
> +		int dtype;
>  
>  		if (!S_ISSPARSEDIR(ce->ce_mode)) {
>  			set_index_entry(full, full->cache_nr++, ce);
>  			continue;
>  		}
> +
> +		/* We now have a sparse directory entry. Should we expand? */
> +		if (pl &&
> +		    path_matches_pattern_list(ce->name, ce->ce_namelen,
> +					      NULL, &dtype,
> +					      pl, istate) <= 0) {

If I'm reading this correctly, what this is doing is:

- if we have a sparse directory entry
- ...and we're expanding only what matches the pattern list (i.e., not
  'ensure_full_index')
- ...and that sparse directory path is either *not matching* or *undecided
  whether it matches* the pattern list
- ...then we add the sparse directory to the result index and continue. 

The part that's confusing me is the "<= 0", which means a return value of
'UNDECIDED' from 'path_matches_pattern_list' adds the sparse directory
as-is. At the moment, it looks like 'UNDECIDED' is only returned if not
using cone patterns (so it shouldn't make a functional difference at this
point), but wouldn't that return value indicate that the pattern *may or may
not* match the path, so we should continue on to 'read_tree_at()'?

All that to say, should the condition be:

		/* We now have a sparse directory entry. Should we expand? */
		if (pl &&
		    path_matches_pattern_list(ce->name, ce->ce_namelen,
					      NULL, &dtype,
					      pl, istate) == NOT_MATCHED) {

to reflect that a sparse directory should only be added to the index if we
*know* it isn't matched?

To be clear, this is ultimately a non-functional nit - my question is mostly
to make sure I understand the intent of the code.

> +			set_index_entry(full, full->cache_nr++, ce);
> +			continue;
> +		}
> +
>  		if (!(ce->ce_flags & CE_SKIP_WORKTREE))
>  			warning(_("index entry is a directory, but not sparse (%08x)"),
>  				ce->ce_flags);
> @@ -360,7 +393,7 @@ void expand_to_pattern_list(struct index_state *istate,
>  	/* Copy back into original index. */
>  	memcpy(&istate->name_hash, &full->name_hash, sizeof(full->name_hash));
>  	memcpy(&istate->dir_hash, &full->dir_hash, sizeof(full->dir_hash));
> -	istate->sparse_index = 0;
> +	istate->sparse_index = pl ? PARTIALLY_SPARSE : COMPLETELY_FULL;
>  	free(istate->cache);
>  	istate->cache = full->cache;
>  	istate->cache_nr = full->cache_nr;
> @@ -374,7 +407,7 @@ void expand_to_pattern_list(struct index_state *istate,
>  
>  	/* Clear and recompute the cache-tree */
>  	cache_tree_free(&istate->cache_tree);
> -	cache_tree_update(istate, 0);
> +	cache_tree_update(istate, WRITE_TREE_MISSING_OK);
>  
>  	trace2_region_leave("index",
>  			    pl ? "expand_to_pattern_list" : "ensure_full_index",
Derrick Stolee May 17, 2022, 1:23 p.m. UTC | #2
On 5/16/2022 4:38 PM, Victoria Dye wrote:
> Derrick Stolee via GitGitGadget wrote:
>> From: Derrick Stolee <dstolee@microsoft.com>

>> +
>> +		/* We now have a sparse directory entry. Should we expand? */
>> +		if (pl &&
>> +		    path_matches_pattern_list(ce->name, ce->ce_namelen,
>> +					      NULL, &dtype,
>> +					      pl, istate) <= 0) {
> 
> If I'm reading this correctly, what this is doing is:
> 
> - if we have a sparse directory entry
> - ...and we're expanding only what matches the pattern list (i.e., not
>   'ensure_full_index')
> - ...and that sparse directory path is either *not matching* or *undecided
>   whether it matches* the pattern list
> - ...then we add the sparse directory to the result index and continue. 
> 
> The part that's confusing me is the "<= 0", which means a return value of
> 'UNDECIDED' from 'path_matches_pattern_list' adds the sparse directory
> as-is. At the moment, it looks like 'UNDECIDED' is only returned if not
> using cone patterns (so it shouldn't make a functional difference at this
> point), but wouldn't that return value indicate that the pattern *may or may
> not* match the path, so we should continue on to 'read_tree_at()'?
> 
> All that to say, should the condition be:
> 
> 		/* We now have a sparse directory entry. Should we expand? */
> 		if (pl &&
> 		    path_matches_pattern_list(ce->name, ce->ce_namelen,
> 					      NULL, &dtype,
> 					      pl, istate) == NOT_MATCHED) {
> 
> to reflect that a sparse directory should only be added to the index if we
> *know* it isn't matched?
> 
> To be clear, this is ultimately a non-functional nit - my question is mostly
> to make sure I understand the intent of the code.

You are 100% correct, and using "== NOT_MATCHED" makes the intention clear,
as well as being more robust to a breakage in path_matches_pattern_list().

Thanks,
-Stolee
diff mbox series

Patch

diff --git a/sparse-index.c b/sparse-index.c
index 3d8eed585b5..0bad5503304 100644
--- a/sparse-index.c
+++ b/sparse-index.c
@@ -297,8 +297,24 @@  void expand_to_pattern_list(struct index_state *istate,
 	 * continue. A NULL pattern set indicates a full expansion to a
 	 * full index.
 	 */
-	if (pl && !pl->use_cone_patterns)
+	if (pl && !pl->use_cone_patterns) {
 		pl = NULL;
+	} else {
+		/*
+		 * We might contract file entries into sparse-directory
+		 * entries, and for that we will need the cache tree to
+		 * be recomputed.
+		 */
+		cache_tree_free(&istate->cache_tree);
+
+		/*
+		 * If there is a problem creating the cache tree, then we
+		 * need to expand to a full index since we cannot satisfy
+		 * the current request as a sparse index.
+		 */
+		if (cache_tree_update(istate, WRITE_TREE_MISSING_OK))
+			pl = NULL;
+	}
 
 	if (!istate->repo)
 		istate->repo = the_repository;
@@ -317,8 +333,14 @@  void expand_to_pattern_list(struct index_state *istate,
 	full = xcalloc(1, sizeof(struct index_state));
 	memcpy(full, istate, sizeof(struct index_state));
 
+	/*
+	 * This slightly-misnamed 'full' index might still be sparse if we
+	 * are only modifying the list of sparse directories. This hinges
+	 * on whether we have a non-NULL pattern list.
+	 */
+	full->sparse_index = pl ? PARTIALLY_SPARSE : COMPLETELY_FULL;
+
 	/* then change the necessary things */
-	full->sparse_index = 0;
 	full->cache_alloc = (3 * istate->cache_alloc) / 2;
 	full->cache_nr = 0;
 	ALLOC_ARRAY(full->cache, full->cache_alloc);
@@ -330,11 +352,22 @@  void expand_to_pattern_list(struct index_state *istate,
 		struct cache_entry *ce = istate->cache[i];
 		struct tree *tree;
 		struct pathspec ps;
+		int dtype;
 
 		if (!S_ISSPARSEDIR(ce->ce_mode)) {
 			set_index_entry(full, full->cache_nr++, ce);
 			continue;
 		}
+
+		/* We now have a sparse directory entry. Should we expand? */
+		if (pl &&
+		    path_matches_pattern_list(ce->name, ce->ce_namelen,
+					      NULL, &dtype,
+					      pl, istate) <= 0) {
+			set_index_entry(full, full->cache_nr++, ce);
+			continue;
+		}
+
 		if (!(ce->ce_flags & CE_SKIP_WORKTREE))
 			warning(_("index entry is a directory, but not sparse (%08x)"),
 				ce->ce_flags);
@@ -360,7 +393,7 @@  void expand_to_pattern_list(struct index_state *istate,
 	/* Copy back into original index. */
 	memcpy(&istate->name_hash, &full->name_hash, sizeof(full->name_hash));
 	memcpy(&istate->dir_hash, &full->dir_hash, sizeof(full->dir_hash));
-	istate->sparse_index = 0;
+	istate->sparse_index = pl ? PARTIALLY_SPARSE : COMPLETELY_FULL;
 	free(istate->cache);
 	istate->cache = full->cache;
 	istate->cache_nr = full->cache_nr;
@@ -374,7 +407,7 @@  void expand_to_pattern_list(struct index_state *istate,
 
 	/* Clear and recompute the cache-tree */
 	cache_tree_free(&istate->cache_tree);
-	cache_tree_update(istate, 0);
+	cache_tree_update(istate, WRITE_TREE_MISSING_OK);
 
 	trace2_region_leave("index",
 			    pl ? "expand_to_pattern_list" : "ensure_full_index",