diff mbox series

[v3,2/3] sparse-index: add ensure_correct_sparsity function

Message ID 9d6511db0728e9880a96f3d9e3a025a9ddc5bc9e.1635358812.git.gitgitgadget@gmail.com (mailing list archive)
State Accepted
Commit b93fea08d24b0ceb498445cc80c91e26a6bff29b
Headers show
Series sparse-index: expand/collapse based on 'index.sparse' | expand

Commit Message

Victoria Dye Oct. 27, 2021, 6:20 p.m. UTC
From: Victoria Dye <vdye@github.com>

The `ensure_correct_sparsity` function is intended to provide a means of
aligning the in-core index with the sparsity required by the repository
settings and other properties of the index. The function first checks
whether a sparse index is allowed (per repository & sparse checkout pattern
settings). If the sparse index may be used, the index is converted to
sparse; otherwise, it is explicitly expanded with `ensure_full_index`.

Helped-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Victoria Dye <vdye@github.com>
---
 sparse-index.c | 33 +++++++++++++++++++++++++++++----
 sparse-index.h |  1 +
 2 files changed, 30 insertions(+), 4 deletions(-)

Comments

Derrick Stolee Oct. 27, 2021, 8:07 p.m. UTC | #1
On 10/27/2021 2:20 PM, Victoria Dye via GitGitGadget wrote:
> From: Victoria Dye <vdye@github.com>

> +static int is_sparse_index_allowed(struct index_state *istate, int flags)

I agree this name is better.

>  {
> -	int test_env;
> -	if (istate->sparse_index || !istate->cache_nr ||
> -	    !core_apply_sparse_checkout || !core_sparse_checkout_cone)
> +	if (!core_apply_sparse_checkout || !core_sparse_checkout_cone)
>  		return 0;
>  
>  	if (!istate->repo)
>  		istate->repo = the_repository;
>  
>  	if (!(flags & SPARSE_INDEX_MEMORY_ONLY)) {
> +		int test_env;
> +
>  		/*
>  		 * The sparse index is not (yet) integrated with a split index.
>  		 */

Nice that most of the implementation comes over without showing
up in the diff.

>  	if (!istate->sparse_checkout_patterns->use_cone_patterns)
>  		return 0;
>  
> +	return 1;
> +}
> +
> +int convert_to_sparse(struct index_state *istate, int flags)
> +{
> +	/*
> +	 * If the index is already sparse, empty, or otherwise
> +	 * cannot be converted to sparse, do not convert.
> +	 */
> +	if (istate->sparse_index || !istate->cache_nr ||
> +	    !is_sparse_index_allowed(istate, flags))
> +		return 0;

> +void ensure_correct_sparsity(struct index_state *istate)
> +{
> +	/*
> +	 * If the index can be sparse, make it sparse. Otherwise,
> +	 * ensure the index is full.
> +	 */
> +	if (is_sparse_index_allowed(istate, 0))
> +		convert_to_sparse(istate, 0);
> +	else
> +		ensure_full_index(istate);
> +}

These two methods become very simple. Excellent.

-Stolee
Junio C Hamano Oct. 27, 2021, 9:32 p.m. UTC | #2
Derrick Stolee <stolee@gmail.com> writes:

>> +int convert_to_sparse(struct index_state *istate, int flags)
>> +{
>> +	/*
>> +	 * If the index is already sparse, empty, or otherwise
>> +	 * cannot be converted to sparse, do not convert.
>> +	 */
>> +	if (istate->sparse_index || !istate->cache_nr ||
>> +	    !is_sparse_index_allowed(istate, flags))
>> +		return 0;

Shouldn't we also at least do this?  Blindly blowing away the entire
cache-tree and rebuilding it from scratch may be hiding a latent bug
somewhere else, but is never supposed to be needed, and is a huge
waste of computational resources.

I say "at least" here, because a cache tree that is partially valid
should be safely salvageable---at least that was the intention back
when I designed the subsystem.

 sparse-index.c | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git c/sparse-index.c w/sparse-index.c
index bc3ee358c6..a95c3386f3 100644
--- c/sparse-index.c
+++ w/sparse-index.c
@@ -188,17 +188,19 @@ int convert_to_sparse(struct index_state *istate, int flags)
 	if (index_has_unmerged_entries(istate))
 		return 0;
 
-	/* Clear and recompute the cache-tree */
-	cache_tree_free(&istate->cache_tree);
-	/*
-	 * Silently return if there is a problem with the cache tree update,
-	 * which might just be due to a conflict state in some entry.
-	 *
-	 * This might create new tree objects, so be sure to use
-	 * WRITE_TREE_MISSING_OK.
-	 */
-	if (cache_tree_update(istate, WRITE_TREE_MISSING_OK))
-		return 0;
+	if (!cache_tree_fully_valid(&istate->cache_tree)) {
+		/* Clear and recompute the cache-tree */
+		cache_tree_free(&istate->cache_tree);
+		/*
+		 * Silently return if there is a problem with the cache tree update,
+		 * which might just be due to a conflict state in some entry.
+		 *
+		 * This might create new tree objects, so be sure to use
+		 * WRITE_TREE_MISSING_OK.
+		 */
+		if (cache_tree_update(istate, WRITE_TREE_MISSING_OK))
+			return 0;
+	}
 
 	remove_fsmonitor(istate);
Derrick Stolee Oct. 28, 2021, 1:24 a.m. UTC | #3
On 10/27/2021 5:32 PM, Junio C Hamano wrote:
> Derrick Stolee <stolee@gmail.com> writes:
> 
>>> +int convert_to_sparse(struct index_state *istate, int flags)
>>> +{
>>> +	/*
>>> +	 * If the index is already sparse, empty, or otherwise
>>> +	 * cannot be converted to sparse, do not convert.
>>> +	 */
>>> +	if (istate->sparse_index || !istate->cache_nr ||
>>> +	    !is_sparse_index_allowed(istate, flags))
>>> +		return 0;
> 
> Shouldn't we also at least do this?  Blindly blowing away the entire
> cache-tree and rebuilding it from scratch may be hiding a latent bug
> somewhere else, but is never supposed to be needed, and is a huge
> waste of computational resources.
> 
> I say "at least" here, because a cache tree that is partially valid
> should be safely salvageable---at least that was the intention back
> when I designed the subsystem.

I think you are right, what you propose below. It certainly seems
like it would work, and even speed up the conversion from full to
sparse. I think I erred on the side of extreme caution and used
a hope that converting to sparse would be rare.

>  sparse-index.c | 24 +++++++++++++-----------
>  1 file changed, 13 insertions(+), 11 deletions(-)
> 
> diff --git c/sparse-index.c w/sparse-index.c
> index bc3ee358c6..a95c3386f3 100644
> --- c/sparse-index.c
> +++ w/sparse-index.c
> @@ -188,17 +188,19 @@ int convert_to_sparse(struct index_state *istate, int flags)
>  	if (index_has_unmerged_entries(istate))
>  		return 0;
>  
> -	/* Clear and recompute the cache-tree */
> -	cache_tree_free(&istate->cache_tree);
> -	/*
> -	 * Silently return if there is a problem with the cache tree update,
> -	 * which might just be due to a conflict state in some entry.
> -	 *
> -	 * This might create new tree objects, so be sure to use
> -	 * WRITE_TREE_MISSING_OK.
> -	 */
> -	if (cache_tree_update(istate, WRITE_TREE_MISSING_OK))
> -		return 0;
> +	if (!cache_tree_fully_valid(&istate->cache_tree)) {
> +		/* Clear and recompute the cache-tree */
> +		cache_tree_free(&istate->cache_tree);
> +		/*
> +		 * Silently return if there is a problem with the cache tree update,
> +		 * which might just be due to a conflict state in some entry.
> +		 *
> +		 * This might create new tree objects, so be sure to use
> +		 * WRITE_TREE_MISSING_OK.
> +		 */
> +		if (cache_tree_update(istate, WRITE_TREE_MISSING_OK))
> +			return 0;
> +	}

I think at this point we have enough tests that check the sparse index
and its different conversion points that the test suite might catch if
this is a bad idea. Note that this is only a change of behavior if the
cache-tree is valid, which I expect to be the case most of the time in
the tests.

Thanks,
-Stolee
Victoria Dye Oct. 29, 2021, 1:43 p.m. UTC | #4
Derrick Stolee wrote:
> On 10/27/2021 5:32 PM, Junio C Hamano wrote:
>> Derrick Stolee <stolee@gmail.com> writes:
>>
>>>> +int convert_to_sparse(struct index_state *istate, int flags)
>>>> +{
>>>> +	/*
>>>> +	 * If the index is already sparse, empty, or otherwise
>>>> +	 * cannot be converted to sparse, do not convert.
>>>> +	 */
>>>> +	if (istate->sparse_index || !istate->cache_nr ||
>>>> +	    !is_sparse_index_allowed(istate, flags))
>>>> +		return 0;
>>
>> Shouldn't we also at least do this?  Blindly blowing away the entire
>> cache-tree and rebuilding it from scratch may be hiding a latent bug
>> somewhere else, but is never supposed to be needed, and is a huge
>> waste of computational resources.
>>
>> I say "at least" here, because a cache tree that is partially valid
>> should be safely salvageable---at least that was the intention back
>> when I designed the subsystem.
> 
> I think you are right, what you propose below. It certainly seems
> like it would work, and even speed up the conversion from full to
> sparse. I think I erred on the side of extreme caution and used
> a hope that converting to sparse would be rare.
> 
>>  sparse-index.c | 24 +++++++++++++-----------
>>  1 file changed, 13 insertions(+), 11 deletions(-)
>>
>> diff --git c/sparse-index.c w/sparse-index.c
>> index bc3ee358c6..a95c3386f3 100644
>> --- c/sparse-index.c
>> +++ w/sparse-index.c
>> @@ -188,17 +188,19 @@ int convert_to_sparse(struct index_state *istate, int flags)
>>  	if (index_has_unmerged_entries(istate))
>>  		return 0;
>>  
>> -	/* Clear and recompute the cache-tree */
>> -	cache_tree_free(&istate->cache_tree);
>> -	/*
>> -	 * Silently return if there is a problem with the cache tree update,
>> -	 * which might just be due to a conflict state in some entry.
>> -	 *
>> -	 * This might create new tree objects, so be sure to use
>> -	 * WRITE_TREE_MISSING_OK.
>> -	 */
>> -	if (cache_tree_update(istate, WRITE_TREE_MISSING_OK))
>> -		return 0;
>> +	if (!cache_tree_fully_valid(&istate->cache_tree)) {
>> +		/* Clear and recompute the cache-tree */
>> +		cache_tree_free(&istate->cache_tree);
>> +		/*
>> +		 * Silently return if there is a problem with the cache tree update,
>> +		 * which might just be due to a conflict state in some entry.
>> +		 *
>> +		 * This might create new tree objects, so be sure to use
>> +		 * WRITE_TREE_MISSING_OK.
>> +		 */
>> +		if (cache_tree_update(istate, WRITE_TREE_MISSING_OK))
>> +			return 0;
>> +	}
> 
> I think at this point we have enough tests that check the sparse index
> and its different conversion points that the test suite might catch if
> this is a bad idea. Note that this is only a change of behavior if the
> cache-tree is valid, which I expect to be the case most of the time in
> the tests.
> 
> Thanks,
> -Stolee
> 

This change doesn't appear to introduce any test failures or other unwanted
behavior, so I don't see a reason not to include it. I'll add it in a
re-roll - thanks!
diff mbox series

Patch

diff --git a/sparse-index.c b/sparse-index.c
index 7b7ff79e044..bc3ee358c63 100644
--- a/sparse-index.c
+++ b/sparse-index.c
@@ -122,17 +122,17 @@  static int index_has_unmerged_entries(struct index_state *istate)
 	return 0;
 }
 
-int convert_to_sparse(struct index_state *istate, int flags)
+static int is_sparse_index_allowed(struct index_state *istate, int flags)
 {
-	int test_env;
-	if (istate->sparse_index || !istate->cache_nr ||
-	    !core_apply_sparse_checkout || !core_sparse_checkout_cone)
+	if (!core_apply_sparse_checkout || !core_sparse_checkout_cone)
 		return 0;
 
 	if (!istate->repo)
 		istate->repo = the_repository;
 
 	if (!(flags & SPARSE_INDEX_MEMORY_ONLY)) {
+		int test_env;
+
 		/*
 		 * The sparse index is not (yet) integrated with a split index.
 		 */
@@ -168,6 +168,19 @@  int convert_to_sparse(struct index_state *istate, int flags)
 	if (!istate->sparse_checkout_patterns->use_cone_patterns)
 		return 0;
 
+	return 1;
+}
+
+int convert_to_sparse(struct index_state *istate, int flags)
+{
+	/*
+	 * If the index is already sparse, empty, or otherwise
+	 * cannot be converted to sparse, do not convert.
+	 */
+	if (istate->sparse_index || !istate->cache_nr ||
+	    !is_sparse_index_allowed(istate, flags))
+		return 0;
+
 	/*
 	 * NEEDSWORK: If we have unmerged entries, then stay full.
 	 * Unmerged entries prevent the cache-tree extension from working.
@@ -313,6 +326,18 @@  void ensure_full_index(struct index_state *istate)
 	trace2_region_leave("index", "ensure_full_index", istate->repo);
 }
 
+void ensure_correct_sparsity(struct index_state *istate)
+{
+	/*
+	 * If the index can be sparse, make it sparse. Otherwise,
+	 * ensure the index is full.
+	 */
+	if (is_sparse_index_allowed(istate, 0))
+		convert_to_sparse(istate, 0);
+	else
+		ensure_full_index(istate);
+}
+
 /*
  * This static global helps avoid infinite recursion between
  * expand_to_path() and index_file_exists().
diff --git a/sparse-index.h b/sparse-index.h
index 9f3d7bc7faf..656bd835b25 100644
--- a/sparse-index.h
+++ b/sparse-index.h
@@ -4,6 +4,7 @@ 
 struct index_state;
 #define SPARSE_INDEX_MEMORY_ONLY (1 << 0)
 int convert_to_sparse(struct index_state *istate, int flags);
+void ensure_correct_sparsity(struct index_state *istate);
 
 /*
  * Some places in the codebase expect to search for a specific path.