diff mbox series

[v3,2/9] cache-tree: simplify verify_cache() prototype

Message ID 1b8b56800948339c0e0387555698bdfdc80a19ad.1611431900.git.gitgitgadget@gmail.com (mailing list archive)
State New, archived
Headers show
Series More index cleanups | expand

Commit Message

Derrick Stolee Jan. 23, 2021, 7:58 p.m. UTC
From: Derrick Stolee <dstolee@microsoft.com>

The verify_cache() method takes an array of cache entries and a count,
but these are always provided directly from a struct index_state. Use
a pointer to the full structure instead.

There is a subtle point when istate->cache_nr is zero that subtracting
one will underflow. This triggers a failure in t0000-basic.sh, among
others. Use "i + 1 < istate->cache_nr" to avoid these strange
comparisons. Convert i to be unsigned as well, which also removes the
potential signed overflow in the unlikely case that cache_nr is over 2.1
billion entries. The 'funny' variable has a maximum value of 11, so
making it unsigned does not change anything of importance.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 cache-tree.c | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

Comments

Elijah Newren Jan. 23, 2021, 8:24 p.m. UTC | #1
On Sat, Jan 23, 2021 at 11:58 AM Derrick Stolee via GitGitGadget
<gitgitgadget@gmail.com> wrote:
>
> From: Derrick Stolee <dstolee@microsoft.com>
>
> The verify_cache() method takes an array of cache entries and a count,
> but these are always provided directly from a struct index_state. Use
> a pointer to the full structure instead.
>
> There is a subtle point when istate->cache_nr is zero that subtracting
> one will underflow. This triggers a failure in t0000-basic.sh, among
> others. Use "i + 1 < istate->cache_nr" to avoid these strange
> comparisons. Convert i to be unsigned as well, which also removes the
> potential signed overflow in the unlikely case that cache_nr is over 2.1
> billion entries. The 'funny' variable has a maximum value of 11, so

AND a minimum value of 0 (which is important for the type change to be valid).

> making it unsigned does not change anything of importance.
>
> Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
> ---
>  cache-tree.c | 17 ++++++++---------
>  1 file changed, 8 insertions(+), 9 deletions(-)
>
> diff --git a/cache-tree.c b/cache-tree.c
> index 60b6aefbf51..acac6d58c37 100644
> --- a/cache-tree.c
> +++ b/cache-tree.c
> @@ -151,16 +151,15 @@ void cache_tree_invalidate_path(struct index_state *istate, const char *path)
>                 istate->cache_changed |= CACHE_TREE_CHANGED;
>  }
>
> -static int verify_cache(struct cache_entry **cache,
> -                       int entries, int flags)
> +static int verify_cache(struct index_state *istate, int flags)
>  {
> -       int i, funny;
> +       unsigned i, funny;
>         int silent = flags & WRITE_TREE_SILENT;
>
>         /* Verify that the tree is merged */
>         funny = 0;
> -       for (i = 0; i < entries; i++) {
> -               const struct cache_entry *ce = cache[i];
> +       for (i = 0; i < istate->cache_nr; i++) {
> +               const struct cache_entry *ce = istate->cache[i];
>                 if (ce_stage(ce)) {
>                         if (silent)
>                                 return -1;
> @@ -180,13 +179,13 @@ static int verify_cache(struct cache_entry **cache,
>          * stage 0 entries.
>          */
>         funny = 0;
> -       for (i = 0; i < entries - 1; i++) {
> +       for (i = 0; i + 1 < istate->cache_nr; i++) {
>                 /* path/file always comes after path because of the way
>                  * the cache is sorted.  Also path can appear only once,
>                  * which means conflicting one would immediately follow.
>                  */
> -               const struct cache_entry *this_ce = cache[i];
> -               const struct cache_entry *next_ce = cache[i + 1];
> +               const struct cache_entry *this_ce = istate->cache[i];
> +               const struct cache_entry *next_ce = istate->cache[i + 1];
>                 const char *this_name = this_ce->name;
>                 const char *next_name = next_ce->name;
>                 int this_len = ce_namelen(this_ce);
> @@ -438,7 +437,7 @@ int cache_tree_update(struct index_state *istate, int flags)
>  {
>         int skip, i;
>
> -       i = verify_cache(istate->cache, istate->cache_nr, flags);
> +       i = verify_cache(istate, flags);
>
>         if (i)
>                 return i;
> --
> gitgitgadget

Makes sense.  Thanks for explaining the i + 1 < istate->cache_nr bit
in the commit message; made it easier to read through quickly.  I'm
curious if it deserves a comment in the code too, since it does feel
slightly unusual.
Derrick Stolee Jan. 23, 2021, 9:02 p.m. UTC | #2
On 1/23/2021 3:24 PM, Elijah Newren wrote:
> On Sat, Jan 23, 2021 at 11:58 AM Derrick Stolee via GitGitGadget
> <gitgitgadget@gmail.com> wrote:
>> -       for (i = 0; i < entries - 1; i++) {
>> +       for (i = 0; i + 1 < istate->cache_nr; i++) {
>>                 /* path/file always comes after path because of the way
>>                  * the cache is sorted.  Also path can appear only once,
>>                  * which means conflicting one would immediately follow.
>>                  */
>> -               const struct cache_entry *this_ce = cache[i];
>> -               const struct cache_entry *next_ce = cache[i + 1];
>> +               const struct cache_entry *this_ce = istate->cache[i];
>> +               const struct cache_entry *next_ce = istate->cache[i + 1];
>>                 const char *this_name = this_ce->name;
>>                 const char *next_name = next_ce->name;
>>                 int this_len = ce_namelen(this_ce);
> Makes sense.  Thanks for explaining the i + 1 < istate->cache_nr bit
> in the commit message; made it easier to read through quickly.  I'm
> curious if it deserves a comment in the code too, since it does feel
> slightly unusual.

I would argue that "i + 1 < N" is a more natural way to write this,
because we use "i + 1" as an index, so we want to ensure the index
we are about to use is within range. "i < N - 1" is the backwards
way to write that statement.

Thanks,
-Stolee
Junio C Hamano Jan. 23, 2021, 9:10 p.m. UTC | #3
Elijah Newren <newren@gmail.com> writes:

>> -       for (i = 0; i < entries - 1; i++) {
>> +       for (i = 0; i + 1 < istate->cache_nr; i++) {
>>                 /* path/file always comes after path because of the way
>>                  * the cache is sorted.  Also path can appear only once,
>>                  * which means conflicting one would immediately follow.
>>                  */
>> -               const struct cache_entry *this_ce = cache[i];
>> -               const struct cache_entry *next_ce = cache[i + 1];
>> +               const struct cache_entry *this_ce = istate->cache[i];
>> +               const struct cache_entry *next_ce = istate->cache[i + 1];
>
> Makes sense.  Thanks for explaining the i + 1 < istate->cache_nr bit
> in the commit message; made it easier to read through quickly.  I'm
> curious if it deserves a comment in the code too, since it does feel
> slightly unusual.

I think this is entirely my fault.

It probably reads more natural to start from 1 and interate through
to the end of the array, comparing the current one with the previous
entry, i.e.

	for (i = 1; i < istate->cache_nr; i++) {
        	prev = cache[i - 1];
		this = cache[i];
                compare(prev, this);
Elijah Newren Jan. 23, 2021, 9:10 p.m. UTC | #4
On Sat, Jan 23, 2021 at 1:02 PM Derrick Stolee <stolee@gmail.com> wrote:
>
> On 1/23/2021 3:24 PM, Elijah Newren wrote:
> > On Sat, Jan 23, 2021 at 11:58 AM Derrick Stolee via GitGitGadget
> > <gitgitgadget@gmail.com> wrote:
> >> -       for (i = 0; i < entries - 1; i++) {
> >> +       for (i = 0; i + 1 < istate->cache_nr; i++) {
> >>                 /* path/file always comes after path because of the way
> >>                  * the cache is sorted.  Also path can appear only once,
> >>                  * which means conflicting one would immediately follow.
> >>                  */
> >> -               const struct cache_entry *this_ce = cache[i];
> >> -               const struct cache_entry *next_ce = cache[i + 1];
> >> +               const struct cache_entry *this_ce = istate->cache[i];
> >> +               const struct cache_entry *next_ce = istate->cache[i + 1];
> >>                 const char *this_name = this_ce->name;
> >>                 const char *next_name = next_ce->name;
> >>                 int this_len = ce_namelen(this_ce);
> > Makes sense.  Thanks for explaining the i + 1 < istate->cache_nr bit
> > in the commit message; made it easier to read through quickly.  I'm
> > curious if it deserves a comment in the code too, since it does feel
> > slightly unusual.
>
> I would argue that "i + 1 < N" is a more natural way to write this,
> because we use "i + 1" as an index, so we want to ensure the index
> we are about to use is within range. "i < N - 1" is the backwards
> way to write that statement.

Oh, right, I think I was reading too quickly and assuming one thing in
my head (about what the code was going to do), and forgetting that
assumption when I got to the actual code.  Sorry about that; I agree
with you, so ignore my previous comment.
Derrick Stolee Jan. 23, 2021, 9:14 p.m. UTC | #5
On 1/23/2021 4:10 PM, Junio C Hamano wrote:
> Elijah Newren <newren@gmail.com> writes:
> 
>>> -       for (i = 0; i < entries - 1; i++) {
>>> +       for (i = 0; i + 1 < istate->cache_nr; i++) {
>>>                 /* path/file always comes after path because of the way
>>>                  * the cache is sorted.  Also path can appear only once,
>>>                  * which means conflicting one would immediately follow.
>>>                  */
>>> -               const struct cache_entry *this_ce = cache[i];
>>> -               const struct cache_entry *next_ce = cache[i + 1];
>>> +               const struct cache_entry *this_ce = istate->cache[i];
>>> +               const struct cache_entry *next_ce = istate->cache[i + 1];
>>
>> Makes sense.  Thanks for explaining the i + 1 < istate->cache_nr bit
>> in the commit message; made it easier to read through quickly.  I'm
>> curious if it deserves a comment in the code too, since it does feel
>> slightly unusual.
> 
> I think this is entirely my fault.
> 
> It probably reads more natural to start from 1 and interate through
> to the end of the array, comparing the current one with the previous
> entry, i.e.
> 
> 	for (i = 1; i < istate->cache_nr; i++) {
>         	prev = cache[i - 1];
> 		this = cache[i];
>                 compare(prev, this);

This would be another natural way to make the loop extremely clear.

It's a bigger diff, changing the names of 'this' and 'next', but
perhaps worthwhile.

Thanks,
-Stolee
Junio C Hamano Jan. 23, 2021, 9:41 p.m. UTC | #6
Derrick Stolee <stolee@gmail.com> writes:

> On 1/23/2021 3:24 PM, Elijah Newren wrote:
>> On Sat, Jan 23, 2021 at 11:58 AM Derrick Stolee via GitGitGadget
>> <gitgitgadget@gmail.com> wrote:
>>> -       for (i = 0; i < entries - 1; i++) {
>>> +       for (i = 0; i + 1 < istate->cache_nr; i++) {
>>>                 /* path/file always comes after path because of the way
>>>                  * the cache is sorted.  Also path can appear only once,
>>>                  * which means conflicting one would immediately follow.
>>>                  */
>>> -               const struct cache_entry *this_ce = cache[i];
>>> -               const struct cache_entry *next_ce = cache[i + 1];
>>> +               const struct cache_entry *this_ce = istate->cache[i];
>>> +               const struct cache_entry *next_ce = istate->cache[i + 1];
>>>                 const char *this_name = this_ce->name;
>>>                 const char *next_name = next_ce->name;
>>>                 int this_len = ce_namelen(this_ce);
>> Makes sense.  Thanks for explaining the i + 1 < istate->cache_nr bit
>> in the commit message; made it easier to read through quickly.  I'm
>> curious if it deserves a comment in the code too, since it does feel
>> slightly unusual.
>
> I would argue that "i + 1 < N" is a more natural way to write this,
> because we use "i + 1" as an index, so we want to ensure the index
> we are about to use is within range. "i < N - 1" is the backwards
> way to write that statement.

Our mails have crossed, I guess.  Comparing i+1 and N is also good.
diff mbox series

Patch

diff --git a/cache-tree.c b/cache-tree.c
index 60b6aefbf51..acac6d58c37 100644
--- a/cache-tree.c
+++ b/cache-tree.c
@@ -151,16 +151,15 @@  void cache_tree_invalidate_path(struct index_state *istate, const char *path)
 		istate->cache_changed |= CACHE_TREE_CHANGED;
 }
 
-static int verify_cache(struct cache_entry **cache,
-			int entries, int flags)
+static int verify_cache(struct index_state *istate, int flags)
 {
-	int i, funny;
+	unsigned i, funny;
 	int silent = flags & WRITE_TREE_SILENT;
 
 	/* Verify that the tree is merged */
 	funny = 0;
-	for (i = 0; i < entries; i++) {
-		const struct cache_entry *ce = cache[i];
+	for (i = 0; i < istate->cache_nr; i++) {
+		const struct cache_entry *ce = istate->cache[i];
 		if (ce_stage(ce)) {
 			if (silent)
 				return -1;
@@ -180,13 +179,13 @@  static int verify_cache(struct cache_entry **cache,
 	 * stage 0 entries.
 	 */
 	funny = 0;
-	for (i = 0; i < entries - 1; i++) {
+	for (i = 0; i + 1 < istate->cache_nr; i++) {
 		/* path/file always comes after path because of the way
 		 * the cache is sorted.  Also path can appear only once,
 		 * which means conflicting one would immediately follow.
 		 */
-		const struct cache_entry *this_ce = cache[i];
-		const struct cache_entry *next_ce = cache[i + 1];
+		const struct cache_entry *this_ce = istate->cache[i];
+		const struct cache_entry *next_ce = istate->cache[i + 1];
 		const char *this_name = this_ce->name;
 		const char *next_name = next_ce->name;
 		int this_len = ce_namelen(this_ce);
@@ -438,7 +437,7 @@  int cache_tree_update(struct index_state *istate, int flags)
 {
 	int skip, i;
 
-	i = verify_cache(istate->cache, istate->cache_nr, flags);
+	i = verify_cache(istate, flags);
 
 	if (i)
 		return i;