diff mbox series

[v3,07/12] unpack-trees: stop recursing into sparse directories

Message ID 598375d3531fabe8582cb6d93838df09e1bd06c3.1621017072.git.gitgitgadget@gmail.com (mailing list archive)
State Superseded
Headers show
Series Sparse-index: integrate with status | expand

Commit Message

Derrick Stolee May 14, 2021, 6:31 p.m. UTC
From: Derrick Stolee <dstolee@microsoft.com>

When walking trees using traverse_trees_recursive() and
unpack_callback(), we must not attempt to walk into a sparse directory
entry. There are no index entries within that directory to compare to
the tree object at that position, so skip over the entries of that tree.

This code is used in many places, so the only way to test it is to start
removing the command_requres_full_index option from one builtin at a
time and carefully test that its use of unpack_trees() behaves correctly
with a sparse-index. Such tests will be added by later changes.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 diff-lib.c     | 6 ++++++
 unpack-trees.c | 7 +++++--
 2 files changed, 11 insertions(+), 2 deletions(-)

Comments

Elijah Newren May 18, 2021, 2:03 a.m. UTC | #1
On Fri, May 14, 2021 at 11:31 AM Derrick Stolee via GitGitGadget
<gitgitgadget@gmail.com> wrote:
>
> From: Derrick Stolee <dstolee@microsoft.com>
>
> When walking trees using traverse_trees_recursive() and
> unpack_callback(), we must not attempt to walk into a sparse directory
> entry. There are no index entries within that directory to compare to
> the tree object at that position, so skip over the entries of that tree.
>
> This code is used in many places, so the only way to test it is to start
> removing the command_requres_full_index option from one builtin at a
> time and carefully test that its use of unpack_trees() behaves correctly
> with a sparse-index. Such tests will be added by later changes.
>
> Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
> ---
>  diff-lib.c     | 6 ++++++
>  unpack-trees.c | 7 +++++--
>  2 files changed, 11 insertions(+), 2 deletions(-)
>
> diff --git a/diff-lib.c b/diff-lib.c
> index b73cc1859a49..d5e7e01132ee 100644
> --- a/diff-lib.c
> +++ b/diff-lib.c
> @@ -322,6 +322,9 @@ static void show_new_file(struct rev_info *revs,
>         unsigned int mode;
>         unsigned dirty_submodule = 0;
>
> +       if (S_ISSPARSEDIR(new_file->ce_mode))
> +               return;
> +

Makes sense, but is this related to the unpack-trees.c changes and the
commit message, or should it be in a separate commit?

>         /*
>          * New file in the index: it might actually be different in
>          * the working tree.
> @@ -343,6 +346,9 @@ static int show_modified(struct rev_info *revs,
>         const struct object_id *oid;
>         unsigned dirty_submodule = 0;
>
> +       if (S_ISSPARSEDIR(new_entry->ce_mode))
> +               return 0;
> +

Same question as above.  And a few more questions...

What if the old commit/tree had a file at this path, and the new
commit/tree has a (sparse) directory at this path?  Shouldn't
_something_ be shown for the file deletion?  Or does such a case not
run through this code path?

Also, wouldn't we expect it to be an error for show_modified() to be
called on a sparse directory?  If two sparse directories differed, we
should have inflated the trees to find the differences in the path
underneath them, right?  And if they didn't differ, then
show_modified() should not have been invoked?

I can see cases where we wouldn't want to bother looking at the
differences between to sparse directories, e.g. a
--restrict-to-sparsity-paths option to diff/log/etc, but I don't see
you setting this behind an option here.

>         if (get_stat_data(new_entry, &oid, &mode, cached, match_missing,
>                           &dirty_submodule, &revs->diffopt) < 0) {
>                 if (report_missing)
> diff --git a/unpack-trees.c b/unpack-trees.c
> index ef6a2b1c951c..703b0bdc9dfd 100644
> --- a/unpack-trees.c
> +++ b/unpack-trees.c
> @@ -1261,6 +1261,7 @@ static int unpack_callback(int n, unsigned long mask, unsigned long dirmask, str
>         struct cache_entry *src[MAX_UNPACK_TREES + 1] = { NULL, };
>         struct unpack_trees_options *o = info->data;
>         const struct name_entry *p = names;
> +       unsigned unpack_tree = 1;
>
>         /* Find first entry with a real name (we could use "mask" too) */
>         while (!p->mode)
> @@ -1307,7 +1308,8 @@ static int unpack_callback(int n, unsigned long mask, unsigned long dirmask, str
>                 }
>         }
>
> -       if (unpack_nondirectories(n, mask, dirmask, src, names, info) < 0)
> +       if (unpack_tree &&
> +           unpack_nondirectories(n, mask, dirmask, src, names, info) < 0)
>                 return -1;
>
>         if (o->merge && src[0]) {
> @@ -1337,7 +1339,8 @@ static int unpack_callback(int n, unsigned long mask, unsigned long dirmask, str
>                         }
>                 }
>
> -               if (traverse_trees_recursive(n, dirmask, mask & ~dirmask,
> +               if (unpack_tree &&
> +                   traverse_trees_recursive(n, dirmask, mask & ~dirmask,
>                                              names, info) < 0)
>                         return -1;
>                 return mask;
> --
> gitgitgadget

The unpack-trees.c changes make sense to me still.
Elijah Newren May 18, 2021, 2:06 a.m. UTC | #2
Sorry, I spoke too soon...

On Mon, May 17, 2021 at 7:03 PM Elijah Newren <newren@gmail.com> wrote:
>
> > diff --git a/unpack-trees.c b/unpack-trees.c
> > index ef6a2b1c951c..703b0bdc9dfd 100644
> > --- a/unpack-trees.c
> > +++ b/unpack-trees.c
> > @@ -1261,6 +1261,7 @@ static int unpack_callback(int n, unsigned long mask, unsigned long dirmask, str
> >         struct cache_entry *src[MAX_UNPACK_TREES + 1] = { NULL, };
> >         struct unpack_trees_options *o = info->data;
> >         const struct name_entry *p = names;
> > +       unsigned unpack_tree = 1;

Here, you set unpack_tree to 1.

> >
> >         /* Find first entry with a real name (we could use "mask" too) */
> >         while (!p->mode)
> > @@ -1307,7 +1308,8 @@ static int unpack_callback(int n, unsigned long mask, unsigned long dirmask, str
> >                 }
> >         }
> >
> > -       if (unpack_nondirectories(n, mask, dirmask, src, names, info) < 0)
> > +       if (unpack_tree &&

You check it's value here...

> > +           unpack_nondirectories(n, mask, dirmask, src, names, info) < 0)
> >                 return -1;
> >
> >         if (o->merge && src[0]) {
> > @@ -1337,7 +1339,8 @@ static int unpack_callback(int n, unsigned long mask, unsigned long dirmask, str
> >                         }
> >                 }
> >
> > -               if (traverse_trees_recursive(n, dirmask, mask & ~dirmask,
> > +               if (unpack_tree &&
...and here....

> > +                   traverse_trees_recursive(n, dirmask, mask & ~dirmask,
> >                                              names, info) < 0)
> >                         return -1;
> >                 return mask;

but you never set unpack_tree to 0, so this is wasted effort and you
always recurse.  The previous iteration had a case where it'd set
unpack_tree to 0 in a certain case, but you deleted that code in this
version.  Why?
Derrick Stolee May 18, 2021, 7:20 p.m. UTC | #3
On 5/17/2021 10:06 PM, Elijah Newren wrote:
> Sorry, I spoke too soon...
> 
> On Mon, May 17, 2021 at 7:03 PM Elijah Newren <newren@gmail.com> wrote:
>>
>>> diff --git a/unpack-trees.c b/unpack-trees.c
>>> index ef6a2b1c951c..703b0bdc9dfd 100644
>>> --- a/unpack-trees.c
>>> +++ b/unpack-trees.c
>>> @@ -1261,6 +1261,7 @@ static int unpack_callback(int n, unsigned long mask, unsigned long dirmask, str
>>>         struct cache_entry *src[MAX_UNPACK_TREES + 1] = { NULL, };
>>>         struct unpack_trees_options *o = info->data;
>>>         const struct name_entry *p = names;
>>> +       unsigned unpack_tree = 1;
> 
> Here, you set unpack_tree to 1.
> 
>>>
>>>         /* Find first entry with a real name (we could use "mask" too) */
>>>         while (!p->mode)
>>> @@ -1307,7 +1308,8 @@ static int unpack_callback(int n, unsigned long mask, unsigned long dirmask, str
>>>                 }
>>>         }
>>>
>>> -       if (unpack_nondirectories(n, mask, dirmask, src, names, info) < 0)
>>> +       if (unpack_tree &&
> 
> You check it's value here...
> 
>>> +           unpack_nondirectories(n, mask, dirmask, src, names, info) < 0)
>>>                 return -1;
>>>
>>>         if (o->merge && src[0]) {
>>> @@ -1337,7 +1339,8 @@ static int unpack_callback(int n, unsigned long mask, unsigned long dirmask, str
>>>                         }
>>>                 }
>>>
>>> -               if (traverse_trees_recursive(n, dirmask, mask & ~dirmask,
>>> +               if (unpack_tree &&
> ...and here....
> 
>>> +                   traverse_trees_recursive(n, dirmask, mask & ~dirmask,
>>>                                              names, info) < 0)
>>>                         return -1;
>>>                 return mask;
> 
> but you never set unpack_tree to 0, so this is wasted effort and you
> always recurse.  The previous iteration had a case where it'd set
> unpack_tree to 0 in a certain case, but you deleted that code in this
> version.  Why?

It appears that the changes to unpack-trees.c are no longer relevant,
and instead the changes to diff-lib.c (which were already out of place)
should instead be the focus. In fact, those changes to diff-lib.c can
be simplified and moved to path 10, so I will do that.

Thanks,
-Stolee
diff mbox series

Patch

diff --git a/diff-lib.c b/diff-lib.c
index b73cc1859a49..d5e7e01132ee 100644
--- a/diff-lib.c
+++ b/diff-lib.c
@@ -322,6 +322,9 @@  static void show_new_file(struct rev_info *revs,
 	unsigned int mode;
 	unsigned dirty_submodule = 0;
 
+	if (S_ISSPARSEDIR(new_file->ce_mode))
+		return;
+
 	/*
 	 * New file in the index: it might actually be different in
 	 * the working tree.
@@ -343,6 +346,9 @@  static int show_modified(struct rev_info *revs,
 	const struct object_id *oid;
 	unsigned dirty_submodule = 0;
 
+	if (S_ISSPARSEDIR(new_entry->ce_mode))
+		return 0;
+
 	if (get_stat_data(new_entry, &oid, &mode, cached, match_missing,
 			  &dirty_submodule, &revs->diffopt) < 0) {
 		if (report_missing)
diff --git a/unpack-trees.c b/unpack-trees.c
index ef6a2b1c951c..703b0bdc9dfd 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -1261,6 +1261,7 @@  static int unpack_callback(int n, unsigned long mask, unsigned long dirmask, str
 	struct cache_entry *src[MAX_UNPACK_TREES + 1] = { NULL, };
 	struct unpack_trees_options *o = info->data;
 	const struct name_entry *p = names;
+	unsigned unpack_tree = 1;
 
 	/* Find first entry with a real name (we could use "mask" too) */
 	while (!p->mode)
@@ -1307,7 +1308,8 @@  static int unpack_callback(int n, unsigned long mask, unsigned long dirmask, str
 		}
 	}
 
-	if (unpack_nondirectories(n, mask, dirmask, src, names, info) < 0)
+	if (unpack_tree &&
+	    unpack_nondirectories(n, mask, dirmask, src, names, info) < 0)
 		return -1;
 
 	if (o->merge && src[0]) {
@@ -1337,7 +1339,8 @@  static int unpack_callback(int n, unsigned long mask, unsigned long dirmask, str
 			}
 		}
 
-		if (traverse_trees_recursive(n, dirmask, mask & ~dirmask,
+		if (unpack_tree &&
+		    traverse_trees_recursive(n, dirmask, mask & ~dirmask,
 					     names, info) < 0)
 			return -1;
 		return mask;