diff mbox series

[3/7] sparse-checkout: pay attention to prefix for {set, add}

Message ID 679f869ff11b5c3e61081018f7eafa81c334f3d1.1644712798.git.gitgitgadget@gmail.com (mailing list archive)
State New, archived
Headers show
Series RFC: sparse checkout: make --cone mode the default, and check add/set argument validity | expand

Commit Message

Elijah Newren Feb. 13, 2022, 12:39 a.m. UTC
From: Elijah Newren <newren@gmail.com>

In cone mode, non-option arguments to set & add are clearly paths, and
as such, we should pay attention to prefix.

In non-cone mode, it is not clear that folks intend to provide paths
since the inputs are gitignore-style patterns.  Paying attention to
prefix would prevent folks from doing things like
   git sparse-checkout add /.gitattributes
   git sparse-checkout add '/toplevel-dir/*'
In fact, the former will result in
   fatal: '/.gitattributes' is outside repository...
while the later will result in
   fatal: Invalid path '/toplevel-dir': No such file or directory
despite the fact that both are valid gitignore-style patterns that would
select real files if added to the sparse-checkout file.  However, these
commands can be run successfully from the toplevel directory, and many
gitignore-style patterns ARE paths, and bash completion seems to be
suggesting directories and files, so perhaps for consistency we pay
attention to the prefix?  It's not clear what is okay here, but maybe
that's yet another reason to deprecate non-cone mode as we will do later
in this series.

For now, incorporate prefix into the positional arguments for either
cone or non-cone mode.  For additional discussion of this issue, see
https://lore.kernel.org/git/29f0410e-6dfa-2e86-394d-b1fb735e7608@gmail.com/

Helped-by: Junio Hamano <gitster@pobox.com>
Signed-off-by: Elijah Newren <newren@gmail.com>
---
 builtin/sparse-checkout.c          | 22 ++++++++++++++++++++++
 t/t1091-sparse-checkout-builtin.sh | 27 +++++++++++++++++++++++++++
 2 files changed, 49 insertions(+)

Comments

Derrick Stolee Feb. 14, 2022, 3:49 p.m. UTC | #1
On 2/12/2022 7:39 PM, Elijah Newren via GitGitGadget wrote:
> From: Elijah Newren <newren@gmail.com>
> 
> In cone mode, non-option arguments to set & add are clearly paths, and
> as such, we should pay attention to prefix.
> 
> In non-cone mode, it is not clear that folks intend to provide paths
> since the inputs are gitignore-style patterns.  Paying attention to
> prefix would prevent folks from doing things like
>    git sparse-checkout add /.gitattributes
>    git sparse-checkout add '/toplevel-dir/*'
> In fact, the former will result in
>    fatal: '/.gitattributes' is outside repository...
> while the later will result in
>    fatal: Invalid path '/toplevel-dir': No such file or directory
> despite the fact that both are valid gitignore-style patterns that would
> select real files if added to the sparse-checkout file.  However, these
> commands can be run successfully from the toplevel directory, and many
> gitignore-style patterns ARE paths, and bash completion seems to be
> suggesting directories and files, so perhaps for consistency we pay
> attention to the prefix?  It's not clear what is okay here, but maybe
> that's yet another reason to deprecate non-cone mode as we will do later
> in this series.
> 
> For now, incorporate prefix into the positional arguments for either
> cone or non-cone mode.  For additional discussion of this issue, see
> https://lore.kernel.org/git/29f0410e-6dfa-2e86-394d-b1fb735e7608@gmail.com/

Perhaps this was covered in the issue, but for non-cone mode, it
matters if there is a leading slash or not in the pattern. Will
this change make it impossible for a user to input that distinction?

Will there still be a difference between:

	git sparse-checkout set --no-cone /.vs/

and

	git sparse-checkout set --no-cone .vs/

?

> Helped-by: Junio Hamano <gitster@pobox.com>
> Signed-off-by: Elijah Newren <newren@gmail.com>

This could probably use a

  Reported-by: Lessley Dennington <lessleydennington@gmail.com>

> +static void sanitize_paths(int argc, const char **argv, const char *prefix)
> +{
> +	if (!argc)
> +		return;
> +
> +	if (prefix && *prefix) {
> +		/*
> +		 * The args are not pathspecs, so unfortunately we
> +		 * cannot imitate how cmd_add() uses parse_pathspec().
> +		 */
> +		int i;
> +		int prefix_len = strlen(prefix);
> +
> +		for (i = 0; i < argc; i++)
> +			argv[i] = prefix_path(prefix, prefix_len, argv[i]);
> +	}
> +}
> +
>  static char const * const builtin_sparse_checkout_add_usage[] = {
>  	N_("git sparse-checkout add (--stdin | <patterns>)"),
>  	NULL
> @@ -708,6 +726,8 @@ static int sparse_checkout_add(int argc, const char **argv, const char *prefix)
>  			     builtin_sparse_checkout_add_usage,
>  			     PARSE_OPT_KEEP_UNKNOWN);
>  
> +	sanitize_paths(argc, argv, prefix);
> +
>  	return modify_pattern_list(argc, argv, add_opts.use_stdin, ADD);
>  }
>  
> @@ -759,6 +779,8 @@ static int sparse_checkout_set(int argc, const char **argv, const char *prefix)
>  	if (!core_sparse_checkout_cone && argc == 0) {
>  		argv = default_patterns;
>  		argc = default_patterns_nr;
> +	} else {
> +		sanitize_paths(argc, argv, prefix);
>  	}

Code changes appear to do as described: apply the prefix everywhere
it matters, no matter the mode.

> +test_expect_success 'set from subdir pays attention to prefix' '
> +	git -C repo sparse-checkout disable &&
> +	git -C repo/deep sparse-checkout set --cone deeper2 ../folder1 &&
> +
> +	git -C repo sparse-checkout list >actual &&
> +
> +	cat >expect <<-\EOF &&
> +	deep/deeper2
> +	folder1
> +	EOF
> +	test_cmp expect actual
> +'
> +
> +test_expect_success 'add from subdir pays attention to prefix' '
> +	git -C repo sparse-checkout set --cone deep/deeper2 &&
> +	git -C repo/deep sparse-checkout add deeper1/deepest ../folder1 &&
> +
> +	git -C repo sparse-checkout list >actual &&
> +
> +	cat >expect <<-\EOF &&
> +	deep/deeper1/deepest
> +	deep/deeper2
> +	folder1
> +	EOF
> +	test_cmp expect actual
> +'
> +
>  test_done

These tests could use a non-cone-mode version to demonstrate the behavior
in that mode.

Thanks,
-Stolee
Elijah Newren Feb. 15, 2022, 3:52 a.m. UTC | #2
On Mon, Feb 14, 2022 at 7:50 AM Derrick Stolee <derrickstolee@github.com> wrote:
>
> On 2/12/2022 7:39 PM, Elijah Newren via GitGitGadget wrote:
> > From: Elijah Newren <newren@gmail.com>
> >
> > In cone mode, non-option arguments to set & add are clearly paths, and
> > as such, we should pay attention to prefix.
> >
> > In non-cone mode, it is not clear that folks intend to provide paths
> > since the inputs are gitignore-style patterns.  Paying attention to
> > prefix would prevent folks from doing things like
> >    git sparse-checkout add /.gitattributes
> >    git sparse-checkout add '/toplevel-dir/*'
> > In fact, the former will result in
> >    fatal: '/.gitattributes' is outside repository...
> > while the later will result in
> >    fatal: Invalid path '/toplevel-dir': No such file or directory
> > despite the fact that both are valid gitignore-style patterns that would
> > select real files if added to the sparse-checkout file.  However, these
> > commands can be run successfully from the toplevel directory, and many
> > gitignore-style patterns ARE paths, and bash completion seems to be
> > suggesting directories and files, so perhaps for consistency we pay
> > attention to the prefix?  It's not clear what is okay here, but maybe
> > that's yet another reason to deprecate non-cone mode as we will do later
> > in this series.
> >
> > For now, incorporate prefix into the positional arguments for either
> > cone or non-cone mode.  For additional discussion of this issue, see
> > https://lore.kernel.org/git/29f0410e-6dfa-2e86-394d-b1fb735e7608@gmail.com/
>
> Perhaps this was covered in the issue, but for non-cone mode, it
> matters if there is a leading slash or not in the pattern. Will
> this change make it impossible for a user to input that distinction?
>
> Will there still be a difference between:
>
>         git sparse-checkout set --no-cone /.vs/
>
> and
>
>         git sparse-checkout set --no-cone .vs/
>
> ?

If you are in the toplevel directory, you can run either of these and
they have the same meaning they traditionally had.

Before this patch, if you are in a subdirectory, the first of those
would have specified a toplevel ".vs" directory, and the second would
have specified a ".vs/" directory in the toplevel OR any subdirectory.
Those choices might be what the user wanted, or both of those could be
a nasty surprise for the user.

After this patch, if you are in a subdirectory, the first of those
throw an error:
    $ git sparse-checkout set --no-cone /.vs/
    fatal: Invalid path '/.vs': No such file or directory
(which might be an annoyance, but how would you possibly specify a
leading slash on a path that needs to be prefixed anyway?)  The second
will specify a SUBDIR/.vs/ from the toplevel directory (which again,
might be what the user wanted, or might be a nasty surprise if they
were trying to specify a pattern relative to the root).

Does this change make sense?  For some users, sure -- especially those
with the idea that you specify paths for non-cone mode (though
bash-completion may guide folks to presume that).  But for those who
understand that non-cone mode is all about patterns and that we have a
single toplevel file where everything must be recorded, it's possibly
detrimental to them.  To me, I wonder if it seems fraught with nasty
surprises for us to do anything other than throw an error when
--no-cone is specified and we are in a subdirectory.  Perhaps I should
do that instead of this change here.

> > Helped-by: Junio Hamano <gitster@pobox.com>
> > Signed-off-by: Elijah Newren <newren@gmail.com>
>
> This could probably use a
>
>   Reported-by: Lessley Dennington <lessleydennington@gmail.com>

It'd be more of a "Report-Formalized-by:" if we were to include such a
tag.  Check the history here:
https://lore.kernel.org/git/52d638fc-e7e7-1b0a-482b-cff7c9500b92@gmail.com/

In short: I was the original reporter; I noted the issue while
reviewing her completion series.  The bug was not related to her
series, but her series did prompt me to check and discover the issue.
She didn't want the issue to get lost, and decided to make a formal
report.

> > +static void sanitize_paths(int argc, const char **argv, const char *prefix)
> > +{
> > +     if (!argc)
> > +             return;
> > +
> > +     if (prefix && *prefix) {
> > +             /*
> > +              * The args are not pathspecs, so unfortunately we
> > +              * cannot imitate how cmd_add() uses parse_pathspec().
> > +              */
> > +             int i;
> > +             int prefix_len = strlen(prefix);
> > +
> > +             for (i = 0; i < argc; i++)
> > +                     argv[i] = prefix_path(prefix, prefix_len, argv[i]);
> > +     }
> > +}
> > +
> >  static char const * const builtin_sparse_checkout_add_usage[] = {
> >       N_("git sparse-checkout add (--stdin | <patterns>)"),
> >       NULL
> > @@ -708,6 +726,8 @@ static int sparse_checkout_add(int argc, const char **argv, const char *prefix)
> >                            builtin_sparse_checkout_add_usage,
> >                            PARSE_OPT_KEEP_UNKNOWN);
> >
> > +     sanitize_paths(argc, argv, prefix);
> > +
> >       return modify_pattern_list(argc, argv, add_opts.use_stdin, ADD);
> >  }
> >
> > @@ -759,6 +779,8 @@ static int sparse_checkout_set(int argc, const char **argv, const char *prefix)
> >       if (!core_sparse_checkout_cone && argc == 0) {
> >               argv = default_patterns;
> >               argc = default_patterns_nr;
> > +     } else {
> > +             sanitize_paths(argc, argv, prefix);
> >       }
>
> Code changes appear to do as described: apply the prefix everywhere
> it matters, no matter the mode.
>
> > +test_expect_success 'set from subdir pays attention to prefix' '
> > +     git -C repo sparse-checkout disable &&
> > +     git -C repo/deep sparse-checkout set --cone deeper2 ../folder1 &&
> > +
> > +     git -C repo sparse-checkout list >actual &&
> > +
> > +     cat >expect <<-\EOF &&
> > +     deep/deeper2
> > +     folder1
> > +     EOF
> > +     test_cmp expect actual
> > +'
> > +
> > +test_expect_success 'add from subdir pays attention to prefix' '
> > +     git -C repo sparse-checkout set --cone deep/deeper2 &&
> > +     git -C repo/deep sparse-checkout add deeper1/deepest ../folder1 &&
> > +
> > +     git -C repo sparse-checkout list >actual &&
> > +
> > +     cat >expect <<-\EOF &&
> > +     deep/deeper1/deepest
> > +     deep/deeper2
> > +     folder1
> > +     EOF
> > +     test_cmp expect actual
> > +'
> > +
> >  test_done
>
> These tests could use a non-cone-mode version to demonstrate the behavior
> in that mode.

Fair enough, though I hesitated in part because I wasn't sure we even
wanted to make that change, and I figured getting that answer might be
useful before writing the tests.
Derrick Stolee Feb. 15, 2022, 2:53 p.m. UTC | #3
On 2/14/2022 10:52 PM, Elijah Newren wrote:
> On Mon, Feb 14, 2022 at 7:50 AM Derrick Stolee <derrickstolee@github.com> wrote:
>>
>> On 2/12/2022 7:39 PM, Elijah Newren via GitGitGadget wrote:
>>> From: Elijah Newren <newren@gmail.com>
>>>
>>> In cone mode, non-option arguments to set & add are clearly paths, and
>>> as such, we should pay attention to prefix.
>>>
>>> In non-cone mode, it is not clear that folks intend to provide paths
>>> since the inputs are gitignore-style patterns.  Paying attention to
>>> prefix would prevent folks from doing things like
>>>    git sparse-checkout add /.gitattributes
>>>    git sparse-checkout add '/toplevel-dir/*'
>>> In fact, the former will result in
>>>    fatal: '/.gitattributes' is outside repository...
>>> while the later will result in
>>>    fatal: Invalid path '/toplevel-dir': No such file or directory
>>> despite the fact that both are valid gitignore-style patterns that would
>>> select real files if added to the sparse-checkout file.  However, these
>>> commands can be run successfully from the toplevel directory, and many
>>> gitignore-style patterns ARE paths, and bash completion seems to be
>>> suggesting directories and files, so perhaps for consistency we pay
>>> attention to the prefix?  It's not clear what is okay here, but maybe
>>> that's yet another reason to deprecate non-cone mode as we will do later
>>> in this series.
>>>
>>> For now, incorporate prefix into the positional arguments for either
>>> cone or non-cone mode.  For additional discussion of this issue, see
>>> https://lore.kernel.org/git/29f0410e-6dfa-2e86-394d-b1fb735e7608@gmail.com/
>>
>> Perhaps this was covered in the issue, but for non-cone mode, it
>> matters if there is a leading slash or not in the pattern. Will
>> this change make it impossible for a user to input that distinction?
>>
>> Will there still be a difference between:
>>
>>         git sparse-checkout set --no-cone /.vs/
>>
>> and
>>
>>         git sparse-checkout set --no-cone .vs/
>>
>> ?
> 
> If you are in the toplevel directory, you can run either of these and
> they have the same meaning they traditionally had.
> 
> Before this patch, if you are in a subdirectory, the first of those
> would have specified a toplevel ".vs" directory, and the second would
> have specified a ".vs/" directory in the toplevel OR any subdirectory.
> Those choices might be what the user wanted, or both of those could be
> a nasty surprise for the user.
> 
> After this patch, if you are in a subdirectory, the first of those
> throw an error:
>     $ git sparse-checkout set --no-cone /.vs/
>     fatal: Invalid path '/.vs': No such file or directory
> (which might be an annoyance, but how would you possibly specify a
> leading slash on a path that needs to be prefixed anyway?)  The second
> will specify a SUBDIR/.vs/ from the toplevel directory (which again,
> might be what the user wanted, or might be a nasty surprise if they
> were trying to specify a pattern relative to the root).
> 
> Does this change make sense?  For some users, sure -- especially those
> with the idea that you specify paths for non-cone mode (though
> bash-completion may guide folks to presume that).  But for those who
> understand that non-cone mode is all about patterns and that we have a
> single toplevel file where everything must be recorded, it's possibly
> detrimental to them.  To me, I wonder if it seems fraught with nasty
> surprises for us to do anything other than throw an error when
> --no-cone is specified and we are in a subdirectory.  Perhaps I should
> do that instead of this change here.

I'd be in favor of this second approach of requiring the base directory.

>>> Helped-by: Junio Hamano <gitster@pobox.com>
>>> Signed-off-by: Elijah Newren <newren@gmail.com>
>>
>> This could probably use a
>>
>>   Reported-by: Lessley Dennington <lessleydennington@gmail.com>
> 
> It'd be more of a "Report-Formalized-by:" if we were to include such a
> tag.  Check the history here:
> https://lore.kernel.org/git/52d638fc-e7e7-1b0a-482b-cff7c9500b92@gmail.com/
> 
> In short: I was the original reporter; I noted the issue while
> reviewing her completion series.  The bug was not related to her
> series, but her series did prompt me to check and discover the issue.
> She didn't want the issue to get lost, and decided to make a formal
> report.

That makes sense. I wasn't caught up with that conversation.

>> These tests could use a non-cone-mode version to demonstrate the behavior
>> in that mode.
> 
> Fair enough, though I hesitated in part because I wasn't sure we even
> wanted to make that change, and I figured getting that answer might be
> useful before writing the tests.

Understandable.

Thanks,
-Stolee
diff mbox series

Patch

diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c
index 8d595189ea3..bec423d5af9 100644
--- a/builtin/sparse-checkout.c
+++ b/builtin/sparse-checkout.c
@@ -681,6 +681,24 @@  static int modify_pattern_list(int argc, const char **argv, int use_stdin,
 	return result;
 }
 
+static void sanitize_paths(int argc, const char **argv, const char *prefix)
+{
+	if (!argc)
+		return;
+
+	if (prefix && *prefix) {
+		/*
+		 * The args are not pathspecs, so unfortunately we
+		 * cannot imitate how cmd_add() uses parse_pathspec().
+		 */
+		int i;
+		int prefix_len = strlen(prefix);
+
+		for (i = 0; i < argc; i++)
+			argv[i] = prefix_path(prefix, prefix_len, argv[i]);
+	}
+}
+
 static char const * const builtin_sparse_checkout_add_usage[] = {
 	N_("git sparse-checkout add (--stdin | <patterns>)"),
 	NULL
@@ -708,6 +726,8 @@  static int sparse_checkout_add(int argc, const char **argv, const char *prefix)
 			     builtin_sparse_checkout_add_usage,
 			     PARSE_OPT_KEEP_UNKNOWN);
 
+	sanitize_paths(argc, argv, prefix);
+
 	return modify_pattern_list(argc, argv, add_opts.use_stdin, ADD);
 }
 
@@ -759,6 +779,8 @@  static int sparse_checkout_set(int argc, const char **argv, const char *prefix)
 	if (!core_sparse_checkout_cone && argc == 0) {
 		argv = default_patterns;
 		argc = default_patterns_nr;
+	} else {
+		sanitize_paths(argc, argv, prefix);
 	}
 
 	return modify_pattern_list(argc, argv, set_opts.use_stdin, REPLACE);
diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh
index 4a7394f7a58..b9e6987f5a6 100755
--- a/t/t1091-sparse-checkout-builtin.sh
+++ b/t/t1091-sparse-checkout-builtin.sh
@@ -816,4 +816,31 @@  test_expect_success 'malformed cone-mode patterns' '
 	grep "warning: disabling cone pattern matching" err
 '
 
+test_expect_success 'set from subdir pays attention to prefix' '
+	git -C repo sparse-checkout disable &&
+	git -C repo/deep sparse-checkout set --cone deeper2 ../folder1 &&
+
+	git -C repo sparse-checkout list >actual &&
+
+	cat >expect <<-\EOF &&
+	deep/deeper2
+	folder1
+	EOF
+	test_cmp expect actual
+'
+
+test_expect_success 'add from subdir pays attention to prefix' '
+	git -C repo sparse-checkout set --cone deep/deeper2 &&
+	git -C repo/deep sparse-checkout add deeper1/deepest ../folder1 &&
+
+	git -C repo sparse-checkout list >actual &&
+
+	cat >expect <<-\EOF &&
+	deep/deeper1/deepest
+	deep/deeper2
+	folder1
+	EOF
+	test_cmp expect actual
+'
+
 test_done