diff mbox series

[4/9] sparse-checkout: 'add' subcommand

Message ID 0f095e85d5bf29346bdc5bf1707bb51eaf2202ae.1566313865.git.gitgitgadget@gmail.com (mailing list archive)
State New, archived
Headers show
Series New sparse-checkout builtin and "cone" mode | expand

Commit Message

John Passaro via GitGitGadget Aug. 20, 2019, 3:11 p.m. UTC
From: Derrick Stolee <dstolee@microsoft.com>

The 'git sparse-checkout add' subcommand takes a list of patterns
over stdin and writes them to the sparse-checkout file. Then, it
updates the working directory using 'git read-tree -mu HEAD'.

Note: if a user adds a negative pattern that would lead to the
removal of a non-empty directory, then Git may not delete that
directory (on Windows).

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 Documentation/git-sparse-checkout.txt |  4 ++++
 builtin/sparse-checkout.c             | 32 ++++++++++++++++++++++++++-
 t/t1091-sparse-checkout-builtin.sh    | 20 +++++++++++++++++
 3 files changed, 55 insertions(+), 1 deletion(-)

Comments

Elijah Newren Aug. 23, 2019, 11:30 p.m. UTC | #1
On Tue, Aug 20, 2019 at 8:12 AM Derrick Stolee via GitGitGadget
<gitgitgadget@gmail.com> wrote:
>
> From: Derrick Stolee <dstolee@microsoft.com>
>
> The 'git sparse-checkout add' subcommand takes a list of patterns
> over stdin and writes them to the sparse-checkout file. Then, it
> updates the working directory using 'git read-tree -mu HEAD'.

As mentioned in response to the cover letter, I'd rather see it take
patterns as positional arguments (though requiring a '--' argument
before any patterns that start with a hyphen).  It could also take
--stdin to read from stdin.

> Note: if a user adds a negative pattern that would lead to the
> removal of a non-empty directory, then Git may not delete that
> directory (on Windows).

This sounds like you're re-iterating a bug mentioned earlier, but if
someone in the future comes and reads this comment it might sound like
you're saying git can avoid clearing a directory for optimization or
other reasons.  (And, of course, it'd be nice to figure out why this
bug exists.)

Another question this brings up, though, is that you worked around
this bug in 'init' so why would you not also do so for 'add'?  Seems
inconsistent to me.

> Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
> ---
>  Documentation/git-sparse-checkout.txt |  4 ++++
>  builtin/sparse-checkout.c             | 32 ++++++++++++++++++++++++++-
>  t/t1091-sparse-checkout-builtin.sh    | 20 +++++++++++++++++
>  3 files changed, 55 insertions(+), 1 deletion(-)
>
> diff --git a/Documentation/git-sparse-checkout.txt b/Documentation/git-sparse-checkout.txt
> index 50c53ee60a..6f540a3443 100644
> --- a/Documentation/git-sparse-checkout.txt
> +++ b/Documentation/git-sparse-checkout.txt
> @@ -34,6 +34,10 @@ COMMANDS
>         by Git. Add patterns to the sparse-checkout file to
>         repopulate the working directory.
>
> +'add'::
> +       Add a set of patterns to the sparse-checkout file, as given over
> +       stdin. Updates the working directory to match the new patterns.
> +
>  SPARSE CHECKOUT
>  ----------------
>
> diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c
> index 86d24e6295..ec6134fecc 100644
> --- a/builtin/sparse-checkout.c
> +++ b/builtin/sparse-checkout.c
> @@ -8,7 +8,7 @@
>  #include "strbuf.h"
>
>  static char const * const builtin_sparse_checkout_usage[] = {
> -       N_("git sparse-checkout [init|list]"),
> +       N_("git sparse-checkout [init|add|list]"),
>         NULL
>  };
>
> @@ -166,6 +166,34 @@ static int sparse_checkout_init(int argc, const char **argv)
>         return sc_read_tree();
>  }
>
> +static int sparse_checkout_add(int argc, const char **argv)
> +{
> +       struct exclude_list el;
> +       char *sparse_filename;
> +       FILE *fp;
> +       struct strbuf line = STRBUF_INIT;
> +
> +       memset(&el, 0, sizeof(el));
> +
> +       sparse_filename = get_sparse_checkout_filename();
> +       add_excludes_from_file_to_list(sparse_filename, "", 0, &el, NULL);

el is an exclude_list and we call add_excludes_..., but it's actually
an *include* list.  This is going to cause errors at some point, and
will cause lots of headaches.

> +
> +       fp = fopen(sparse_filename, "w");
> +       write_excludes_to_file(fp, &el);
> +
> +       while (!strbuf_getline(&line, stdin)) {
> +               strbuf_trim(&line);
> +               fprintf(fp, "%s\n", line.buf);
> +       }

Should we first check whether these excludes are already in the
sparse-checkout file?

> +       fclose(fp);
> +       free(sparse_filename);
> +
> +       clear_exclude_list(&el);
> +
> +       return sc_read_tree();

What if someone calls 'git sparse-checkout add' without first calling
'git sparse-checkout init'?  As far as I can tell, core.sparseCheckout
will be unset (i.e. treated as false), meaning that this operation
will do some work, but result in no changes and a report of success.
After users try to figure out why it won't work, they eventually run
'git sparse-checkout init', which will delete all the entries they
previously added with the 'add' subcommand.

What should happen instead?

> +}
> +
>  int cmd_sparse_checkout(int argc, const char **argv, const char *prefix)
>  {
>         static struct option builtin_sparse_checkout_options[] = {
> @@ -187,6 +215,8 @@ int cmd_sparse_checkout(int argc, const char **argv, const char *prefix)
>                         return sparse_checkout_list(argc, argv);
>                 if (!strcmp(argv[0], "init"))
>                         return sparse_checkout_init(argc, argv);
> +               if (!strcmp(argv[0], "add"))
> +                       return sparse_checkout_add(argc, argv);
>         }
>
>         usage_with_options(builtin_sparse_checkout_usage,
> diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh
> index b7d5f15830..499bd8d6d0 100755
> --- a/t/t1091-sparse-checkout-builtin.sh
> +++ b/t/t1091-sparse-checkout-builtin.sh
> @@ -100,4 +100,24 @@ test_expect_success 'clone --sparse' '
>         test_cmp expect dir
>  '
>
> +test_expect_success 'add to existing sparse-checkout' '
> +       echo "/folder2/*" | git -C repo sparse-checkout add &&

I've always been using '/folder2/' in sparse-checkout, without the
trailing asterisk.  That seems more friendly for cone mode too.  Are
there benefits to keeping the trailing asterisk?

> +       cat >expect <<-EOF &&
> +               /*
> +               !/*/*
> +               /folder1/*
> +               /folder2/*
> +       EOF
> +       git -C repo sparse-checkout list >actual &&
> +       test_cmp expect actual &&
> +       test_cmp expect repo/.git/info/sparse-checkout &&
> +       ls repo >dir  &&
> +       cat >expect <<-EOF &&
> +               a
> +               folder1
> +               folder2
> +       EOF
> +       test_cmp expect dir
> +'
> +
>  test_done
> \ No newline at end of file
> --
> gitgitgadget
>
Derrick Stolee Sept. 18, 2019, 1:55 p.m. UTC | #2
On 8/23/2019 7:30 PM, Elijah Newren wrote:
> On Tue, Aug 20, 2019 at 8:12 AM Derrick Stolee via GitGitGadget
> <gitgitgadget@gmail.com> wrote:
>>
>> From: Derrick Stolee <dstolee@microsoft.com>
>>
>> The 'git sparse-checkout add' subcommand takes a list of patterns
>> over stdin and writes them to the sparse-checkout file. Then, it
>> updates the working directory using 'git read-tree -mu HEAD'.
> 
> As mentioned in response to the cover letter, I'd rather see it take
> patterns as positional arguments (though requiring a '--' argument
> before any patterns that start with a hyphen).  It could also take
> --stdin to read from stdin.
> 
>> Note: if a user adds a negative pattern that would lead to the
>> removal of a non-empty directory, then Git may not delete that
>> directory (on Windows).
> 
> This sounds like you're re-iterating a bug mentioned earlier, but if
> someone in the future comes and reads this comment it might sound like
> you're saying git can avoid clearing a directory for optimization or
> other reasons.  (And, of course, it'd be nice to figure out why this
> bug exists.)
> 
> Another question this brings up, though, is that you worked around
> this bug in 'init' so why would you not also do so for 'add'?  Seems
> inconsistent to me.
> 
>> Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
>> ---
>>  Documentation/git-sparse-checkout.txt |  4 ++++
>>  builtin/sparse-checkout.c             | 32 ++++++++++++++++++++++++++-
>>  t/t1091-sparse-checkout-builtin.sh    | 20 +++++++++++++++++
>>  3 files changed, 55 insertions(+), 1 deletion(-)
>>
>> diff --git a/Documentation/git-sparse-checkout.txt b/Documentation/git-sparse-checkout.txt
>> index 50c53ee60a..6f540a3443 100644
>> --- a/Documentation/git-sparse-checkout.txt
>> +++ b/Documentation/git-sparse-checkout.txt
>> @@ -34,6 +34,10 @@ COMMANDS
>>         by Git. Add patterns to the sparse-checkout file to
>>         repopulate the working directory.
>>
>> +'add'::
>> +       Add a set of patterns to the sparse-checkout file, as given over
>> +       stdin. Updates the working directory to match the new patterns.
>> +
>>  SPARSE CHECKOUT
>>  ----------------
>>
>> diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c
>> index 86d24e6295..ec6134fecc 100644
>> --- a/builtin/sparse-checkout.c
>> +++ b/builtin/sparse-checkout.c
>> @@ -8,7 +8,7 @@
>>  #include "strbuf.h"
>>
>>  static char const * const builtin_sparse_checkout_usage[] = {
>> -       N_("git sparse-checkout [init|list]"),
>> +       N_("git sparse-checkout [init|add|list]"),
>>         NULL
>>  };
>>
>> @@ -166,6 +166,34 @@ static int sparse_checkout_init(int argc, const char **argv)
>>         return sc_read_tree();
>>  }
>>
>> +static int sparse_checkout_add(int argc, const char **argv)
>> +{
>> +       struct exclude_list el;
>> +       char *sparse_filename;
>> +       FILE *fp;
>> +       struct strbuf line = STRBUF_INIT;
>> +
>> +       memset(&el, 0, sizeof(el));
>> +
>> +       sparse_filename = get_sparse_checkout_filename();
>> +       add_excludes_from_file_to_list(sparse_filename, "", 0, &el, NULL);
> 
> el is an exclude_list and we call add_excludes_..., but it's actually
> an *include* list.  This is going to cause errors at some point, and
> will cause lots of headaches.
> 
>> +
>> +       fp = fopen(sparse_filename, "w");
>> +       write_excludes_to_file(fp, &el);
>> +
>> +       while (!strbuf_getline(&line, stdin)) {
>> +               strbuf_trim(&line);
>> +               fprintf(fp, "%s\n", line.buf);
>> +       }
> 
> Should we first check whether these excludes are already in the
> sparse-checkout file?
> 
>> +       fclose(fp);
>> +       free(sparse_filename);
>> +
>> +       clear_exclude_list(&el);
>> +
>> +       return sc_read_tree();
> 
> What if someone calls 'git sparse-checkout add' without first calling
> 'git sparse-checkout init'?  As far as I can tell, core.sparseCheckout
> will be unset (i.e. treated as false), meaning that this operation
> will do some work, but result in no changes and a report of success.
> After users try to figure out why it won't work, they eventually run
> 'git sparse-checkout init', which will delete all the entries they
> previously added with the 'add' subcommand.
> 
> What should happen instead?

If someone runs 'git sparse-checkout init' after an 'add', the
sparse-checkout file has contents, so the init does not overwrite
those.

(In the update I'm working on, 'init' doesn't delete folders, so
it will only remove the files that do not match the patterns.)

>> +}
>> +
>>  int cmd_sparse_checkout(int argc, const char **argv, const char *prefix)
>>  {
>>         static struct option builtin_sparse_checkout_options[] = {
>> @@ -187,6 +215,8 @@ int cmd_sparse_checkout(int argc, const char **argv, const char *prefix)
>>                         return sparse_checkout_list(argc, argv);
>>                 if (!strcmp(argv[0], "init"))
>>                         return sparse_checkout_init(argc, argv);
>> +               if (!strcmp(argv[0], "add"))
>> +                       return sparse_checkout_add(argc, argv);
>>         }
>>
>>         usage_with_options(builtin_sparse_checkout_usage,
>> diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh
>> index b7d5f15830..499bd8d6d0 100755
>> --- a/t/t1091-sparse-checkout-builtin.sh
>> +++ b/t/t1091-sparse-checkout-builtin.sh
>> @@ -100,4 +100,24 @@ test_expect_success 'clone --sparse' '
>>         test_cmp expect dir
>>  '
>>
>> +test_expect_success 'add to existing sparse-checkout' '
>> +       echo "/folder2/*" | git -C repo sparse-checkout add &&
> 
> I've always been using '/folder2/' in sparse-checkout, without the
> trailing asterisk.  That seems more friendly for cone mode too.  Are
> there benefits to keeping the trailing asterisk?

I think I've been seeing issues with pattern matching on Windows without
the trailing asterisk. I'm currently double-checking to make sure this
is important or not.
 
>> +       cat >expect <<-EOF &&
>> +               /*
>> +               !/*/*
>> +               /folder1/*
>> +               /folder2/*
>> +       EOF
>> +       git -C repo sparse-checkout list >actual &&
>> +       test_cmp expect actual &&
>> +       test_cmp expect repo/.git/info/sparse-checkout &&
>> +       ls repo >dir  &&
>> +       cat >expect <<-EOF &&
>> +               a
>> +               folder1
>> +               folder2
>> +       EOF
>> +       test_cmp expect dir
>> +'
>> +
>>  test_done
>> \ No newline at end of file

I'm trying to fix this newline issue everywhere.

Thanks,
-Stolee
Elijah Newren Sept. 18, 2019, 2:56 p.m. UTC | #3
On Wed, Sep 18, 2019 at 6:55 AM Derrick Stolee <stolee@gmail.com> wrote:
>
> On 8/23/2019 7:30 PM, Elijah Newren wrote:
> > On Tue, Aug 20, 2019 at 8:12 AM Derrick Stolee via GitGitGadget
> > <gitgitgadget@gmail.com> wrote:
> >>
...
> >> diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh
> >> index b7d5f15830..499bd8d6d0 100755
> >> --- a/t/t1091-sparse-checkout-builtin.sh
> >> +++ b/t/t1091-sparse-checkout-builtin.sh
> >> @@ -100,4 +100,24 @@ test_expect_success 'clone --sparse' '
> >>         test_cmp expect dir
> >>  '
> >>
> >> +test_expect_success 'add to existing sparse-checkout' '
> >> +       echo "/folder2/*" | git -C repo sparse-checkout add &&
> >
> > I've always been using '/folder2/' in sparse-checkout, without the
> > trailing asterisk.  That seems more friendly for cone mode too.  Are
> > there benefits to keeping the trailing asterisk?
>
> I think I've been seeing issues with pattern matching on Windows without
> the trailing asterisk. I'm currently double-checking to make sure this
> is important or not.

Can you try with the en/clean-nested-with-ignored topic in pu to see
if that fixes those issues?
Derrick Stolee Sept. 18, 2019, 5:23 p.m. UTC | #4
On 9/18/2019 10:56 AM, Elijah Newren wrote:
> On Wed, Sep 18, 2019 at 6:55 AM Derrick Stolee <stolee@gmail.com> wrote:
>>
>> On 8/23/2019 7:30 PM, Elijah Newren wrote:
>>> On Tue, Aug 20, 2019 at 8:12 AM Derrick Stolee via GitGitGadget
>>> <gitgitgadget@gmail.com> wrote:
>>>>
> ...
>>>> diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh
>>>> index b7d5f15830..499bd8d6d0 100755
>>>> --- a/t/t1091-sparse-checkout-builtin.sh
>>>> +++ b/t/t1091-sparse-checkout-builtin.sh
>>>> @@ -100,4 +100,24 @@ test_expect_success 'clone --sparse' '
>>>>         test_cmp expect dir
>>>>  '
>>>>
>>>> +test_expect_success 'add to existing sparse-checkout' '
>>>> +       echo "/folder2/*" | git -C repo sparse-checkout add &&
>>>
>>> I've always been using '/folder2/' in sparse-checkout, without the
>>> trailing asterisk.  That seems more friendly for cone mode too.  Are
>>> there benefits to keeping the trailing asterisk?
>>
>> I think I've been seeing issues with pattern matching on Windows without
>> the trailing asterisk. I'm currently double-checking to make sure this
>> is important or not.
> 
> Can you try with the en/clean-nested-with-ignored topic in pu to see
> if that fixes those issues?

Merging with that branch was very difficult. There is a lot of unshared
history between our branches.

Instead, I tried once more to dig into the strange issue on Windows, and
it appears it is an issue with how the Git for Windows SDK modifies shell
arguments with a "/".

When I ran `git sparse-checkout set "/folder1/*"` it worked.

When I run `git sparse-checkout set "/folder1/"`, the SDK completes that
argument to "C:/git-sdk-64/folder1/" on my machine (something more
complicated on the build machine). It's not actually a bug in the Git
code, but something in the build and test environment.

I can get around it by testing the builtin without using these cone-like
patterns. When using `git sparse-checkout set folder1 folder2` in cone
mode, Git does the right thing.

Sorry for the noise here.

-Stolee
diff mbox series

Patch

diff --git a/Documentation/git-sparse-checkout.txt b/Documentation/git-sparse-checkout.txt
index 50c53ee60a..6f540a3443 100644
--- a/Documentation/git-sparse-checkout.txt
+++ b/Documentation/git-sparse-checkout.txt
@@ -34,6 +34,10 @@  COMMANDS
 	by Git. Add patterns to the sparse-checkout file to
 	repopulate the working directory.
 
+'add'::
+	Add a set of patterns to the sparse-checkout file, as given over
+	stdin. Updates the working directory to match the new patterns.
+
 SPARSE CHECKOUT
 ----------------
 
diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c
index 86d24e6295..ec6134fecc 100644
--- a/builtin/sparse-checkout.c
+++ b/builtin/sparse-checkout.c
@@ -8,7 +8,7 @@ 
 #include "strbuf.h"
 
 static char const * const builtin_sparse_checkout_usage[] = {
-	N_("git sparse-checkout [init|list]"),
+	N_("git sparse-checkout [init|add|list]"),
 	NULL
 };
 
@@ -166,6 +166,34 @@  static int sparse_checkout_init(int argc, const char **argv)
 	return sc_read_tree();
 }
 
+static int sparse_checkout_add(int argc, const char **argv)
+{
+	struct exclude_list el;
+	char *sparse_filename;
+	FILE *fp;
+	struct strbuf line = STRBUF_INIT;
+
+	memset(&el, 0, sizeof(el));
+
+	sparse_filename = get_sparse_checkout_filename();
+	add_excludes_from_file_to_list(sparse_filename, "", 0, &el, NULL);
+
+	fp = fopen(sparse_filename, "w");
+	write_excludes_to_file(fp, &el);
+
+	while (!strbuf_getline(&line, stdin)) {
+		strbuf_trim(&line);
+		fprintf(fp, "%s\n", line.buf);
+	}
+
+	fclose(fp);
+	free(sparse_filename);
+
+	clear_exclude_list(&el);
+
+	return sc_read_tree();
+}
+
 int cmd_sparse_checkout(int argc, const char **argv, const char *prefix)
 {
 	static struct option builtin_sparse_checkout_options[] = {
@@ -187,6 +215,8 @@  int cmd_sparse_checkout(int argc, const char **argv, const char *prefix)
 			return sparse_checkout_list(argc, argv);
 		if (!strcmp(argv[0], "init"))
 			return sparse_checkout_init(argc, argv);
+		if (!strcmp(argv[0], "add"))
+			return sparse_checkout_add(argc, argv);
 	}
 
 	usage_with_options(builtin_sparse_checkout_usage,
diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh
index b7d5f15830..499bd8d6d0 100755
--- a/t/t1091-sparse-checkout-builtin.sh
+++ b/t/t1091-sparse-checkout-builtin.sh
@@ -100,4 +100,24 @@  test_expect_success 'clone --sparse' '
 	test_cmp expect dir
 '
 
+test_expect_success 'add to existing sparse-checkout' '
+	echo "/folder2/*" | git -C repo sparse-checkout add &&
+	cat >expect <<-EOF &&
+		/*
+		!/*/*
+		/folder1/*
+		/folder2/*
+	EOF
+	git -C repo sparse-checkout list >actual &&
+	test_cmp expect actual &&
+	test_cmp expect repo/.git/info/sparse-checkout &&
+	ls repo >dir  &&
+	cat >expect <<-EOF &&
+		a
+		folder1
+		folder2
+	EOF
+	test_cmp expect dir
+'
+
 test_done
\ No newline at end of file