diff mbox series

[v5,02/17] sparse-checkout: create 'init' subcommand

Message ID a161cee0dfec76e7a08253083f488e2e6d26299e.1571666186.git.gitgitgadget@gmail.com (mailing list archive)
State New, archived
Headers show
Series New sparse-checkout builtin and "cone" mode | expand

Commit Message

John Passaro via GitGitGadget Oct. 21, 2019, 1:56 p.m. UTC
From: Derrick Stolee <dstolee@microsoft.com>

Getting started with a sparse-checkout file can be daunting. Help
users start their sparse enlistment using 'git sparse-checkout init'.
This will set 'core.sparseCheckout=true' in their config, write
an initial set of patterns to the sparse-checkout file, and update
their working directory.

Make sure to use the `extensions.worktreeConfig` setting and write
the sparse checkout config to the worktree-specific config file.
This avoids confusing interactions with other worktrees.

The use of running another process for 'git read-tree' is sub-
optimal. This will be removed in a later change.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 Documentation/git-sparse-checkout.txt | 11 ++++
 builtin/sparse-checkout.c             | 79 ++++++++++++++++++++++++++-
 t/t1091-sparse-checkout-builtin.sh    | 41 ++++++++++++++
 3 files changed, 130 insertions(+), 1 deletion(-)

Comments

SZEDER Gábor Nov. 19, 2019, 2:33 p.m. UTC | #1
On Mon, Oct 21, 2019 at 01:56:11PM +0000, Derrick Stolee via GitGitGadget wrote:
> From: Derrick Stolee <dstolee@microsoft.com>
> 
> Getting started with a sparse-checkout file can be daunting. Help
> users start their sparse enlistment using 'git sparse-checkout init'.
> This will set 'core.sparseCheckout=true' in their config, write
> an initial set of patterns to the sparse-checkout file, and update
> their working directory.

Reading this I was wandering what those "initial set of patterns"
might be ...

> Make sure to use the `extensions.worktreeConfig` setting and write
> the sparse checkout config to the worktree-specific config file.
> This avoids confusing interactions with other worktrees.
> 
> The use of running another process for 'git read-tree' is sub-
> optimal. This will be removed in a later change.
> 
> Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
> ---
>  Documentation/git-sparse-checkout.txt | 11 ++++
>  builtin/sparse-checkout.c             | 79 ++++++++++++++++++++++++++-
>  t/t1091-sparse-checkout-builtin.sh    | 41 ++++++++++++++
>  3 files changed, 130 insertions(+), 1 deletion(-)
> 
> diff --git a/Documentation/git-sparse-checkout.txt b/Documentation/git-sparse-checkout.txt
> index 9d6ca22917..930a361567 100644
> --- a/Documentation/git-sparse-checkout.txt
> +++ b/Documentation/git-sparse-checkout.txt
> @@ -30,6 +30,17 @@ COMMANDS
>  'list'::
>  	Provide a list of the contents in the sparse-checkout file.
>  
> +'init'::
> +	Enable the `core.sparseCheckout` setting. If the
> +	sparse-checkout file does not exist, then populate it with
> +	patterns that match every file in the root directory and
> +	no other directories, then will remove all directories tracked
> +	by Git. Add patterns to the sparse-checkout file to
> +	repopulate the working directory.

... and then reading this I was wandering why these are those "initial
set of patterns".

> ++
> +To avoid interfering with other worktrees, it first enables the
> +`extensions.worktreeConfig` setting and makes sure to set the
> +`core.sparseCheckout` setting in the worktree-specific config file.
>  
>  SPARSE CHECKOUT
>  ---------------
> diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c
> index 5717c9b2cb..77aa52ca01 100644
> --- a/builtin/sparse-checkout.c
> +++ b/builtin/sparse-checkout.c
> @@ -8,7 +8,7 @@
>  #include "strbuf.h"
>  
>  static char const * const builtin_sparse_checkout_usage[] = {
> -	N_("git sparse-checkout list"),
> +	N_("git sparse-checkout (init|list)"),
>  	NULL
>  };
>  
> @@ -59,6 +59,81 @@ static int sparse_checkout_list(int argc, const char **argv)
>  	return 0;
>  }
>  
> +static int update_working_directory(void)
> +{
> +	struct argv_array argv = ARGV_ARRAY_INIT;
> +	int result = 0;
> +	argv_array_pushl(&argv, "read-tree", "-m", "-u", "HEAD", NULL);
> +
> +	if (run_command_v_opt(argv.argv, RUN_GIT_CMD)) {
> +		error(_("failed to update index with new sparse-checkout paths"));
> +		result = 1;
> +	}
> +
> +	argv_array_clear(&argv);
> +	return result;
> +}
> +
> +enum sparse_checkout_mode {
> +	MODE_NO_PATTERNS = 0,
> +	MODE_ALL_PATTERNS = 1,
> +};
> +
> +static int sc_set_config(enum sparse_checkout_mode mode)

Nit: s/sc_//, perhaps?  I suppose that "sc" prefix stands for "sparse
checkout", but this is a static function in
'builtin/sparse-checkout.c', so it doesn't need a distinguising
prefix.  Even at the end of this patch series no other functions have
this "sc" prefix.

> +{
> +	struct argv_array argv = ARGV_ARRAY_INIT;

This 'argv_array' is not cleared at the end of the function, but...

> +	if (git_config_set_gently("extensions.worktreeConfig", "true")) {
> +		error(_("failed to set extensions.worktreeConfig setting"));
> +		return 1;
> +	}
> +
> +	argv_array_pushl(&argv, "config", "--worktree", "core.sparseCheckout", NULL);
> +
> +	if (mode)
> +		argv_array_pushl(&argv, "true", NULL);
> +	else
> +		argv_array_pushl(&argv, "false", NULL);
> +
> +	if (run_command_v_opt(argv.argv, RUN_GIT_CMD)) {
> +		error(_("failed to enable core.sparseCheckout"));
> +		return 1;
> +	}

Why the external 'git config' invocation?

  git_config_set_in_file_gently(git_path("config.worktree"),
                                "core.sparseCheckout",
                                mode ? "true" : "false")

> +
> +	return 0;
> +}
> +
> +static int sparse_checkout_init(int argc, const char **argv)
> +{
> +	struct pattern_list pl;
> +	char *sparse_filename;
> +	FILE *fp;
> +	int res;
> +
> +	if (sc_set_config(MODE_ALL_PATTERNS))
> +		return 1;
> +
> +	memset(&pl, 0, sizeof(pl));
> +
> +	sparse_filename = get_sparse_checkout_filename();
> +	res = add_patterns_from_file_to_list(sparse_filename, "", 0, &pl, NULL);
> +
> +	/* If we already have a sparse-checkout file, use it. */
> +	if (res >= 0) {
> +		free(sparse_filename);
> +		goto reset_dir;
> +	}
> +
> +	/* initial mode: all blobs at root */
> +	fp = xfopen(sparse_filename, "w");
> +	free(sparse_filename);
> +	fprintf(fp, "/*\n!/*/\n");

What if this fprintf() call were to fail?

> +	fclose(fp);
> +
> +reset_dir:
> +	return update_working_directory();
> +}
> +
>  int cmd_sparse_checkout(int argc, const char **argv, const char *prefix)
>  {
>  	static struct option builtin_sparse_checkout_options[] = {
> @@ -79,6 +154,8 @@ int cmd_sparse_checkout(int argc, const char **argv, const char *prefix)
>  	if (argc > 0) {
>  		if (!strcmp(argv[0], "list"))
>  			return sparse_checkout_list(argc, argv);
> +		if (!strcmp(argv[0], "init"))
> +			return sparse_checkout_init(argc, argv);
>  	}
>  
>  	usage_with_options(builtin_sparse_checkout_usage,
> diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh
> index 9b73d44907..cd56cc384b 100755
> --- a/t/t1091-sparse-checkout-builtin.sh
> +++ b/t/t1091-sparse-checkout-builtin.sh
> @@ -42,4 +42,45 @@ test_expect_success 'git sparse-checkout list (populated)' '
>  	test_cmp expect list
>  '
>  
> +test_expect_success 'git sparse-checkout init' '
> +	git -C repo sparse-checkout init &&
> +	cat >expect <<-EOF &&
> +		/*
> +		!/*/
> +	EOF
> +	test_cmp expect repo/.git/info/sparse-checkout &&
> +	git -C repo config --list >config &&
> +	test_i18ngrep "core.sparsecheckout=true" config &&

We have the 'test_cmp_config' helper function to check the expected
value of configuration variables.

> +	ls repo >dir  &&
> +	echo a >expect &&
> +	test_cmp expect dir
> +'
> +
> +test_expect_success 'git sparse-checkout list after init' '
> +	git -C repo sparse-checkout list >actual &&
> +	cat >expect <<-EOF &&
> +		/*
> +		!/*/
> +	EOF
> +	test_cmp expect actual
> +'
> +
> +test_expect_success 'init with existing sparse-checkout' '
> +	echo "*folder*" >> repo/.git/info/sparse-checkout &&
> +	git -C repo sparse-checkout init &&
> +	cat >expect <<-EOF &&
> +		/*
> +		!/*/
> +		*folder*
> +	EOF
> +	test_cmp expect repo/.git/info/sparse-checkout &&
> +	ls repo >dir  &&
> +	cat >expect <<-EOF &&
> +		a
> +		folder1
> +		folder2
> +	EOF
> +	test_cmp expect dir
> +'
> +
>  test_done
> -- 
> gitgitgadget
>
SZEDER Gábor Nov. 20, 2019, 2:13 p.m. UTC | #2
On Tue, Nov 19, 2019 at 03:33:08PM +0100, SZEDER Gábor wrote:
> > +	argv_array_pushl(&argv, "config", "--worktree", "core.sparseCheckout", NULL);
> > +
> > +	if (mode)
> > +		argv_array_pushl(&argv, "true", NULL);
> > +	else
> > +		argv_array_pushl(&argv, "false", NULL);
> > +
> > +	if (run_command_v_opt(argv.argv, RUN_GIT_CMD)) {
> > +		error(_("failed to enable core.sparseCheckout"));
> > +		return 1;
> > +	}
> 
> Why the external 'git config' invocation?
> 
>   git_config_set_in_file_gently(git_path("config.worktree"),
>                                 "core.sparseCheckout",
>                                 mode ? "true" : "false")

Having slept on it, I think it would be even better to pass NULL
instead of "false".  "false", well, just sets the configuration
variable to false, but a NULL would properly unset it.
SZEDER Gábor Nov. 21, 2019, 11:49 a.m. UTC | #3
On Mon, Oct 21, 2019 at 01:56:11PM +0000, Derrick Stolee via GitGitGadget wrote:
> Getting started with a sparse-checkout file can be daunting. Help
> users start their sparse enlistment using 'git sparse-checkout init'.
> This will set 'core.sparseCheckout=true' in their config, write
> an initial set of patterns to the sparse-checkout file, and update
> their working directory.

Enabling sparse-checkout can remove modified files:

  $ mkdir dir
  $ touch foo dir/bar
  $ git add .
  $ git commit -m Initial
  [master (root-commit) ecc81bd] Initial
   2 files changed, 0 insertions(+), 0 deletions(-)
   create mode 100644 dir/bar
   create mode 100644 foo
  $ echo changes >dir/bar
  $ ~/src/git/git sparse-checkout init
  error: Entry 'dir/bar' not uptodate. Cannot update sparse checkout.
  error: failed to update index with new sparse-checkout paths
  $ cat dir/bar 
  changes

So far so good, my changes are still there.  Unfortunately, however:

  $ git add -u
  $ ~/src/git/git sparse-checkout init
  $ echo $?
  0
  $ ls
  foo

Uh-oh, my changes are gone.
SZEDER Gábor Nov. 21, 2019, 12:34 p.m. UTC | #4
On Thu, Nov 21, 2019 at 12:49:36PM +0100, SZEDER Gábor wrote:
> On Mon, Oct 21, 2019 at 01:56:11PM +0000, Derrick Stolee via GitGitGadget wrote:
> > Getting started with a sparse-checkout file can be daunting. Help
> > users start their sparse enlistment using 'git sparse-checkout init'.
> > This will set 'core.sparseCheckout=true' in their config, write
> > an initial set of patterns to the sparse-checkout file, and update
> > their working directory.
> 
> Enabling sparse-checkout can remove modified files:
> 
>   $ mkdir dir
>   $ touch foo dir/bar
>   $ git add .
>   $ git commit -m Initial
>   [master (root-commit) ecc81bd] Initial
>    2 files changed, 0 insertions(+), 0 deletions(-)
>    create mode 100644 dir/bar
>    create mode 100644 foo
>   $ echo changes >dir/bar
>   $ ~/src/git/git sparse-checkout init
>   error: Entry 'dir/bar' not uptodate. Cannot update sparse checkout.
>   error: failed to update index with new sparse-checkout paths

And after this it leaves the 'sparse-checkout' file behind, which may
or may not be desired:

  $ cat .git/info/sparse-checkout 
  /*
  !/*/
Derrick Stolee Nov. 21, 2019, 2:28 p.m. UTC | #5
On 11/21/2019 6:49 AM, SZEDER Gábor wrote:
> On Mon, Oct 21, 2019 at 01:56:11PM +0000, Derrick Stolee via GitGitGadget wrote:
>> Getting started with a sparse-checkout file can be daunting. Help
>> users start their sparse enlistment using 'git sparse-checkout init'.
>> This will set 'core.sparseCheckout=true' in their config, write
>> an initial set of patterns to the sparse-checkout file, and update
>> their working directory.
> 
> Enabling sparse-checkout can remove modified files:
> 
>   $ mkdir dir
>   $ touch foo dir/bar
>   $ git add .
>   $ git commit -m Initial
>   [master (root-commit) ecc81bd] Initial
>    2 files changed, 0 insertions(+), 0 deletions(-)
>    create mode 100644 dir/bar
>    create mode 100644 foo
>   $ echo changes >dir/bar
>   $ ~/src/git/git sparse-checkout init
>   error: Entry 'dir/bar' not uptodate. Cannot update sparse checkout.
>   error: failed to update index with new sparse-checkout paths
>   $ cat dir/bar 
>   changes
> 
> So far so good, my changes are still there.  Unfortunately, however:
> 
>   $ git add -u
>   $ ~/src/git/git sparse-checkout init
>   $ echo $?
>   0
>   $ ls
>   foo
> 
> Uh-oh, my changes are gone.

Here, your changes are not "lost", they are staged, correct? A "git status"
should report that your changes are ready for committing. This seems to be
the expected behavior.

As to your other message about leaving the sparse-checkout file even when
the 'init' command fails, I'll fix that by using the in-process mechanism.

-Stolee
SZEDER Gábor Nov. 21, 2019, 3:27 p.m. UTC | #6
On Thu, Nov 21, 2019 at 09:28:59AM -0500, Derrick Stolee wrote:
> On 11/21/2019 6:49 AM, SZEDER Gábor wrote:
> > On Mon, Oct 21, 2019 at 01:56:11PM +0000, Derrick Stolee via GitGitGadget wrote:
> >> Getting started with a sparse-checkout file can be daunting. Help
> >> users start their sparse enlistment using 'git sparse-checkout init'.
> >> This will set 'core.sparseCheckout=true' in their config, write
> >> an initial set of patterns to the sparse-checkout file, and update
> >> their working directory.
> > 
> > Enabling sparse-checkout can remove modified files:
> > 
> >   $ mkdir dir
> >   $ touch foo dir/bar
> >   $ git add .
> >   $ git commit -m Initial
> >   [master (root-commit) ecc81bd] Initial
> >    2 files changed, 0 insertions(+), 0 deletions(-)
> >    create mode 100644 dir/bar
> >    create mode 100644 foo
> >   $ echo changes >dir/bar
> >   $ ~/src/git/git sparse-checkout init
> >   error: Entry 'dir/bar' not uptodate. Cannot update sparse checkout.
> >   error: failed to update index with new sparse-checkout paths
> >   $ cat dir/bar 
> >   changes
> > 
> > So far so good, my changes are still there.  Unfortunately, however:
> > 
> >   $ git add -u
> >   $ ~/src/git/git sparse-checkout init
> >   $ echo $?
> >   0
> >   $ ls
> >   foo
> > 
> > Uh-oh, my changes are gone.
> 
> Here, your changes are not "lost", they are staged, correct? 

Well, yes, the changes were staged, so they must be in the object
database somewhere, the user just has to go through the dangling
objects reported by 'git fsck' to find and restore it...  So from the
perspective of an ordinary user they are lost.

> A "git status"
> should report that your changes are ready for committing. This seems to be
> the expected behavior.

No, unfortunately enabling sparse-checkout did throw the staged
changes away:

  ~/test (master +)$ ~/src/git/git sparse-checkout init
  ~/test (master)$ git status 
  On branch master
  nothing to commit, working tree clean

Note also the shell prompt going from "you have staged changes" to
"working tree is clean".

But wait, I thought that only changes to files that are excluded from
the sparse-checkout are thrown away, but as it turns out it throws
away changes to files that are included in the sparse-checkout:

  ~/test (master #)$ echo original >foo
  ~/test (master #%)$ git add .
  ~/test (master +)$ git commit -m Initial
  [master (root-commit) 04cb2a2] Initial
   1 file changed, 1 insertion(+)
   create mode 100644 foo
  ~/test (master)$ echo changes >foo 
  ~/test (master *)$ ~/src/git/git sparse-checkout init
  ~/test (master *)$ cat foo 
  changes

So far so good, but:

  ~/test (master *)$ ~/src/git/git sparse-checkout disable
  ~/test (master *)$ git add .
  ~/test (master +)$ ~/src/git/git sparse-checkout init
  ~/test (master)$ cat foo 
  original

This is not really sparse-checkout-specific, because this is what 'git
read-tree -um HEAD' always does on its own:

  ~/test (master)$ echo changes >foo 
  ~/test (master *)$ git read-tree -um HEAD
  ~/test (master *)$ cat foo 
  changes
  ~/test (master *)$ git add -u
  ~/test (master +)$ git read-tree -um HEAD
  ~/test (master)$ cat foo 
  original

These issues are present at the end of the patch series as well, but
that is sort-of expected, because the later commit message states
that:

    Remove this extra process call by creating a direct call to
    unpack_trees() in the same way 'git read-tree -mu HEAD' does.
Derrick Stolee Nov. 21, 2019, 3:36 p.m. UTC | #7
On 11/21/2019 10:27 AM, SZEDER Gábor wrote:
> On Thu, Nov 21, 2019 at 09:28:59AM -0500, Derrick Stolee wrote:
>> On 11/21/2019 6:49 AM, SZEDER Gábor wrote:
>>> On Mon, Oct 21, 2019 at 01:56:11PM +0000, Derrick Stolee via GitGitGadget wrote:
>>>> Getting started with a sparse-checkout file can be daunting. Help
>>>> users start their sparse enlistment using 'git sparse-checkout init'.
>>>> This will set 'core.sparseCheckout=true' in their config, write
>>>> an initial set of patterns to the sparse-checkout file, and update
>>>> their working directory.
>>>
>>> Enabling sparse-checkout can remove modified files:
>>>
>>>   $ mkdir dir
>>>   $ touch foo dir/bar
>>>   $ git add .
>>>   $ git commit -m Initial
>>>   [master (root-commit) ecc81bd] Initial
>>>    2 files changed, 0 insertions(+), 0 deletions(-)
>>>    create mode 100644 dir/bar
>>>    create mode 100644 foo
>>>   $ echo changes >dir/bar
>>>   $ ~/src/git/git sparse-checkout init
>>>   error: Entry 'dir/bar' not uptodate. Cannot update sparse checkout.
>>>   error: failed to update index with new sparse-checkout paths
>>>   $ cat dir/bar 
>>>   changes
>>>
>>> So far so good, my changes are still there.  Unfortunately, however:
>>>
>>>   $ git add -u
>>>   $ ~/src/git/git sparse-checkout init
>>>   $ echo $?
>>>   0
>>>   $ ls
>>>   foo
>>>
>>> Uh-oh, my changes are gone.
>>
>> Here, your changes are not "lost", they are staged, correct? 
> 
> Well, yes, the changes were staged, so they must be in the object
> database somewhere, the user just has to go through the dangling
> objects reported by 'git fsck' to find and restore it...  So from the
> perspective of an ordinary user they are lost.
> 
>> A "git status"
>> should report that your changes are ready for committing. This seems to be
>> the expected behavior.
> 
> No, unfortunately enabling sparse-checkout did throw the staged
> changes away:
> 
>   ~/test (master +)$ ~/src/git/git sparse-checkout init
>   ~/test (master)$ git status 
>   On branch master
>   nothing to commit, working tree clean
> 
> Note also the shell prompt going from "you have staged changes" to
> "working tree is clean".
> 
> But wait, I thought that only changes to files that are excluded from
> the sparse-checkout are thrown away, but as it turns out it throws
> away changes to files that are included in the sparse-checkout:
> 
>   ~/test (master #)$ echo original >foo
>   ~/test (master #%)$ git add .
>   ~/test (master +)$ git commit -m Initial
>   [master (root-commit) 04cb2a2] Initial
>    1 file changed, 1 insertion(+)
>    create mode 100644 foo
>   ~/test (master)$ echo changes >foo 
>   ~/test (master *)$ ~/src/git/git sparse-checkout init
>   ~/test (master *)$ cat foo 
>   changes
> 
> So far so good, but:
> 
>   ~/test (master *)$ ~/src/git/git sparse-checkout disable
>   ~/test (master *)$ git add .
>   ~/test (master +)$ ~/src/git/git sparse-checkout init
>   ~/test (master)$ cat foo 
>   original
> 
> This is not really sparse-checkout-specific, because this is what 'git
> read-tree -um HEAD' always does on its own:
> 
>   ~/test (master)$ echo changes >foo 
>   ~/test (master *)$ git read-tree -um HEAD
>   ~/test (master *)$ cat foo 
>   changes
>   ~/test (master *)$ git add -u
>   ~/test (master +)$ git read-tree -um HEAD
>   ~/test (master)$ cat foo 
>   original
> 
> These issues are present at the end of the patch series as well, but
> that is sort-of expected, because the later commit message states
> that:
> 
>     Remove this extra process call by creating a direct call to
>     unpack_trees() in the same way 'git read-tree -mu HEAD' does.

Thanks for the additional details.

This series intended to make the existing sparse-checkout behavior
more useful to users by not requiring manual edits of the sparse-checkout
file followed by 'git read-tree' commands. However, there do appear
to be some serious improvements that we can make in the future.

Keeping staged changes seems important, and we can address that in
the near future.

-Stolee
SZEDER Gábor Nov. 21, 2019, 4:37 p.m. UTC | #8
On Thu, Nov 21, 2019 at 10:36:10AM -0500, Derrick Stolee wrote:
> > But wait, I thought that only changes to files that are excluded from
> > the sparse-checkout are thrown away, but as it turns out it throws
> > away changes to files that are included in the sparse-checkout:

For completeness, 'git sparse-checkout disable' throws away staged
changes as well, as it, too, runs 'git read-tree -um HEAD' (or its
equivalent).

> Thanks for the additional details.
> 
> This series intended to make the existing sparse-checkout behavior
> more useful to users by not requiring manual edits of the sparse-checkout
> file followed by 'git read-tree' commands. However, there do appear
> to be some serious improvements that we can make in the future.
> 
> Keeping staged changes seems important, and we can address that in
> the near future.

I think that at least for now 'git sparse-checkout' should flat out
refuse to init/set/disable if the working tree is not clean (but still
allow 'list', as that's a read-only operation), like the patch below.
Yeah, that way it wouldn't work in cases that appear to be safe
(unstaged changes), but it would prevent the data loss until we
carefully consider the circumstances under which these operations can
be safely allowed.


  -- >8 --

diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c
index 76f65d8edd..4b05625c4c 100644
--- a/builtin/sparse-checkout.c
+++ b/builtin/sparse-checkout.c
@@ -12,6 +12,7 @@
 #include "lockfile.h"
 #include "resolve-undo.h"
 #include "unpack-trees.h"
+#include "wt-status.h"
 
 static char const * const builtin_sparse_checkout_usage[] = {
 	N_("git sparse-checkout (init|list|set|disable) <options>"),
@@ -188,6 +189,10 @@ static int sparse_checkout_init(int argc, const char **argv)
 			     builtin_sparse_checkout_init_options,
 			     builtin_sparse_checkout_init_usage, 0);
 
+	repo_read_index(the_repository);
+	require_clean_work_tree(the_repository,
+				N_("initialize sparse-checkout"), NULL, 1, 0);
+
 	if (init_opts.cone_mode) {
 		mode = MODE_CONE_PATTERNS;
 		core_sparse_checkout_cone = 1;
@@ -386,6 +391,10 @@ static int sparse_checkout_set(int argc, const char **argv, const char *prefix)
 			     builtin_sparse_checkout_set_usage,
 			     PARSE_OPT_KEEP_UNKNOWN);
 
+	repo_read_index(the_repository);
+	require_clean_work_tree(the_repository,
+				N_("set up sparse-checkout"), NULL, 1, 0);
+
 	if (core_sparse_checkout_cone) {
 		struct strbuf line = STRBUF_INIT;
 		hashmap_init(&pl.recursive_hashmap, pl_hashmap_cmp, NULL, 0);
@@ -437,6 +446,10 @@ static int sparse_checkout_disable(int argc, const char **argv)
 	char *sparse_filename;
 	FILE *fp;
 
+	repo_read_index(the_repository);
+	require_clean_work_tree(the_repository,
+				N_("disable sparse-checkout"), NULL, 1, 0);
+
 	if (sc_set_config(MODE_ALL_PATTERNS))
 		die(_("failed to change config"));
Elijah Newren Nov. 21, 2019, 5:01 p.m. UTC | #9
On Thu, Nov 21, 2019 at 8:37 AM SZEDER Gábor <szeder.dev@gmail.com> wrote:
>
> On Thu, Nov 21, 2019 at 10:36:10AM -0500, Derrick Stolee wrote:
> > > But wait, I thought that only changes to files that are excluded from
> > > the sparse-checkout are thrown away, but as it turns out it throws
> > > away changes to files that are included in the sparse-checkout:
>
> For completeness, 'git sparse-checkout disable' throws away staged
> changes as well, as it, too, runs 'git read-tree -um HEAD' (or its
> equivalent).
>
> > Thanks for the additional details.
> >
> > This series intended to make the existing sparse-checkout behavior
> > more useful to users by not requiring manual edits of the sparse-checkout
> > file followed by 'git read-tree' commands. However, there do appear
> > to be some serious improvements that we can make in the future.
> >
> > Keeping staged changes seems important, and we can address that in
> > the near future.
>
> I think that at least for now 'git sparse-checkout' should flat out
> refuse to init/set/disable if the working tree is not clean (but still
> allow 'list', as that's a read-only operation), like the patch below.
> Yeah, that way it wouldn't work in cases that appear to be safe
> (unstaged changes), but it would prevent the data loss until we
> carefully consider the circumstances under which these operations can
> be safely allowed.

A big +1 for this from me.

We had an unfortunately large number of mis-merging and dataloss bugs
in the merge machinery that were slowly fixed over the course of more
than a decade[1], due to the fact that builtin/merge required
index==HEAD and did so by placing a comment in the code notifying
folks that the individual merge strategies were responsible to enforce
it -- and, in practice, they *all* forgot to do so unless and until we
discovered bugs.  So, count me as a strongly in favor of just
preventatively enforcing safe conditions and then later relaxing them
in special conditions if it can be proven safe.

[1] https://public-inbox.org/git/20190725174611.14802-4-newren@gmail.com/

>   -- >8 --
>
> diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c
> index 76f65d8edd..4b05625c4c 100644
> --- a/builtin/sparse-checkout.c
> +++ b/builtin/sparse-checkout.c
> @@ -12,6 +12,7 @@
>  #include "lockfile.h"
>  #include "resolve-undo.h"
>  #include "unpack-trees.h"
> +#include "wt-status.h"
>
>  static char const * const builtin_sparse_checkout_usage[] = {
>         N_("git sparse-checkout (init|list|set|disable) <options>"),
> @@ -188,6 +189,10 @@ static int sparse_checkout_init(int argc, const char **argv)
>                              builtin_sparse_checkout_init_options,
>                              builtin_sparse_checkout_init_usage, 0);
>
> +       repo_read_index(the_repository);
> +       require_clean_work_tree(the_repository,
> +                               N_("initialize sparse-checkout"), NULL, 1, 0);
> +
>         if (init_opts.cone_mode) {
>                 mode = MODE_CONE_PATTERNS;
>                 core_sparse_checkout_cone = 1;
> @@ -386,6 +391,10 @@ static int sparse_checkout_set(int argc, const char **argv, const char *prefix)
>                              builtin_sparse_checkout_set_usage,
>                              PARSE_OPT_KEEP_UNKNOWN);
>
> +       repo_read_index(the_repository);
> +       require_clean_work_tree(the_repository,
> +                               N_("set up sparse-checkout"), NULL, 1, 0);
> +
>         if (core_sparse_checkout_cone) {
>                 struct strbuf line = STRBUF_INIT;
>                 hashmap_init(&pl.recursive_hashmap, pl_hashmap_cmp, NULL, 0);
> @@ -437,6 +446,10 @@ static int sparse_checkout_disable(int argc, const char **argv)
>         char *sparse_filename;
>         FILE *fp;
>
> +       repo_read_index(the_repository);
> +       require_clean_work_tree(the_repository,
> +                               N_("disable sparse-checkout"), NULL, 1, 0);
> +
>         if (sc_set_config(MODE_ALL_PATTERNS))
>                 die(_("failed to change config"));
Derrick Stolee Nov. 21, 2019, 5:15 p.m. UTC | #10
On 11/21/2019 12:01 PM, Elijah Newren wrote:
> On Thu, Nov 21, 2019 at 8:37 AM SZEDER Gábor <szeder.dev@gmail.com> wrote:
>>
>> On Thu, Nov 21, 2019 at 10:36:10AM -0500, Derrick Stolee wrote:
>>>> But wait, I thought that only changes to files that are excluded from
>>>> the sparse-checkout are thrown away, but as it turns out it throws
>>>> away changes to files that are included in the sparse-checkout:
>>
>> For completeness, 'git sparse-checkout disable' throws away staged
>> changes as well, as it, too, runs 'git read-tree -um HEAD' (or its
>> equivalent).
>>
>>> Thanks for the additional details.
>>>
>>> This series intended to make the existing sparse-checkout behavior
>>> more useful to users by not requiring manual edits of the sparse-checkout
>>> file followed by 'git read-tree' commands. However, there do appear
>>> to be some serious improvements that we can make in the future.
>>>
>>> Keeping staged changes seems important, and we can address that in
>>> the near future.
>>
>> I think that at least for now 'git sparse-checkout' should flat out
>> refuse to init/set/disable if the working tree is not clean (but still
>> allow 'list', as that's a read-only operation), like the patch below.
>> Yeah, that way it wouldn't work in cases that appear to be safe
>> (unstaged changes), but it would prevent the data loss until we
>> carefully consider the circumstances under which these operations can
>> be safely allowed.
> 
> A big +1 for this from me.
> 
> We had an unfortunately large number of mis-merging and dataloss bugs
> in the merge machinery that were slowly fixed over the course of more
> than a decade[1], due to the fact that builtin/merge required
> index==HEAD and did so by placing a comment in the code notifying
> folks that the individual merge strategies were responsible to enforce
> it -- and, in practice, they *all* forgot to do so unless and until we
> discovered bugs.  So, count me as a strongly in favor of just
> preventatively enforcing safe conditions and then later relaxing them
> in special conditions if it can be proven safe.
> 
> [1] https://public-inbox.org/git/20190725174611.14802-4-newren@gmail.com/

Sounds good. Thanks, both.

-Stolee
diff mbox series

Patch

diff --git a/Documentation/git-sparse-checkout.txt b/Documentation/git-sparse-checkout.txt
index 9d6ca22917..930a361567 100644
--- a/Documentation/git-sparse-checkout.txt
+++ b/Documentation/git-sparse-checkout.txt
@@ -30,6 +30,17 @@  COMMANDS
 'list'::
 	Provide a list of the contents in the sparse-checkout file.
 
+'init'::
+	Enable the `core.sparseCheckout` setting. If the
+	sparse-checkout file does not exist, then populate it with
+	patterns that match every file in the root directory and
+	no other directories, then will remove all directories tracked
+	by Git. Add patterns to the sparse-checkout file to
+	repopulate the working directory.
++
+To avoid interfering with other worktrees, it first enables the
+`extensions.worktreeConfig` setting and makes sure to set the
+`core.sparseCheckout` setting in the worktree-specific config file.
 
 SPARSE CHECKOUT
 ---------------
diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c
index 5717c9b2cb..77aa52ca01 100644
--- a/builtin/sparse-checkout.c
+++ b/builtin/sparse-checkout.c
@@ -8,7 +8,7 @@ 
 #include "strbuf.h"
 
 static char const * const builtin_sparse_checkout_usage[] = {
-	N_("git sparse-checkout list"),
+	N_("git sparse-checkout (init|list)"),
 	NULL
 };
 
@@ -59,6 +59,81 @@  static int sparse_checkout_list(int argc, const char **argv)
 	return 0;
 }
 
+static int update_working_directory(void)
+{
+	struct argv_array argv = ARGV_ARRAY_INIT;
+	int result = 0;
+	argv_array_pushl(&argv, "read-tree", "-m", "-u", "HEAD", NULL);
+
+	if (run_command_v_opt(argv.argv, RUN_GIT_CMD)) {
+		error(_("failed to update index with new sparse-checkout paths"));
+		result = 1;
+	}
+
+	argv_array_clear(&argv);
+	return result;
+}
+
+enum sparse_checkout_mode {
+	MODE_NO_PATTERNS = 0,
+	MODE_ALL_PATTERNS = 1,
+};
+
+static int sc_set_config(enum sparse_checkout_mode mode)
+{
+	struct argv_array argv = ARGV_ARRAY_INIT;
+
+	if (git_config_set_gently("extensions.worktreeConfig", "true")) {
+		error(_("failed to set extensions.worktreeConfig setting"));
+		return 1;
+	}
+
+	argv_array_pushl(&argv, "config", "--worktree", "core.sparseCheckout", NULL);
+
+	if (mode)
+		argv_array_pushl(&argv, "true", NULL);
+	else
+		argv_array_pushl(&argv, "false", NULL);
+
+	if (run_command_v_opt(argv.argv, RUN_GIT_CMD)) {
+		error(_("failed to enable core.sparseCheckout"));
+		return 1;
+	}
+
+	return 0;
+}
+
+static int sparse_checkout_init(int argc, const char **argv)
+{
+	struct pattern_list pl;
+	char *sparse_filename;
+	FILE *fp;
+	int res;
+
+	if (sc_set_config(MODE_ALL_PATTERNS))
+		return 1;
+
+	memset(&pl, 0, sizeof(pl));
+
+	sparse_filename = get_sparse_checkout_filename();
+	res = add_patterns_from_file_to_list(sparse_filename, "", 0, &pl, NULL);
+
+	/* If we already have a sparse-checkout file, use it. */
+	if (res >= 0) {
+		free(sparse_filename);
+		goto reset_dir;
+	}
+
+	/* initial mode: all blobs at root */
+	fp = xfopen(sparse_filename, "w");
+	free(sparse_filename);
+	fprintf(fp, "/*\n!/*/\n");
+	fclose(fp);
+
+reset_dir:
+	return update_working_directory();
+}
+
 int cmd_sparse_checkout(int argc, const char **argv, const char *prefix)
 {
 	static struct option builtin_sparse_checkout_options[] = {
@@ -79,6 +154,8 @@  int cmd_sparse_checkout(int argc, const char **argv, const char *prefix)
 	if (argc > 0) {
 		if (!strcmp(argv[0], "list"))
 			return sparse_checkout_list(argc, argv);
+		if (!strcmp(argv[0], "init"))
+			return sparse_checkout_init(argc, argv);
 	}
 
 	usage_with_options(builtin_sparse_checkout_usage,
diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh
index 9b73d44907..cd56cc384b 100755
--- a/t/t1091-sparse-checkout-builtin.sh
+++ b/t/t1091-sparse-checkout-builtin.sh
@@ -42,4 +42,45 @@  test_expect_success 'git sparse-checkout list (populated)' '
 	test_cmp expect list
 '
 
+test_expect_success 'git sparse-checkout init' '
+	git -C repo sparse-checkout init &&
+	cat >expect <<-EOF &&
+		/*
+		!/*/
+	EOF
+	test_cmp expect repo/.git/info/sparse-checkout &&
+	git -C repo config --list >config &&
+	test_i18ngrep "core.sparsecheckout=true" config &&
+	ls repo >dir  &&
+	echo a >expect &&
+	test_cmp expect dir
+'
+
+test_expect_success 'git sparse-checkout list after init' '
+	git -C repo sparse-checkout list >actual &&
+	cat >expect <<-EOF &&
+		/*
+		!/*/
+	EOF
+	test_cmp expect actual
+'
+
+test_expect_success 'init with existing sparse-checkout' '
+	echo "*folder*" >> repo/.git/info/sparse-checkout &&
+	git -C repo sparse-checkout init &&
+	cat >expect <<-EOF &&
+		/*
+		!/*/
+		*folder*
+	EOF
+	test_cmp expect repo/.git/info/sparse-checkout &&
+	ls repo >dir  &&
+	cat >expect <<-EOF &&
+		a
+		folder1
+		folder2
+	EOF
+	test_cmp expect dir
+'
+
 test_done