diff mbox series

[2/9] sparse-checkout: create 'init' subcommand

Message ID e6e982e5a6e4517d97a7a404384057110f3a151d.1566313865.git.gitgitgadget@gmail.com (mailing list archive)
State New, archived
Headers show
Series New sparse-checkout builtin and "cone" mode | expand

Commit Message

Linus Arver via GitGitGadget Aug. 20, 2019, 3:11 p.m. UTC
From: Derrick Stolee <dstolee@microsoft.com>

Getting started with a sparse-checkout file can be daunting. Help
users start their sparse enlistment using 'git sparse-checkout init'.
This will set 'core.sparseCheckout=true' in their config, write
an initial set of patterns to the sparse-checkout file, and update
their working directory.

Using 'git read-tree' to clear directories does not work cleanly
on Windows, so manually delete directories that are tracked by Git
before running read-tree.

The use of running another process for 'git read-tree' is likely
suboptimal, but that can be improved in a later change, if valuable.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 Documentation/git-sparse-checkout.txt |   7 ++
 builtin/sparse-checkout.c             | 106 +++++++++++++++++++++++++-
 t/t1091-sparse-checkout-builtin.sh    |  40 ++++++++++
 3 files changed, 152 insertions(+), 1 deletion(-)

Comments

Elijah Newren Aug. 23, 2019, 11:02 p.m. UTC | #1
On Tue, Aug 20, 2019 at 8:13 AM Derrick Stolee via GitGitGadget
<gitgitgadget@gmail.com> wrote:
>
> From: Derrick Stolee <dstolee@microsoft.com>
>
> Getting started with a sparse-checkout file can be daunting. Help
> users start their sparse enlistment using 'git sparse-checkout init'.
> This will set 'core.sparseCheckout=true' in their config, write
> an initial set of patterns to the sparse-checkout file, and update
> their working directory.
>
> Using 'git read-tree' to clear directories does not work cleanly
> on Windows, so manually delete directories that are tracked by Git
> before running read-tree.

Is that a bug in read-tree that needs to be fixed?

> The use of running another process for 'git read-tree' is likely
> suboptimal, but that can be improved in a later change, if valuable.

I think it's valuable.  The bigger problem may be that "git read-tree
-mu HEAD" may turn out to be insufficient for our needs; see below....

>
> Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
> ---
>  Documentation/git-sparse-checkout.txt |   7 ++
>  builtin/sparse-checkout.c             | 106 +++++++++++++++++++++++++-
>  t/t1091-sparse-checkout-builtin.sh    |  40 ++++++++++
>  3 files changed, 152 insertions(+), 1 deletion(-)
>
> diff --git a/Documentation/git-sparse-checkout.txt b/Documentation/git-sparse-checkout.txt
> index ca0ca6a12f..50c53ee60a 100644
> --- a/Documentation/git-sparse-checkout.txt
> +++ b/Documentation/git-sparse-checkout.txt
> @@ -26,6 +26,13 @@ COMMANDS
>  'list'::
>         Provide a list of the contents in the sparse-checkout file.
>
> +'init'::
> +       Enable the `core.sparseCheckout` setting. If the
> +       sparse-checkout file does not exist, then populate it with
> +       patterns that match every file in the root directory and
> +       no other directories, then will remove all directories tracked
> +       by Git. Add patterns to the sparse-checkout file to
> +       repopulate the working directory.
>
>  SPARSE CHECKOUT
>  ----------------
> diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c
> index 6477a6ed9c..86d24e6295 100644
> --- a/builtin/sparse-checkout.c
> +++ b/builtin/sparse-checkout.c
> @@ -8,7 +8,7 @@
>  #include "strbuf.h"
>
>  static char const * const builtin_sparse_checkout_usage[] = {
> -       N_("git sparse-checkout [list]"),
> +       N_("git sparse-checkout [init|list]"),
>         NULL
>  };
>
> @@ -64,6 +64,108 @@ static int sparse_checkout_list(int argc, const char **argv)
>         return 0;
>  }
>
> +static int sc_read_tree(void)
> +{
> +       struct argv_array argv = ARGV_ARRAY_INIT;
> +       int result = 0;
> +       argv_array_pushl(&argv, "read-tree", "-m", "-u", "HEAD", NULL);
> +
> +       if (run_command_v_opt(argv.argv, RUN_GIT_CMD)) {
> +               error(_("failed to update index with new sparse-checkout paths"));
> +               result = 1;
> +       }

`git read-tree -m -u HEAD` will fail if the index has any higher stage
entries in it, even if those higher stage entries correspond to files
which are included in the sparseness patterns and thus would not need
an update.  It might be nice if we can find a way to provide a better
error message, and/or implement the read-tree -m -u HEAD internally in
a way that will allow us to not fail if the conflicted files are
included in the sparse set.

> +
> +       argv_array_clear(&argv);
> +       return result;
> +}
> +
> +static int sc_enable_config(void)
> +{
> +       struct argv_array argv = ARGV_ARRAY_INIT;
> +       int result = 0;
> +       argv_array_pushl(&argv, "config", "--add", "core.sparseCheckout", "true", NULL);

Why --add?  That seems really odd to me.

This should also have "--worktree".  And this function should either
set extensions.worktreeConfig to true or die if it isn't already set;
not sure which.  There's some UI and documentation stuff to figure out
here...

> +
> +       if (run_command_v_opt(argv.argv, RUN_GIT_CMD)) {
> +               error(_("failed to enable core.sparseCheckout"));
> +               result = 1;
> +       }
> +
> +       argv_array_clear(&argv);
> +       return result;
> +}
> +
> +static int delete_directory(const struct object_id *oid, struct strbuf *base,
> +               const char *pathname, unsigned mode, int stage, void *context)
> +{
> +       struct strbuf dirname = STRBUF_INIT;
> +       struct stat sb;
> +
> +       strbuf_addstr(&dirname, the_repository->worktree);
> +       strbuf_addch(&dirname, '/');
> +       strbuf_addstr(&dirname, pathname);
> +
> +       if (stat(dirname.buf, &sb) || !(sb.st_mode & S_IFDIR))
> +               return 0;
> +
> +       if (remove_dir_recursively(&dirname, 0))

flags = 0 implies not REMOVE_DIR_EMPTY_ONLY.  I'm not familiar with
remove_dir_recursively(), but won't this delete everything...including
untracked files?  If so, that sounds like a bug.

> +               warning(_("failed to remove directory '%s'"),
> +                       dirname.buf);
> +
> +       strbuf_release(&dirname);
> +       return 0;
> +}
> +
> +static int sparse_checkout_init(int argc, const char **argv)
> +{
> +       struct tree *t;
> +       struct object_id oid;
> +       struct exclude_list el;
> +       static struct pathspec pathspec;
> +       char *sparse_filename;
> +       FILE *fp;
> +       int res;
> +
> +       if (sc_enable_config())
> +               return 1;
> +
> +       memset(&el, 0, sizeof(el));
> +
> +       sparse_filename = get_sparse_checkout_filename();
> +       res = add_excludes_from_file_to_list(sparse_filename, "", 0, &el, NULL);

But 'el' isn't used again?  Why are we getting the list of files from
sparse_filename then?

> +
> +       /* If we already have a sparse-checkout file, use it. */
> +       if (res >= 0) {
> +               free(sparse_filename);
> +               goto reset_dir;
> +       }
> +
> +       /* initial mode: all blobs at root */
> +       fp = fopen(sparse_filename, "w");
> +       free(sparse_filename);
> +       fprintf(fp, "/*\n!/*/*\n");
> +       fclose(fp);

Makes sense.

> +
> +       /* remove all directories in the root, if tracked by Git */
> +       if (get_oid("HEAD", &oid)) {
> +               /* assume we are in a fresh repo */
> +               return 0;
> +       }
> +
> +       t = parse_tree_indirect(&oid);
> +
> +       parse_pathspec(&pathspec, PATHSPEC_ALL_MAGIC &
> +                                 ~(PATHSPEC_FROMTOP | PATHSPEC_LITERAL),
> +                      PATHSPEC_PREFER_CWD,
> +                      "", NULL);
> +
> +       if (read_tree_recursive(the_repository, t, "", 0, 0, &pathspec,
> +                               delete_directory, NULL))
> +               return 1;

Since this is only needed on Windows, as per your commit message,
should it be #ifdef'd?  Or is this actually a bug that should be fixed
in "git read-tree -mu HEAD"?

> +
> +reset_dir:
> +       return sc_read_tree();
> +}
> +

The rest looks fine.
Derrick Stolee Sept. 11, 2019, 2:27 p.m. UTC | #2
On 8/23/2019 7:02 PM, Elijah Newren wrote:
> On Tue, Aug 20, 2019 at 8:13 AM Derrick Stolee via GitGitGadget
> <gitgitgadget@gmail.com> wrote:
>>
>> From: Derrick Stolee <dstolee@microsoft.com>
>>
>> Getting started with a sparse-checkout file can be daunting. Help
>> users start their sparse enlistment using 'git sparse-checkout init'.
>> This will set 'core.sparseCheckout=true' in their config, write
>> an initial set of patterns to the sparse-checkout file, and update
>> their working directory.
>>
>> Using 'git read-tree' to clear directories does not work cleanly
>> on Windows, so manually delete directories that are tracked by Git
>> before running read-tree.
> 
> Is that a bug in read-tree that needs to be fixed?

Just to follow up on this: it turns out that this is NOT a bug in
read-tree, but rather a side-effect of our custom "core.gvfs" config
setting. In the virtualized world, we didn't want Git to hard-delete
a folder just because we marked everything sparse.

By removing that option from my environment, the deletions work as
expected.

Thanks,
-Stolee
Derrick Stolee Sept. 11, 2019, 8:28 p.m. UTC | #3
On 8/23/2019 7:02 PM, Elijah Newren wrote:
> On Tue, Aug 20, 2019 at 8:13 AM Derrick Stolee via GitGitGadget
> <gitgitgadget@gmail.com> wrote:
>> +static int sc_read_tree(void)
>> +{
>> +       struct argv_array argv = ARGV_ARRAY_INIT;
>> +       int result = 0;
>> +       argv_array_pushl(&argv, "read-tree", "-m", "-u", "HEAD", NULL);
>> +
>> +       if (run_command_v_opt(argv.argv, RUN_GIT_CMD)) {
>> +               error(_("failed to update index with new sparse-checkout paths"));
>> +               result = 1;
>> +       }
> 
> `git read-tree -m -u HEAD` will fail if the index has any higher stage
> entries in it, even if those higher stage entries correspond to files
> which are included in the sparseness patterns and thus would not need
> an update.  It might be nice if we can find a way to provide a better
> error message, and/or implement the read-tree -m -u HEAD internally in
> a way that will allow us to not fail if the conflicted files are
> included in the sparse set.

I agree that this is not the _best_ thing to do, but it does mimic the
current recommendation for a user interacting with sparse-checkout.

I'll rename this helper to something like "update_working_directory()"
so it can be swapped with a different implementation later, after we
work out those usability kinks.

The other thing that is needed here: allow reverting the sparse-checkout
settings if this fails. I'll isolate that to a new commit so we can
examine that behavior carefully.

> 
>> +
>> +       argv_array_clear(&argv);
>> +       return result;
>> +}
>> +
>> +static int sc_enable_config(void)
>> +{
>> +       struct argv_array argv = ARGV_ARRAY_INIT;
>> +       int result = 0;
>> +       argv_array_pushl(&argv, "config", "--add", "core.sparseCheckout", "true", NULL);
> > Why --add?  That seems really odd to me.

Yeah, that's a mistake. Good find.

> 
> This should also have "--worktree".  And this function should either
> set extensions.worktreeConfig to true or die if it isn't already set;
> not sure which.  There's some UI and documentation stuff to figure out
> here...

I was planning to switch my `git config` subcommand to use in-process
methods, but I'm struggling to find a way to ensure we follow the
`--worktree` option. It likely would work if extensions.worktreeConfig
was enabled when the process starts, but adding it in-process likely
causes a problem.

> 
>> +
>> +       if (run_command_v_opt(argv.argv, RUN_GIT_CMD)) {
>> +               error(_("failed to enable core.sparseCheckout"));
>> +               result = 1;
>> +       }
>> +
>> +       argv_array_clear(&argv);
>> +       return result;
>> +}
>> +
>> +static int delete_directory(const struct object_id *oid, struct strbuf *base,
>> +               const char *pathname, unsigned mode, int stage, void *context)
>> +{
>> +       struct strbuf dirname = STRBUF_INIT;
>> +       struct stat sb;
>> +
>> +       strbuf_addstr(&dirname, the_repository->worktree);
>> +       strbuf_addch(&dirname, '/');
>> +       strbuf_addstr(&dirname, pathname);
>> +
>> +       if (stat(dirname.buf, &sb) || !(sb.st_mode & S_IFDIR))
>> +               return 0;
>> +
>> +       if (remove_dir_recursively(&dirname, 0))
> 
> flags = 0 implies not REMOVE_DIR_EMPTY_ONLY.  I'm not familiar with
> remove_dir_recursively(), but won't this delete everything...including
> untracked files?  If so, that sounds like a bug.
This whole thing isn't needed any more, since read-tree does the right
thing.

> 
>> +               warning(_("failed to remove directory '%s'"),
>> +                       dirname.buf);
>> +
>> +       strbuf_release(&dirname);
>> +       return 0;
>> +}
>> +
>> +static int sparse_checkout_init(int argc, const char **argv)
>> +{
>> +       struct tree *t;
>> +       struct object_id oid;
>> +       struct exclude_list el;
>> +       static struct pathspec pathspec;
>> +       char *sparse_filename;
>> +       FILE *fp;
>> +       int res;
>> +
>> +       if (sc_enable_config())
>> +               return 1;
>> +
>> +       memset(&el, 0, sizeof(el));
>> +
>> +       sparse_filename = get_sparse_checkout_filename();
>> +       res = add_excludes_from_file_to_list(sparse_filename, "", 0, &el, NULL);
> 
> But 'el' isn't used again?  Why are we getting the list of files from
> sparse_filename then?

This is the only way I could think to check that the sparse-checkout file parses well without just doing the file open myself. Maybe we only need to check if the file exists (and is not empty).

>> +
>> +       /* If we already have a sparse-checkout file, use it. */
>> +       if (res >= 0) {
>> +               free(sparse_filename);
>> +               goto reset_dir;
>> +       }
>> +
>> +       /* initial mode: all blobs at root */
>> +       fp = fopen(sparse_filename, "w");
>> +       free(sparse_filename);
>> +       fprintf(fp, "/*\n!/*/*\n");
>> +       fclose(fp);
> 
> Makes sense.
> 
>> +
>> +       /* remove all directories in the root, if tracked by Git */
>> +       if (get_oid("HEAD", &oid)) {
>> +               /* assume we are in a fresh repo */
>> +               return 0;
>> +       }
>> +
>> +       t = parse_tree_indirect(&oid);
>> +
>> +       parse_pathspec(&pathspec, PATHSPEC_ALL_MAGIC &
>> +                                 ~(PATHSPEC_FROMTOP | PATHSPEC_LITERAL),
>> +                      PATHSPEC_PREFER_CWD,
>> +                      "", NULL);
>> +
>> +       if (read_tree_recursive(the_repository, t, "", 0, 0, &pathspec,
>> +                               delete_directory, NULL))
>> +               return 1;
> 
> Since this is only needed on Windows, as per your commit message,
> should it be #ifdef'd?  Or is this actually a bug that should be fixed
> in "git read-tree -mu HEAD"?

(this will not be needed, but thanks!)
 
>> +
>> +reset_dir:
>> +       return sc_read_tree();
>> +}
>> +
> 
> The rest looks fine.

Thanks,
-Stolee
diff mbox series

Patch

diff --git a/Documentation/git-sparse-checkout.txt b/Documentation/git-sparse-checkout.txt
index ca0ca6a12f..50c53ee60a 100644
--- a/Documentation/git-sparse-checkout.txt
+++ b/Documentation/git-sparse-checkout.txt
@@ -26,6 +26,13 @@  COMMANDS
 'list'::
 	Provide a list of the contents in the sparse-checkout file.
 
+'init'::
+	Enable the `core.sparseCheckout` setting. If the
+	sparse-checkout file does not exist, then populate it with
+	patterns that match every file in the root directory and
+	no other directories, then will remove all directories tracked
+	by Git. Add patterns to the sparse-checkout file to
+	repopulate the working directory.
 
 SPARSE CHECKOUT
 ----------------
diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c
index 6477a6ed9c..86d24e6295 100644
--- a/builtin/sparse-checkout.c
+++ b/builtin/sparse-checkout.c
@@ -8,7 +8,7 @@ 
 #include "strbuf.h"
 
 static char const * const builtin_sparse_checkout_usage[] = {
-	N_("git sparse-checkout [list]"),
+	N_("git sparse-checkout [init|list]"),
 	NULL
 };
 
@@ -64,6 +64,108 @@  static int sparse_checkout_list(int argc, const char **argv)
 	return 0;
 }
 
+static int sc_read_tree(void)
+{
+	struct argv_array argv = ARGV_ARRAY_INIT;
+	int result = 0;
+	argv_array_pushl(&argv, "read-tree", "-m", "-u", "HEAD", NULL);
+
+	if (run_command_v_opt(argv.argv, RUN_GIT_CMD)) {
+		error(_("failed to update index with new sparse-checkout paths"));
+		result = 1;
+	}
+
+	argv_array_clear(&argv);
+	return result;
+}
+
+static int sc_enable_config(void)
+{
+	struct argv_array argv = ARGV_ARRAY_INIT;
+	int result = 0;
+	argv_array_pushl(&argv, "config", "--add", "core.sparseCheckout", "true", NULL);
+
+	if (run_command_v_opt(argv.argv, RUN_GIT_CMD)) {
+		error(_("failed to enable core.sparseCheckout"));
+		result = 1;
+	}
+
+	argv_array_clear(&argv);
+	return result;
+}
+
+static int delete_directory(const struct object_id *oid, struct strbuf *base,
+		const char *pathname, unsigned mode, int stage, void *context)
+{
+	struct strbuf dirname = STRBUF_INIT;
+	struct stat sb;
+
+	strbuf_addstr(&dirname, the_repository->worktree);
+	strbuf_addch(&dirname, '/');
+	strbuf_addstr(&dirname, pathname);
+
+	if (stat(dirname.buf, &sb) || !(sb.st_mode & S_IFDIR))
+		return 0;
+
+	if (remove_dir_recursively(&dirname, 0))
+		warning(_("failed to remove directory '%s'"),
+			dirname.buf);
+
+	strbuf_release(&dirname);
+	return 0;
+}
+
+static int sparse_checkout_init(int argc, const char **argv)
+{
+	struct tree *t;
+	struct object_id oid;
+	struct exclude_list el;
+	static struct pathspec pathspec;
+	char *sparse_filename;
+	FILE *fp;
+	int res;
+
+	if (sc_enable_config())
+		return 1;
+
+	memset(&el, 0, sizeof(el));
+
+	sparse_filename = get_sparse_checkout_filename();
+	res = add_excludes_from_file_to_list(sparse_filename, "", 0, &el, NULL);
+
+	/* If we already have a sparse-checkout file, use it. */
+	if (res >= 0) {
+		free(sparse_filename);
+		goto reset_dir;
+	}
+
+	/* initial mode: all blobs at root */
+	fp = fopen(sparse_filename, "w");
+	free(sparse_filename);
+	fprintf(fp, "/*\n!/*/*\n");
+	fclose(fp);
+
+	/* remove all directories in the root, if tracked by Git */
+	if (get_oid("HEAD", &oid)) {
+		/* assume we are in a fresh repo */
+		return 0;
+	}
+
+	t = parse_tree_indirect(&oid);
+
+	parse_pathspec(&pathspec, PATHSPEC_ALL_MAGIC &
+				  ~(PATHSPEC_FROMTOP | PATHSPEC_LITERAL),
+		       PATHSPEC_PREFER_CWD,
+		       "", NULL);
+
+	if (read_tree_recursive(the_repository, t, "", 0, 0, &pathspec,
+				delete_directory, NULL))
+		return 1;
+
+reset_dir:
+	return sc_read_tree();
+}
+
 int cmd_sparse_checkout(int argc, const char **argv, const char *prefix)
 {
 	static struct option builtin_sparse_checkout_options[] = {
@@ -83,6 +185,8 @@  int cmd_sparse_checkout(int argc, const char **argv, const char *prefix)
 	if (argc > 0) {
 		if (!strcmp(argv[0], "list"))
 			return sparse_checkout_list(argc, argv);
+		if (!strcmp(argv[0], "init"))
+			return sparse_checkout_init(argc, argv);
 	}
 
 	usage_with_options(builtin_sparse_checkout_usage,
diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh
index ba6928c641..35ab84aabd 100755
--- a/t/t1091-sparse-checkout-builtin.sh
+++ b/t/t1091-sparse-checkout-builtin.sh
@@ -47,4 +47,44 @@  test_expect_success 'git sparse-checkout list (populated)' '
 	test_cmp expect list
 '
 
+test_expect_success 'git sparse-checkout init' '
+	git -C repo sparse-checkout init &&
+	cat >expect <<-EOF &&
+		/*
+		!/*/*
+	EOF
+	test_cmp expect repo/.git/info/sparse-checkout &&
+	git -C repo config --list >config &&
+	test_i18ngrep "core.sparsecheckout=true" config &&
+	ls repo >dir  &&
+	echo a >expect &&
+	test_cmp expect dir
+'
+
+test_expect_success 'git sparse-checkout list after init' '
+	git -C repo sparse-checkout list >actual &&
+	cat >expect <<-EOF &&
+		/*
+		!/*/*
+	EOF
+	test_cmp expect actual
+'
+
+test_expect_success 'init with existing sparse-checkout' '
+	echo "/folder1/*" >> repo/.git/info/sparse-checkout &&
+	git -C repo sparse-checkout init &&
+	cat >expect <<-EOF &&
+		/*
+		!/*/*
+		/folder1/*
+	EOF
+	test_cmp expect repo/.git/info/sparse-checkout &&
+	ls repo >dir  &&
+	cat >expect <<-EOF &&
+		a
+		folder1
+	EOF
+	test_cmp expect dir
+'
+
 test_done
\ No newline at end of file