diff mbox series

[WIP,v2,2/5] mv: check if out-of-cone file exists in index with SKIP_WORKTREE bit

Message ID 20220527100804.209890-3-shaoxuan.yuan02@gmail.com (mailing list archive)
State Superseded
Headers show
Series mv: fix out-of-cone file/directory move logic | expand

Commit Message

Shaoxuan Yuan May 27, 2022, 10:08 a.m. UTC
Originally, moving a <source> file which is not on-disk but exists in
index as a SKIP_WORKTREE enabled cache entry, "giv mv" command errors
out with "bad source".

Change the checking logic, so that such <source>
file makes "giv mv" command warns with "advise_on_updating_sparse_paths()"
instead of "bad source"; also user now can supply a "--sparse" flag so
this operation can be carried out successfully.

Signed-off-by: Shaoxuan Yuan <shaoxuan.yuan02@gmail.com>
---
 builtin/mv.c                  | 26 +++++++++++++++++++++++++-
 t/t7002-mv-sparse-checkout.sh |  4 ++--
 2 files changed, 27 insertions(+), 3 deletions(-)

Comments

Derrick Stolee May 27, 2022, 3:13 p.m. UTC | #1
On 5/27/2022 6:08 AM, Shaoxuan Yuan wrote:
> Originally, moving a <source> file which is not on-disk but exists in
> index as a SKIP_WORKTREE enabled cache entry, "giv mv" command errors
> out with "bad source".
> 
> Change the checking logic, so that such <source>
> file makes "giv mv" command warns with "advise_on_updating_sparse_paths()"
> instead of "bad source"; also user now can supply a "--sparse" flag so
> this operation can be carried out successfully.
> 
> Signed-off-by: Shaoxuan Yuan <shaoxuan.yuan02@gmail.com>
> ---
>  builtin/mv.c                  | 26 +++++++++++++++++++++++++-
>  t/t7002-mv-sparse-checkout.sh |  4 ++--
>  2 files changed, 27 insertions(+), 3 deletions(-)
> 
> diff --git a/builtin/mv.c b/builtin/mv.c
> index 83a465ba83..32ad4d5682 100644
> --- a/builtin/mv.c
> +++ b/builtin/mv.c
> @@ -185,8 +185,32 @@ int cmd_mv(int argc, const char **argv, const char *prefix)
>  
>  		length = strlen(src);
>  		if (lstat(src, &st) < 0) {
> +			/*
> +			 * TODO: for now, when you try to overwrite a <destination>
> +			 * with your <source> as a sparse file, if you supply a "--sparse"
> +			 * flag, then the action will be done without providing "--force"
> +			 * and no warning.
> +			 *
> +			 * This is mainly because the sparse <source>
> +			 * is not on-disk, and this if-else chain will be cut off early in
> +			 * this check, thus the "--force" check is ignored. Need fix.
> +			 */

I wonder if this is worth the comment here, or if we'd rather see
the mention in the commit message. You have documented tests that
fail in this case, so we already have something that marks this
as "TODO" in a more discoverable place.

> +			int pos = cache_name_pos(src, length);
> +			if (pos >= 0) {
> +				const struct cache_entry *ce = active_cache[pos];
> +
> +				if (ce_skip_worktree(ce)) {
> +					if (!ignore_sparse)
> +						string_list_append(&only_match_skip_worktree, src);
> +					else
> +						modes[i] = SPARSE;


> +				}
> +				else
> +					bad = _("bad source");

style nit:

	} else {
		bad = _("bad source");
	}

> +			}
>  			/* only error if existence is expected. */
> -			if (modes[i] != SPARSE)
> +			else if (modes[i] != SPARSE)
>  				bad = _("bad source");

For this one, the comment makes it difficult to connect the 'else
if' to its corresponding 'if'. Perhaps:

	} else if (modes[i] != SPARSE) {
		/* only error if existence is expected. */
		bad = _("bad source");
	}

>  		} else if (!strncmp(src, dst, length) &&
>  				(dst[length] == 0 || dst[length] == '/')) {

In general, I found this if/else-if chain hard to grok, and
a lot of it is because we have "simple" cases at the end
and the complicated parts have ever-increasing nesting. This
is mostly due to the existing if/else-if chain in this method.

Here is a diff that replaces that if/else-if chain with a
'goto' trick to jump ahead, allowing some code to decrease in
tabbing:

---- >8 ----

diff --git a/builtin/mv.c b/builtin/mv.c
index 83a465ba831..1ca2c21da89 100644
--- a/builtin/mv.c
+++ b/builtin/mv.c
@@ -186,53 +186,68 @@ int cmd_mv(int argc, const char **argv, const char *prefix)
 		length = strlen(src);
 		if (lstat(src, &st) < 0) {
 			/* only error if existence is expected. */
-			if (modes[i] != SPARSE)
+			if (modes[i] != SPARSE) {
 				bad = _("bad source");
-		} else if (!strncmp(src, dst, length) &&
-				(dst[length] == 0 || dst[length] == '/')) {
+				goto act_on_entry;
+			}
+		}
+		if (!strncmp(src, dst, length) &&
+		    (dst[length] == 0 || dst[length] == '/')) {
 			bad = _("can not move directory into itself");
-		} else if ((src_is_dir = S_ISDIR(st.st_mode))
-				&& lstat(dst, &st) == 0)
+			goto act_on_entry;
+		}
+		if ((src_is_dir = S_ISDIR(st.st_mode))
+		    && lstat(dst, &st) == 0) {
 			bad = _("cannot move directory over file");
-		else if (src_is_dir) {
+			goto act_on_entry;
+		}
+		if (src_is_dir) {
+			int j, dst_len, n;
 			int first = cache_name_pos(src, length), last;
 
-			if (first >= 0)
+			if (first >= 0) {
 				prepare_move_submodule(src, first,
 						       submodule_gitfile + i);
-			else if (index_range_of_same_dir(src, length,
-							 &first, &last) < 1)
+				goto act_on_entry;
+			} else if (index_range_of_same_dir(src, length,
+							   &first, &last) < 1) {
 				bad = _("source directory is empty");
-			else { /* last - first >= 1 */
-				int j, dst_len, n;
-
-				modes[i] = WORKING_DIRECTORY;
-				n = argc + last - first;
-				REALLOC_ARRAY(source, n);
-				REALLOC_ARRAY(destination, n);
-				REALLOC_ARRAY(modes, n);
-				REALLOC_ARRAY(submodule_gitfile, n);
-
-				dst = add_slash(dst);
-				dst_len = strlen(dst);
-
-				for (j = 0; j < last - first; j++) {
-					const struct cache_entry *ce = active_cache[first + j];
-					const char *path = ce->name;
-					source[argc + j] = path;
-					destination[argc + j] =
-						prefix_path(dst, dst_len, path + length + 1);
-					modes[argc + j] = ce_skip_worktree(ce) ? SPARSE : INDEX;
-					submodule_gitfile[argc + j] = NULL;
-				}
-				argc += last - first;
+				goto act_on_entry;
 			}
-		} else if (!(ce = cache_file_exists(src, length, 0))) {
+
+			/* last - first >= 1 */
+			modes[i] = WORKING_DIRECTORY;
+			n = argc + last - first;
+			REALLOC_ARRAY(source, n);
+			REALLOC_ARRAY(destination, n);
+			REALLOC_ARRAY(modes, n);
+			REALLOC_ARRAY(submodule_gitfile, n);
+
+			dst = add_slash(dst);
+			dst_len = strlen(dst);
+
+			for (j = 0; j < last - first; j++) {
+				const struct cache_entry *ce = active_cache[first + j];
+				const char *path = ce->name;
+				source[argc + j] = path;
+				destination[argc + j] =
+					prefix_path(dst, dst_len, path + length + 1);
+				modes[argc + j] = ce_skip_worktree(ce) ? SPARSE : INDEX;
+				submodule_gitfile[argc + j] = NULL;
+			}
+			argc += last - first;
+			goto act_on_entry;
+		}
+		if (!(ce = cache_file_exists(src, length, 0))) {
 			bad = _("not under version control");
-		} else if (ce_stage(ce)) {
+			goto act_on_entry;
+		}
+		if (ce_stage(ce)) {
 			bad = _("conflicted");
-		} else if (lstat(dst, &st) == 0 &&
-			 (!ignore_case || strcasecmp(src, dst))) {
+			goto act_on_entry;
+		}
+		if (lstat(dst, &st) == 0 &&
+		    (!ignore_case || strcasecmp(src, dst))) {
 			bad = _("destination exists");
 			if (force) {
 				/*
@@ -246,34 +261,40 @@ int cmd_mv(int argc, const char **argv, const char *prefix)
 				} else
 					bad = _("Cannot overwrite");
 			}
-		} else if (string_list_has_string(&src_for_dst, dst))
+			goto act_on_entry;
+		}
+		if (string_list_has_string(&src_for_dst, dst)) {
 			bad = _("multiple sources for the same target");
-		else if (is_dir_sep(dst[strlen(dst) - 1]))
+			goto act_on_entry;
+		}
+		if (is_dir_sep(dst[strlen(dst) - 1])) {
 			bad = _("destination directory does not exist");
-		else {
-			/*
-			 * We check if the paths are in the sparse-checkout
-			 * definition as a very final check, since that
-			 * allows us to point the user to the --sparse
-			 * option as a way to have a successful run.
-			 */
-			if (!ignore_sparse &&
-			    !path_in_sparse_checkout(src, &the_index)) {
-				string_list_append(&only_match_skip_worktree, src);
-				skip_sparse = 1;
-			}
-			if (!ignore_sparse &&
-			    !path_in_sparse_checkout(dst, &the_index)) {
-				string_list_append(&only_match_skip_worktree, dst);
-				skip_sparse = 1;
-			}
-
-			if (skip_sparse)
-				goto remove_entry;
+			goto act_on_entry;
+		}
 
-			string_list_insert(&src_for_dst, dst);
+		/*
+		 * We check if the paths are in the sparse-checkout
+		 * definition as a very final check, since that
+		 * allows us to point the user to the --sparse
+		 * option as a way to have a successful run.
+		 */
+		if (!ignore_sparse &&
+		    !path_in_sparse_checkout(src, &the_index)) {
+			string_list_append(&only_match_skip_worktree, src);
+			skip_sparse = 1;
+		}
+		if (!ignore_sparse &&
+		    !path_in_sparse_checkout(dst, &the_index)) {
+			string_list_append(&only_match_skip_worktree, dst);
+			skip_sparse = 1;
 		}
 
+		if (skip_sparse)
+			goto remove_entry;
+
+		string_list_insert(&src_for_dst, dst);
+
+act_on_entry:
 		if (!bad)
 			continue;
 		if (!ignore_errors)

---- >8 ----

But mostly the reason for this refactor is that the following
diff should be equivalent to yours:

---- >8 ----

diff --git a/builtin/mv.c b/builtin/mv.c
index d8b5c24fb5..add48e23b4 100644
--- a/builtin/mv.c
+++ b/builtin/mv.c
@@ -185,11 +185,28 @@ int cmd_mv(int argc, const char **argv, const char *prefix)
 
 		length = strlen(src);
 		if (lstat(src, &st) < 0) {
-			/* only error if existence is expected. */
-			if (modes[i] != SPARSE) {
+			int pos;
+			const struct cache_entry *ce;
+
+			pos = cache_name_pos(src, length);
+			if (pos < 0) {
+				/* only error if existence is expected. */
+				if (modes[i] != SPARSE)
+					bad = _("bad source");
+				goto act_on_entry;
+			}
+
+			ce = active_cache[pos];
+			if (!ce_skip_worktree(ce)) {
 				bad = _("bad source");
 				goto act_on_entry;
 			}
+
+			if (!ignore_sparse)
+				string_list_append(&only_match_skip_worktree, src);
+			else
+				modes[i] = SPARSE;
+			goto act_on_entry;
 		}
 		if (!strncmp(src, dst, length) &&
 		    (dst[length] == 0 || dst[length] == '/')) {
---- >8 ---

To me, this is a bit easier to parse, since we find the error
cases and jump to the action before continuing on the "happy
path". It does involve that first big refactor first, so I'd
like to hear opinions of other contributors before you jump to
taking this suggestion.

Thanks,
-Stolee
Victoria Dye May 27, 2022, 10:38 p.m. UTC | #2
Derrick Stolee wrote:
> 
> 
> On 5/27/2022 6:08 AM, Shaoxuan Yuan wrote:
>> Originally, moving a <source> file which is not on-disk but exists in
>> index as a SKIP_WORKTREE enabled cache entry, "giv mv" command errors
>> out with "bad source".
>>
>> Change the checking logic, so that such <source>
>> file makes "giv mv" command warns with "advise_on_updating_sparse_paths()"
>> instead of "bad source"; also user now can supply a "--sparse" flag so
>> this operation can be carried out successfully.
>>
>> Signed-off-by: Shaoxuan Yuan <shaoxuan.yuan02@gmail.com>
>> ---
>>  builtin/mv.c                  | 26 +++++++++++++++++++++++++-
>>  t/t7002-mv-sparse-checkout.sh |  4 ++--
>>  2 files changed, 27 insertions(+), 3 deletions(-)
>>
>> diff --git a/builtin/mv.c b/builtin/mv.c
>> index 83a465ba83..32ad4d5682 100644
>> --- a/builtin/mv.c
>> +++ b/builtin/mv.c
>> @@ -185,8 +185,32 @@ int cmd_mv(int argc, const char **argv, const char *prefix)
>>  
>>  		length = strlen(src);
>>  		if (lstat(src, &st) < 0) {
>> +			/*
>> +			 * TODO: for now, when you try to overwrite a <destination>
>> +			 * with your <source> as a sparse file, if you supply a "--sparse"
>> +			 * flag, then the action will be done without providing "--force"
>> +			 * and no warning.
>> +			 *
>> +			 * This is mainly because the sparse <source>
>> +			 * is not on-disk, and this if-else chain will be cut off early in
>> +			 * this check, thus the "--force" check is ignored. Need fix.
>> +			 */
> 
> I wonder if this is worth the comment here, or if we'd rather see
> the mention in the commit message. You have documented tests that
> fail in this case, so we already have something that marks this
> as "TODO" in a more discoverable place.
> 
>> +			int pos = cache_name_pos(src, length);
>> +			if (pos >= 0) {
>> +				const struct cache_entry *ce = active_cache[pos];
>> +
>> +				if (ce_skip_worktree(ce)) {
>> +					if (!ignore_sparse)
>> +						string_list_append(&only_match_skip_worktree, src);
>> +					else
>> +						modes[i] = SPARSE;
> 
> 
>> +				}
>> +				else
>> +					bad = _("bad source");
> 
> style nit:
> 
> 	} else {
> 		bad = _("bad source");
> 	}
> 

In case this advice seems contradictory with past style suggestions, from 'Documentation/CodingGuidelines':

	- When there are multiple arms to a conditional and some of them
	  require braces, enclose even a single line block in braces for
	  consistency. E.g.:

		if (foo) {
			doit();
		} else {
			one();
			two();
			three();
		}

>> +			}
>>  			/* only error if existence is expected. */
>> -			if (modes[i] != SPARSE)
>> +			else if (modes[i] != SPARSE)
>>  				bad = _("bad source");
> 
> For this one, the comment makes it difficult to connect the 'else
> if' to its corresponding 'if'. Perhaps:
> 
> 	} else if (modes[i] != SPARSE) {
> 		/* only error if existence is expected. */
> 		bad = _("bad source");
> 	}
> 
>>  		} else if (!strncmp(src, dst, length) &&
>>  				(dst[length] == 0 || dst[length] == '/')) {
> 
> In general, I found this if/else-if chain hard to grok, and
> a lot of it is because we have "simple" cases at the end
> and the complicated parts have ever-increasing nesting. This
> is mostly due to the existing if/else-if chain in this method.
> 

Agreed that the if/else-if chains make 'cmd_mv' complicated. The most
frustrating thing about its current state (unrelated to this patch) is how
unclear it is whether any given conditions are mutually-exclusive vs.
dependent vs. one taking precedence over another. On that note... 

> Here is a diff that replaces that if/else-if chain with a
> 'goto' trick to jump ahead, allowing some code to decrease in
> tabbing:
> 

...while I'm usually hesitant to add more 'goto' labels to the code if it
can be avoided, I think that model fits this use case well.

> ---- >8 ----

[cutting the proposed refactor for space]

> ---- >8 ---
> 
> To me, this is a bit easier to parse, since we find the error
> cases and jump to the action before continuing on the "happy
> path". It does involve that first big refactor first, so I'd
> like to hear opinions of other contributors before you jump to
> taking this suggestion.
> 

I like how the refactored version simplifies 'cmd_mv', and how it
correspondingly simplifies the new checks in this (Shaoxuan's) patch. It
does still leave us with one big, monolithic 'cmd_mv', so in an ideal world
I'd probably lean towards pulling the innards of the main for-loop(s) into a
few dedicated functions (like 'validate_move_candidate', 'move_entry').
However, I'm happy with any improvement, and this refactor would certainly
give us that!

> Thanks,
> -Stolee
Shaoxuan Yuan May 31, 2022, 8:06 a.m. UTC | #3
On Fri, May 27, 2022 at 11:13 PM Derrick Stolee
<derrickstolee@github.com> wrote:
> > diff --git a/builtin/mv.c b/builtin/mv.c
> > index 83a465ba83..32ad4d5682 100644
> > --- a/builtin/mv.c
> > +++ b/builtin/mv.c
> > @@ -185,8 +185,32 @@ int cmd_mv(int argc, const char **argv, const char *prefix)
> >
> >               length = strlen(src);
> >               if (lstat(src, &st) < 0) {
> > +                     /*
> > +                      * TODO: for now, when you try to overwrite a <destination>
> > +                      * with your <source> as a sparse file, if you supply a "--sparse"
> > +                      * flag, then the action will be done without providing "--force"
> > +                      * and no warning.
> > +                      *
> > +                      * This is mainly because the sparse <source>
> > +                      * is not on-disk, and this if-else chain will be cut off early in
> > +                      * this check, thus the "--force" check is ignored. Need fix.
> > +                      */
>
> I wonder if this is worth the comment here, or if we'd rather see
> the mention in the commit message. You have documented tests that
> fail in this case, so we already have something that marks this
> as "TODO" in a more discoverable place.

This comment was added during my local development, it should be
removed.

> > +                     int pos = cache_name_pos(src, length);
> > +                     if (pos >= 0) {
> > +                             const struct cache_entry *ce = active_cache[pos];
> > +
> > +                             if (ce_skip_worktree(ce)) {
> > +                                     if (!ignore_sparse)
> > +                                             string_list_append(&only_match_skip_worktree, src);
> > +                                     else
> > +                                             modes[i] = SPARSE;
>
>
> > +                             }
> > +                             else
> > +                                     bad = _("bad source");
>
> style nit:
>
>         } else {
>                 bad = _("bad source");
>         }
>
> > +                     }
> >                       /* only error if existence is expected. */
> > -                     if (modes[i] != SPARSE)
> > +                     else if (modes[i] != SPARSE)
> >                               bad = _("bad source");
>
> For this one, the comment makes it difficult to connect the 'else
> if' to its corresponding 'if'. Perhaps:
>
>         } else if (modes[i] != SPARSE) {
>                 /* only error if existence is expected. */
>                 bad = _("bad source");
>         }
>
> >               } else if (!strncmp(src, dst, length) &&
> >                               (dst[length] == 0 || dst[length] == '/')) {
>
> In general, I found this if/else-if chain hard to grok, and
> a lot of it is because we have "simple" cases at the end
> and the complicated parts have ever-increasing nesting. This
> is mostly due to the existing if/else-if chain in this method.
>
> Here is a diff that replaces that if/else-if chain with a
> 'goto' trick to jump ahead, allowing some code to decrease in
> tabbing:
>
> ---- >8 ----

[cutting the proposed refactor for space]

> ---- >8 ----
>
> But mostly the reason for this refactor is that the following
> diff should be equivalent to yours:
>
> ---- >8 ----
>
> diff --git a/builtin/mv.c b/builtin/mv.c
> index d8b5c24fb5..add48e23b4 100644
> --- a/builtin/mv.c
> +++ b/builtin/mv.c
> @@ -185,11 +185,28 @@ int cmd_mv(int argc, const char **argv, const char *prefix)
>
>                 length = strlen(src);
>                 if (lstat(src, &st) < 0) {
> -                       /* only error if existence is expected. */
> -                       if (modes[i] != SPARSE) {
> +                       int pos;
> +                       const struct cache_entry *ce;
> +
> +                       pos = cache_name_pos(src, length);
> +                       if (pos < 0) {
> +                               /* only error if existence is expected. */
> +                               if (modes[i] != SPARSE)
> +                                       bad = _("bad source");
> +                               goto act_on_entry;
> +                       }
> +
> +                       ce = active_cache[pos];
> +                       if (!ce_skip_worktree(ce)) {
>                                 bad = _("bad source");
>                                 goto act_on_entry;
>                         }
> +
> +                       if (!ignore_sparse)
> +                               string_list_append(&only_match_skip_worktree, src);
> +                       else
> +                               modes[i] = SPARSE;
> +                       goto act_on_entry;
>                 }
>                 if (!strncmp(src, dst, length) &&
>                     (dst[length] == 0 || dst[length] == '/')) {
> ---- >8 ---
>
> To me, this is a bit easier to parse, since we find the error
> cases and jump to the action before continuing on the "happy
> path". It does involve that first big refactor first, so I'd
> like to hear opinions of other contributors before you jump to
> taking this suggestion.

True. I also find it easier to read. Though Victoria mentioned the
goto hazard, the gotos here decouples the huge chain and that
brings clarity and makes it easier to extend.
diff mbox series

Patch

diff --git a/builtin/mv.c b/builtin/mv.c
index 83a465ba83..32ad4d5682 100644
--- a/builtin/mv.c
+++ b/builtin/mv.c
@@ -185,8 +185,32 @@  int cmd_mv(int argc, const char **argv, const char *prefix)
 
 		length = strlen(src);
 		if (lstat(src, &st) < 0) {
+			/*
+			 * TODO: for now, when you try to overwrite a <destination>
+			 * with your <source> as a sparse file, if you supply a "--sparse"
+			 * flag, then the action will be done without providing "--force"
+			 * and no warning.
+			 *
+			 * This is mainly because the sparse <source>
+			 * is not on-disk, and this if-else chain will be cut off early in
+			 * this check, thus the "--force" check is ignored. Need fix.
+			 */
+
+			int pos = cache_name_pos(src, length);
+			if (pos >= 0) {
+				const struct cache_entry *ce = active_cache[pos];
+
+				if (ce_skip_worktree(ce)) {
+					if (!ignore_sparse)
+						string_list_append(&only_match_skip_worktree, src);
+					else
+						modes[i] = SPARSE;
+				}
+				else
+					bad = _("bad source");
+			}
 			/* only error if existence is expected. */
-			if (modes[i] != SPARSE)
+			else if (modes[i] != SPARSE)
 				bad = _("bad source");
 		} else if (!strncmp(src, dst, length) &&
 				(dst[length] == 0 || dst[length] == '/')) {
diff --git a/t/t7002-mv-sparse-checkout.sh b/t/t7002-mv-sparse-checkout.sh
index 963cb512e2..581ef4c0f6 100755
--- a/t/t7002-mv-sparse-checkout.sh
+++ b/t/t7002-mv-sparse-checkout.sh
@@ -239,7 +239,7 @@  test_expect_failure 'can move out-of-cone directory with --sparse' '
 	test_path_is_file sub/folder1/file1
 '
 
-test_expect_failure 'refuse to move out-of-cone file without --sparse' '
+test_expect_success 'refuse to move out-of-cone file without --sparse' '
 	git sparse-checkout disable &&
 	git reset --hard &&
 	mkdir folder1 &&
@@ -255,7 +255,7 @@  test_expect_failure 'refuse to move out-of-cone file without --sparse' '
 	test_cmp expect stderr
 '
 
-test_expect_failure 'can move out-of-cone file with --sparse' '
+test_expect_success 'can move out-of-cone file with --sparse' '
 	git sparse-checkout disable &&
 	git reset --hard &&
 	mkdir folder1 &&