diff mbox series

[v2] hard reset safe mode

Message ID pull.1137.v2.git.1644618404948.gitgitgadget@gmail.com (mailing list archive)
State New, archived
Headers show
Series [v2] hard reset safe mode | expand

Commit Message

Viacelaus Feb. 11, 2022, 10:26 p.m. UTC
From: Viacelaus <vaceslavkozin619@gmail.com>

The power of hard reset may frequently be inconvinient for a common user. So
this is an implementation of safe mode for hard reset. It can be switched on
by setting 'reset.safe' config variable to true. When running 'reset --hard'
with 'reset.safe' enabled git will check if there are any staged changes
that may be discarded by this reset. If there is a chance of deleting the
changes, git will ask the user for a confirmation with Yes/No choice.

Signed-off-by: Viacelaus <vaceslavkozin619@gmail.com>
---
    Hard reset safe mode
    
    Preventing unexpected code-deletion with hard reset 'safe mode'.
    
    Considering the recomendations for patch v1 and in order to preserve the
    current robustness of hard reset, I have made the following
    modifications to the original version (which has completely disallowed
    hard reset on unborn branch with staged files):
    
    Changes since v1:
    
     * Described security measures aren't enabled by default. Safe mode can
       be turned on with 'reset.safe' config variable.
     * Replaced the resulting error with Yes/No choice so hard reset on
       unborn branch isn`t impossible now.
     * Detection of staged changes that are going to be deleted by the reset
       isn't limited to unborn-branch state now. Git will warn you and ask
       for a confirmation if there are commits on the branch also.

Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-1137%2FViaceslavus%2Fhard-reset-safety-on-empty-repo-v2
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-1137/Viaceslavus/hard-reset-safety-on-empty-repo-v2
Pull-Request: https://github.com/gitgitgadget/git/pull/1137

Range-diff vs v1:

 1:  13cc01b7de7 ! 1:  e6eec1bfda2 forbid a hard reset before the initial commit
     @@ Metadata
      Author: Viacelaus <vaceslavkozin619@gmail.com>
      
       ## Commit message ##
     -    forbid a hard reset before the initial commit
     +    hard reset safe mode
      
     -    Performing 'git reset --hard' on empty repo with staged files
     -    may have the only one possible result - deleting all staged files.
     -    Such behaviour may be unexpected or even dangerous. With this
     -    commit, when running 'git reset --hard', git will check for the
     -    existence of commits in the repo; in case of absence of such, and
     -    also if there are any files staged, git will die with an error.
     +    The power of hard reset may frequently be inconvinient for a common user. So
     +    this is an implementation of safe mode for hard reset. It can be switched on
     +    by setting 'reset.safe' config variable to true. When running 'reset --hard'
     +    with 'reset.safe' enabled git will check if there are any staged changes
     +    that may be discarded by this reset. If there is a chance of deleting the
     +    changes, git will ask the user for a confirmation with Yes/No choice.
      
          Signed-off-by: Viacelaus <vaceslavkozin619@gmail.com>
      
       ## builtin/reset.c ##
     +@@
     + #include "submodule.h"
     + #include "submodule-config.h"
     + #include "dir.h"
     ++#include "wt-status.h"
     + 
     + #define REFRESH_INDEX_DELAY_WARNING_IN_MS (2 * 1000)
     + 
      @@ builtin/reset.c: static void die_if_unmerged_cache(int reset_type)
       
       }
     @@ builtin/reset.c: static void die_if_unmerged_cache(int reset_type)
      +{
      +	return is_branch(refname);
      +}
     ++
     ++static void accept_discarding_changes(void) {
     ++	int answer = getc(stdin);
     ++	printf(_("Some staged changes may be discarded by this reset. Continue? [Y/n]"));
     ++
     ++	if (answer != 'y' && answer != 'Y') {
     ++		printf(_("aborted\n"));
     ++		exit(1);
     ++	}
     ++}
     ++
     ++static void detect_risky_reset(int commits_exist) {
     ++	int cache = read_cache();
     ++	if(!commits_exist) {
     ++		if(cache == 1) {
     ++			accept_discarding_changes();
     ++		}
     ++	}
     ++	else {
     ++		if(has_uncommitted_changes(the_repository, 1)) {
     ++			accept_discarding_changes();
     ++		}
     ++	}
     ++}
      +
       static void parse_args(struct pathspec *pathspec,
       		       const char **argv, const char *prefix,
     @@ builtin/reset.c: int cmd_reset(int argc, const char **argv, const char *prefix)
       	}
      +
      +	if (reset_type == HARD) {
     -+		int commits_exist = for_each_fullref_in("refs/heads", check_commit_exists, NULL);
     -+		if(!commits_exist) {
     -+			if(read_cache() == 1)
     -+				die(_("Hard reset isn`t allowed before the first commit."));
     ++		int safe = 0;
     ++		git_config_get_bool("reset.safe", &safe);
     ++		if (safe) {
     ++			int commits_exist = for_each_fullref_in("refs/heads", check_commit_exists, NULL);
     ++			detect_risky_reset(commits_exist);
      +		}
      +	}
      +
     @@ t/t7104-reset-hard.sh: test_expect_success 'reset --hard did not corrupt index o
       
       '
       
     -+test_expect_success 'reset --hard on empty repo without staged changes works fine' '
     -+	git reset --hard
     ++test_expect_success 'reset --hard in safe mode on unborn branch with staged files results in a warning' '
     ++	git config reset.safe on &&
     ++	touch a &&
     ++	git add a &&
     ++	test_must_fail git reset --hard
     ++
      +'
      +
     -+test_expect_success 'reset --hard on empty repo with staged changes results in an error' '
     -+	touch n &&
     -+	git add n &&
     -+	test_must_fail git reet --hard
     ++test_expect_success 'reset --hard in safe mode after a commit without staged changes works fine' '
     ++	git config reset.safe on &&
     ++	touch b &&
     ++	git add b &&
     ++	git commit -m "initial" &&
     ++	git reset --hard
     ++
      +'
      +
     -+test_expect_success 'reset --hard after a commit works fine' '
     -+	touch new &&
     -+	git add new &&
     ++test_expect_success 'reset --hard in safe mode after a commit with staged changes results in a warning' '
     ++	git config reset.safe on &&
     ++	touch c d &&
     ++	git add c &&
      +	git commit -m "initial" &&
     -+	git reset --hard 2> error &&
     -+	test_must_be_empty error
     ++	git add d &&
     ++	test_must_fail git reset --hard
     ++
      +'
      +
       test_done
     -
     - ## t/t7106-reset-unborn-branch.sh ##
     -@@ t/t7106-reset-unborn-branch.sh: test_expect_success 'reset --soft is a no-op' '
     - 	test_cmp expect actual
     - '
     - 
     --test_expect_success 'reset --hard' '
     --	rm .git/index &&
     --	git add a &&
     --	test_when_finished "echo a >a" &&
     --	git reset --hard &&
     --
     --	git ls-files >actual &&
     --	test_must_be_empty actual &&
     --	test_path_is_missing a
     --'
     --
     - test_done


 builtin/reset.c       | 40 ++++++++++++++++++++++++++++++++++++++++
 t/t7104-reset-hard.sh | 27 +++++++++++++++++++++++++++
 2 files changed, 67 insertions(+)


base-commit: 5d01301f2b865aa8dba1654d3f447ce9d21db0b5

Comments

Junio C Hamano Feb. 11, 2022, 11:03 p.m. UTC | #1
"Viaceslavus via GitGitGadget" <gitgitgadget@gmail.com> writes:

> From: Viacelaus <vaceslavkozin619@gmail.com>
>
> The power of hard reset may frequently be inconvinient for a common user. So
> this is an implementation of safe mode for hard reset. It can be switched on
> by setting 'reset.safe' config variable to true. When running 'reset --hard'
> with 'reset.safe' enabled git will check if there are any staged changes
> that may be discarded by this reset. If there is a chance of deleting the
> changes, git will ask the user for a confirmation with Yes/No choice.

There needs an explanation on how this avoids breaking scripts that
trust that "git reset --hard HEAD" reliably matches the index and
the working tree files to what is recorded in HEAD without getting
stuck waiting for any user input.  "They can turn off reset.safe" is
not an acceptable answer.

> +static int check_commit_exists(const char *refname, const struct object_id *oid, int f, void *d)
> +{
> +	return is_branch(refname);
> +}

The returned value from a for_each_ref() callback is used to tell
the caller "stop here, no need to further iterate and call me with
other refs".  I think this wants to say "if I ever get called even
once, tell the caller to stop, so that it can tell its caller that
it was stopped".

> +static void accept_discarding_changes(void) {
> +	int answer = getc(stdin);
> +	printf(_("Some staged changes may be discarded by this reset. Continue? [Y/n]"));
> +
> +	if (answer != 'y' && answer != 'Y') {
> +		printf(_("aborted\n"));
> +		exit(1);
> +	}
> +}

I'd think at least we should use git_prompt(), instead of
hand-rolled prompt routine like this one that assumes that an
end-user is sitting in front of the terminal waiting to be prompted.

If updating "git reset" like this patch does were a good idea to
begin with, that is.

> +static void detect_risky_reset(int commits_exist) {
> +	int cache = read_cache();
> +	if(!commits_exist) {
> +		if(cache == 1) {
> +			accept_discarding_changes();
> +		}
> +	}
> +	else {
> +		if(has_uncommitted_changes(the_repository, 1)) {
> +			accept_discarding_changes();
> +		}
> +	}
> +}

Style (too many to list---see Documentation/CodingGuidelines).

> +	if (reset_type == HARD) {
> +		int safe = 0;
> +		git_config_get_bool("reset.safe", &safe);
> +		if (safe) {
> +			int commits_exist = for_each_fullref_in("refs/heads", check_commit_exists, NULL);

The callback is called for each and every ref inside "refs/heads/",
so by definition, shouldn't any of them pass "is_branch(refname)"?

In any case, why does this have to be done by the caller?  If the
helper claims to be capable of detecting a "risky reset" (if such a
thing exists, that is), and if the helper behaves differently when
there is any commit on any branch or not as its implementation
detail, shouldn't it figure out if there is a commit _inside_ the
helper itself, not forcing the caller to compute it for it?

> +			detect_risky_reset(commits_exist);
> +		}
> +	}
> +
>  	if (reset_type == NONE)
>  		reset_type = MIXED; /* by default */
>  

> diff --git a/t/t7104-reset-hard.sh b/t/t7104-reset-hard.sh
> index cf9697eba9a..c962c706bed 100755
> --- a/t/t7104-reset-hard.sh
> +++ b/t/t7104-reset-hard.sh
> @@ -44,4 +44,31 @@ test_expect_success 'reset --hard did not corrupt index or cache-tree' '
>  
>  '
>  
> +test_expect_success 'reset --hard in safe mode on unborn branch with staged files results in a warning' '
> +	git config reset.safe on &&

Use either "test_when_finished" or "test_config", which is a good
way to isolate each test from side effects of running the test that
comes before it.
Bagas Sanjaya Feb. 14, 2022, 3:14 a.m. UTC | #2
On 12/02/22 06.03, Junio C Hamano wrote:
> "Viaceslavus via GitGitGadget" <gitgitgadget@gmail.com> writes:
> 
>> From: Viacelaus <vaceslavkozin619@gmail.com>
>>
>> The power of hard reset may frequently be inconvinient for a common user. So
>> this is an implementation of safe mode for hard reset. It can be switched on
>> by setting 'reset.safe' config variable to true. When running 'reset --hard'
>> with 'reset.safe' enabled git will check if there are any staged changes
>> that may be discarded by this reset. If there is a chance of deleting the
>> changes, git will ask the user for a confirmation with Yes/No choice.
> 
> There needs an explanation on how this avoids breaking scripts that
> trust that "git reset --hard HEAD" reliably matches the index and
> the working tree files to what is recorded in HEAD without getting
> stuck waiting for any user input.  "They can turn off reset.safe" is
> not an acceptable answer.
> 
>> +static int check_commit_exists(const char *refname, const struct object_id *oid, int f, void *d)
>> +{
>> +	return is_branch(refname);
>> +}
> 
> The returned value from a for_each_ref() callback is used to tell
> the caller "stop here, no need to further iterate and call me with
> other refs".  I think this wants to say "if I ever get called even
> once, tell the caller to stop, so that it can tell its caller that
> it was stopped".
> 
>> +static void accept_discarding_changes(void) {
>> +	int answer = getc(stdin);
>> +	printf(_("Some staged changes may be discarded by this reset. Continue? [Y/n]"));
>> +
>> +	if (answer != 'y' && answer != 'Y') {
>> +		printf(_("aborted\n"));
>> +		exit(1);
>> +	}
>> +}
> 
> I'd think at least we should use git_prompt(), instead of
> hand-rolled prompt routine like this one that assumes that an
> end-user is sitting in front of the terminal waiting to be prompted.
> 
> If updating "git reset" like this patch does were a good idea to
> begin with, that is.
> 

I think it's better just to error out when there are staged changes.
The message will be something like:
"error: there are staged changes that will be hard-reset. Please
commit or stash them before resetting. If you want to remove all
changes from index, you can do so by running 'git reset'."
Junio C Hamano Feb. 14, 2022, 11:44 a.m. UTC | #3
Bagas Sanjaya <bagasdotme@gmail.com> writes:

> I think it's better just to error out when there are staged changes.
> The message will be something like:
> "error: there are staged changes that will be hard-reset. Please
> commit or stash them before resetting. If you want to remove all
> changes from index, you can do so by running 'git reset'."

I do prefer not to have interactive prompt at all, as we _will_ get
it wrong when we shouldn't be doing the warning or prompting (i.e.
when no user is sitting in front of the terminal).

Another thing that I would be very opposed to in the posted patch is
the special casing of an initial commit.  If typing 10,000 lines of
material to create a new file, doing "git add", and changing mind
and saying "git reset --hard" to lose the new file is painful before
creating the first commit, doing exactly the same in a project after
10 years with 10,000 commits will hurt exactly the same way.  You'd
"lose" the 10,000 lines of material you prepared to the "accident"
either way.

One more thing the posted patch is designed in a wrong way is that
it cares about the state of _other_ branches; even if you "git reset
--hard" on an unborn branch, if there are some commits on other
branches that has nothing to do with your current state, it will do
what it was told to do regardless of "safe mode".  The "damage"
caused by such an "accident" would be the same for the end-user,
whether there is an unrelated branch with a commit or there isn't.
You'd "lose" the 10,000 lines of material you prepared to the
"accident" regardless.

So, I am strongly opposed to the posted patch.  "git reset --hard"
is designed to bring the index and the working tree to the pristine
state in a predictable way, and making it work differently depending
on a setting, or if it is on an unborn branch, or if there are
commits on any branch, goes very against its spirit.

After all, "reset --hard" is called *HARD* for a reason.  It is not
"unsafe".  It is designed to give you pristine state no matter what
and it tries very hard to do so, by even removing an untracked file
that is squatting the path at which the pristine state wants a
directory.

In other words, those who type "git reset --hard" WITHOUT wishing to
go back to the pristine state by clearing the deck may be using a
wrong command.

I wonder if "git reset" (i.e. mixed, not hard) is what they want to
use, instead.  It is the command to use when: "I've made a mess in
the index after doing 'add -p' and other things, and want to get the
index into pristine state, while keeping the working tree files".

After doing so, the first thing they can do is to "git diff" to
compare what they truly do not want to have anywhere and "git
checkout -- <path>" to update the whole file to the state in the
index, or "git checkout -p -- <path>" to update them piecemeal.

Or perhaps they want "git stash".  That would clear the deck and
give them the pristine state justlike "git reset --hard" would, but
if they found that they did so by mistake, they can immediately
unstash.

Having said all that, because "reset" is mostly about the index and
not about the working tree (the core of the Git's philosophy is that
the index is all that matters and the working tree files are just
ephemeral means to the end, which is to update the index with good
contents and to make tree out of it), I can see us adding another
mode, perhaps called "reset --clear", that is almost like "reset
--hard" with one distinction, for end-user's interactive use.

If you did this sequence:

    $ git reset --hard    ;# start from a pristine state

    $ date >new-file-a    ;# create a new file
    $ date >new-file-b    ;# create another new file
    $ git add new-file-a  ;# just add one of them

    $ git reset --hard    ;# get back to the pristine state

the second reset will make sure that neither of these two files are
in the index, but it also removes only one of the files in the
working tree, while leaving the other file in the working tree.

The fact that new-file-a has been "git add"ed means that after
losing new-file-a, its contents *can* be recovered (i.e. it is one
of the loose objects that is no longer reachable, and "git fsck
--lost-found" could theoretically give it back to you).

But new-file-b has never been added, so losing it is a more grave
loss.  So the behaviour to remove new-file-a that was in the index
while leaving new-file-b alone is somewhat justifiable, but not by a
large margin.  A new "reset --clear" mode _could_ make paths in the
index that do not appear in the resetted-to treeish (either HEAD or
an empty tree if the branch is unborn) untracked, while leaving their
working tree files alone.
diff mbox series

Patch

diff --git a/builtin/reset.c b/builtin/reset.c
index b97745ee94e..997e2adf8d8 100644
--- a/builtin/reset.c
+++ b/builtin/reset.c
@@ -26,6 +26,7 @@ 
 #include "submodule.h"
 #include "submodule-config.h"
 #include "dir.h"
+#include "wt-status.h"
 
 #define REFRESH_INDEX_DELAY_WARNING_IN_MS (2 * 1000)
 
@@ -301,6 +302,35 @@  static void die_if_unmerged_cache(int reset_type)
 
 }
 
+static int check_commit_exists(const char *refname, const struct object_id *oid, int f, void *d)
+{
+	return is_branch(refname);
+}
+
+static void accept_discarding_changes(void) {
+	int answer = getc(stdin);
+	printf(_("Some staged changes may be discarded by this reset. Continue? [Y/n]"));
+
+	if (answer != 'y' && answer != 'Y') {
+		printf(_("aborted\n"));
+		exit(1);
+	}
+}
+
+static void detect_risky_reset(int commits_exist) {
+	int cache = read_cache();
+	if(!commits_exist) {
+		if(cache == 1) {
+			accept_discarding_changes();
+		}
+	}
+	else {
+		if(has_uncommitted_changes(the_repository, 1)) {
+			accept_discarding_changes();
+		}
+	}
+}
+
 static void parse_args(struct pathspec *pathspec,
 		       const char **argv, const char *prefix,
 		       int patch_mode,
@@ -474,6 +504,16 @@  int cmd_reset(int argc, const char **argv, const char *prefix)
 			die(_("Cannot do %s reset with paths."),
 					_(reset_type_names[reset_type]));
 	}
+
+	if (reset_type == HARD) {
+		int safe = 0;
+		git_config_get_bool("reset.safe", &safe);
+		if (safe) {
+			int commits_exist = for_each_fullref_in("refs/heads", check_commit_exists, NULL);
+			detect_risky_reset(commits_exist);
+		}
+	}
+
 	if (reset_type == NONE)
 		reset_type = MIXED; /* by default */
 
diff --git a/t/t7104-reset-hard.sh b/t/t7104-reset-hard.sh
index cf9697eba9a..c962c706bed 100755
--- a/t/t7104-reset-hard.sh
+++ b/t/t7104-reset-hard.sh
@@ -44,4 +44,31 @@  test_expect_success 'reset --hard did not corrupt index or cache-tree' '
 
 '
 
+test_expect_success 'reset --hard in safe mode on unborn branch with staged files results in a warning' '
+	git config reset.safe on &&
+	touch a &&
+	git add a &&
+	test_must_fail git reset --hard
+
+'
+
+test_expect_success 'reset --hard in safe mode after a commit without staged changes works fine' '
+	git config reset.safe on &&
+	touch b &&
+	git add b &&
+	git commit -m "initial" &&
+	git reset --hard
+
+'
+
+test_expect_success 'reset --hard in safe mode after a commit with staged changes results in a warning' '
+	git config reset.safe on &&
+	touch c d &&
+	git add c &&
+	git commit -m "initial" &&
+	git add d &&
+	test_must_fail git reset --hard
+
+'
+
 test_done