diff mbox series

setup: only die on invalid .git under RUN_SETUP

Message ID patch-1.1-fc26c46d39-20210722T140648Z-avarab@gmail.com (mailing list archive)
State New
Headers show
Series setup: only die on invalid .git under RUN_SETUP | expand

Commit Message

Ævar Arnfjörð Bjarmason July 22, 2021, 2:07 p.m. UTC
Change RUN_SETUP_GENTLY to stop dying if e.g. the .git is "not a
repo". This means that we now recover in cases like:

    $ echo "gitdir: /foo/bar" > .git
    $ git ls-remote https://github.com/torvalds/linux
    [... ls-remote output ...]

But not (as intended):

    $ git rev-parse HEAD
    fatal: not a git repository: /foo/bar

The relevant setup_git_directory_gently_1() invocation was added in
01017dce546 (setup_git_directory_gently_1(): avoid die()ing,
2017-03-13), but I could reproduce this as far back as Git v1.8.0. I
don't know if this ever worked, but it should.

Let's also use the compiler to check enum arms for us, instead of
having a "default" fall-though case, this changes code added in
ce9b8aab5d9 (setup_git_directory_1(): avoid changing global state,
2017-03-13).

Reported-by: Tom Cook <tom.k.cook@gmail.com>
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
---
 setup.c            | 27 ++++++++++++++++++++++-----
 t/t0002-gitfile.sh |  8 ++++++--
 2 files changed, 28 insertions(+), 7 deletions(-)

Comments

Junio C Hamano July 22, 2021, 7:06 p.m. UTC | #1
Ævar Arnfjörð Bjarmason  <avarab@gmail.com> writes:

> Change RUN_SETUP_GENTLY to stop dying if e.g. the .git is "not a
> repo". This means that we now recover in cases like:
>
>     $ echo "gitdir: /foo/bar" > .git
>     $ git ls-remote https://github.com/torvalds/linux
>     [... ls-remote output ...]
>
> But not (as intended):
>
>     $ git rev-parse HEAD
>     fatal: not a git repository: /foo/bar

I am of two minds.  ls-remote is benign in that it behaves more or
less identically when given certain types of args, and the above may
be a strict improvement (but it does fail if you did not use URL but
use a remote nickname you thought you configured in the repository
in such a situation).  There however are a few niche commands that
work inside and outside a repository and they work differently.  For
example, if you do

  $ git diff file1 file2

in such a corrupt repository, I'd prefer to see the command _fail_
to nudge the user to look into the situation, instead of taking the
output (which degenerates to "git diff --no-index file1 file2"
outside a repository) blindly as a patch that shows the changes
relative to the index for these two paths.
Andrei Rybak July 22, 2021, 8:50 p.m. UTC | #2
On 22/07/2021 16:07, Ævar Arnfjörð Bjarmason wrote:
> Change RUN_SETUP_GENTLY to stop dying if e.g. the .git is "not a
> repo". This means that we now recover in cases like:
> 
>      $ echo "gitdir: /foo/bar" > .git
>      $ git ls-remote https://github.com/torvalds/linux
>      [... ls-remote output ...]
> 
> But not (as intended):
> 
>      $ git rev-parse HEAD
>      fatal: not a git repository: /foo/bar
> 
> The relevant setup_git_directory_gently_1() invocation was added in
> 01017dce546 (setup_git_directory_gently_1(): avoid die()ing,
> 2017-03-13), but I could reproduce this as far back as Git v1.8.0. I
> don't know if this ever worked, but it should.
> 
> Let's also use the compiler to check enum arms for us, instead of
> having a "default" fall-though case, this changes code added in
> ce9b8aab5d9 (setup_git_directory_1(): avoid changing global state,
> 2017-03-13).
> 
> Reported-by: Tom Cook <tom.k.cook@gmail.com>
> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
> ---
>   setup.c            | 27 ++++++++++++++++++++++-----
>   t/t0002-gitfile.sh |  8 ++++++--
>   2 files changed, 28 insertions(+), 7 deletions(-)
> 
> diff --git a/setup.c b/setup.c
> index eb9367ca5c..6ff145d58b 100644
> --- a/setup.c
> +++ b/setup.c
> @@ -1033,7 +1033,8 @@ enum discovery_result {
>   	/* these are errors */
>   	GIT_DIR_HIT_CEILING = -1,
>   	GIT_DIR_HIT_MOUNT_POINT = -2,
> -	GIT_DIR_INVALID_GITFILE = -3
> +	GIT_DIR_INVALID_GITFILE = -3,
> +	GIT_DIR_GITFILE_NOT_A_REPO = -4,
>   };
>   
>   /*
> @@ -1118,8 +1119,11 @@ static enum discovery_result setup_git_directory_gently_1(struct strbuf *dir,
>   				/* NEEDSWORK: fail if .git is not file nor dir */
>   				if (is_git_directory(dir->buf))
>   					gitdirenv = DEFAULT_GIT_DIR_ENVIRONMENT;
> -			} else if (error_code != READ_GITFILE_ERR_STAT_FAILED)
> +			} else if (error_code == READ_GITFILE_ERR_NOT_A_REPO) {
> +				return GIT_DIR_GITFILE_NOT_A_REPO;
> +			} else if (error_code != READ_GITFILE_ERR_STAT_FAILED) {
>   				return GIT_DIR_INVALID_GITFILE;
> +			}
>   		}
>   		strbuf_setlen(dir, offset);
>   		if (gitdirenv) {
> @@ -1209,6 +1213,8 @@ const char *setup_git_directory_gently(int *nongit_ok)
>   	struct strbuf dir = STRBUF_INIT, gitdir = STRBUF_INIT;
>   	const char *prefix = NULL;
>   	struct repository_format repo_fmt = REPOSITORY_FORMAT_INIT;
> +	int die_on_error = !nongit_ok;
> +	enum discovery_result discovery;
>   
>   	/*
>   	 * We may have read an incomplete configuration before
> @@ -1231,7 +1237,9 @@ const char *setup_git_directory_gently(int *nongit_ok)
>   		die_errno(_("Unable to read current working directory"));
>   	strbuf_addbuf(&dir, &cwd);
>   
> -	switch (setup_git_directory_gently_1(&dir, &gitdir, 1)) {
> +	discovery = setup_git_directory_gently_1(&dir, &gitdir, die_on_error);
> +
> +	switch (discovery) {
>   	case GIT_DIR_EXPLICIT:
>   		prefix = setup_explicit_git_dir(gitdir.buf, &cwd, &repo_fmt, nongit_ok);
>   		break;
> @@ -1259,6 +1267,16 @@ const char *setup_git_directory_gently(int *nongit_ok)
>   			    dir.buf);
>   		*nongit_ok = 1;
>   		break;
> +	case GIT_DIR_GITFILE_NOT_A_REPO:
> +		if (!nongit_ok)
> +			die(_("not a git repository: %s"), dir.buf);
> +		*nongit_ok = 1;
> +		break;
> +	case GIT_DIR_INVALID_GITFILE:
> +		if (!nongit_ok)

Variable die_on_error could be used in two `if`s above.

> +			die(_("invalid .git file: %s"), dir.buf);
> +		*nongit_ok = 1;
> +		break;
>   	case GIT_DIR_NONE:
>   		/*
>   		 * As a safeguard against setup_git_directory_gently_1 returning
> @@ -1266,8 +1284,7 @@ const char *setup_git_directory_gently(int *nongit_ok)
>   		 * set startup_info->have_repository to 1 when we did nothing to
>   		 * find a repository.
>   		 */
> -	default:
> -		BUG("unhandled setup_git_directory_1() result");
> +		BUG("setup_git_directory_1() should not return GIT_DIR_NONE");
>   	}
>   
>   	/*
> diff --git a/t/t0002-gitfile.sh b/t/t0002-gitfile.sh
> index 8440e6add1..176dc8c9dc 100755
> --- a/t/t0002-gitfile.sh
> +++ b/t/t0002-gitfile.sh
> @@ -21,13 +21,17 @@ test_expect_success 'initial setup' '
>   test_expect_success 'bad setup: invalid .git file format' '
>   	echo "gitdir $REAL" >.git &&
>   	test_must_fail git rev-parse 2>.err &&
> -	test_i18ngrep "invalid gitfile format" .err
> +	test_i18ngrep "invalid gitfile format" .err &&
> +
> +	git ls-remote "file://$REAL"
>   '
>   
>   test_expect_success 'bad setup: invalid .git file path' '
>   	echo "gitdir: $REAL.not" >.git &&
>   	test_must_fail git rev-parse 2>.err &&
> -	test_i18ngrep "not a git repository" .err
> +	test_i18ngrep "not a git repository" .err &&
> +
> +	git ls-remote "file://$REAL"
>   '
>   
>   test_expect_success 'final setup + check rev-parse --git-dir' '
>
Ævar Arnfjörð Bjarmason July 22, 2021, 9:08 p.m. UTC | #3
On Thu, Jul 22 2021, Junio C Hamano wrote:

> Ævar Arnfjörð Bjarmason  <avarab@gmail.com> writes:
>
>> Change RUN_SETUP_GENTLY to stop dying if e.g. the .git is "not a
>> repo". This means that we now recover in cases like:
>>
>>     $ echo "gitdir: /foo/bar" > .git
>>     $ git ls-remote https://github.com/torvalds/linux
>>     [... ls-remote output ...]
>>
>> But not (as intended):
>>
>>     $ git rev-parse HEAD
>>     fatal: not a git repository: /foo/bar
>
> I am of two minds.  ls-remote is benign in that it behaves more or
> less identically when given certain types of args, and the above may
> be a strict improvement (but it does fail if you did not use URL but
> use a remote nickname you thought you configured in the repository
> in such a situation).  There however are a few niche commands that
> work inside and outside a repository and they work differently.  For
> example, if you do
>
>   $ git diff file1 file2
>
> in such a corrupt repository, I'd prefer to see the command _fail_
> to nudge the user to look into the situation, instead of taking the
> output (which degenerates to "git diff --no-index file1 file2"
> outside a repository) blindly as a patch that shows the changes
> relative to the index for these two paths.

Yes it comes down to what we think RUN_SETUP_GENTLY and
setup_git_directory_gently() should be doing.

I.e. is &nongit_ok supposed to be a binary "repo you can use"/"[...]
can't use", or a tri-state of that plus "this looks like it's supposed
to be a repo, but it's broken, so let's die".

Anyway, if you're not happy with this pretty much as-is consider it
dropped from my side, because I think the next step would be to do some
more work on e.g. split up RUN_SETUP_GENTLY into a mode that makes sense
for "diff", and another for "bugreport" and "ls-remote". I figured this
was an easy bugfix, but if not perhaps Tom Cooks wants to pick this
up...

I guess another easy alternative would be to issue a warning() in this
case, which is a relatively light refactoring of passing an error
message up from the relevant function(s) instead of having it die()
directly.
Junio C Hamano July 23, 2021, 1:59 a.m. UTC | #4
Ævar Arnfjörð Bjarmason <avarab@gmail.com> writes:

> I guess another easy alternative would be to issue a warning() in this
> case, which is a relatively light refactoring of passing an error
> message up from the relevant function(s) instead of having it die()
> directly.

I think the end users deserve a warning even when they are running
ls-remote or bugreport in such a corrupt repository-lookalike.  It
might be sufficient to make "git diff [--no-index]" safe enough.

Thanks.
Tom Cook July 23, 2021, 8:42 a.m. UTC | #5
On Thu, Jul 22, 2021 at 10:19 PM Ævar Arnfjörð Bjarmason
<avarab@gmail.com> wrote:

> Anyway, if you're not happy with this pretty much as-is consider it
> dropped from my side, because I think the next step would be to do some
> more work on e.g. split up RUN_SETUP_GENTLY into a mode that makes sense
> for "diff", and another for "bugreport" and "ls-remote". I figured this
> was an easy bugfix, but if not perhaps Tom Cooks wants to pick this
> up...

Just to note that I am following this conversation.  I don't have any
time available to look into this immediately but probably will in a
couple of weeks.  If it's not resolved by then, then I can pick it up.
But please continue the conversation; I've no familiarity at all with
git internals so the more detail that comes out here, the better.

Regards,
Tom
Ævar Arnfjörð Bjarmason July 23, 2021, 9:33 a.m. UTC | #6
On Thu, Jul 22 2021, Andrei Rybak wrote:

> On 22/07/2021 16:07, Ævar Arnfjörð Bjarmason wrote:
>> Change RUN_SETUP_GENTLY to stop dying if e.g. the .git is "not a
>> repo". This means that we now recover in cases like:
>>      $ echo "gitdir: /foo/bar" > .git
>>      $ git ls-remote https://github.com/torvalds/linux
>>      [... ls-remote output ...]
>> But not (as intended):
>>      $ git rev-parse HEAD
>>      fatal: not a git repository: /foo/bar
>> The relevant setup_git_directory_gently_1() invocation was added in
>> 01017dce546 (setup_git_directory_gently_1(): avoid die()ing,
>> 2017-03-13), but I could reproduce this as far back as Git v1.8.0. I
>> don't know if this ever worked, but it should.
>> Let's also use the compiler to check enum arms for us, instead of
>> having a "default" fall-though case, this changes code added in
>> ce9b8aab5d9 (setup_git_directory_1(): avoid changing global state,
>> 2017-03-13).
>> Reported-by: Tom Cook <tom.k.cook@gmail.com>
>> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
>> ---
>>   setup.c            | 27 ++++++++++++++++++++++-----
>>   t/t0002-gitfile.sh |  8 ++++++--
>>   2 files changed, 28 insertions(+), 7 deletions(-)
>> diff --git a/setup.c b/setup.c
>> index eb9367ca5c..6ff145d58b 100644
>> --- a/setup.c
>> +++ b/setup.c
>> @@ -1033,7 +1033,8 @@ enum discovery_result {
>>   	/* these are errors */
>>   	GIT_DIR_HIT_CEILING = -1,
>>   	GIT_DIR_HIT_MOUNT_POINT = -2,
>> -	GIT_DIR_INVALID_GITFILE = -3
>> +	GIT_DIR_INVALID_GITFILE = -3,
>> +	GIT_DIR_GITFILE_NOT_A_REPO = -4,
>>   };
>>     /*
>> @@ -1118,8 +1119,11 @@ static enum discovery_result setup_git_directory_gently_1(struct strbuf *dir,
>>   				/* NEEDSWORK: fail if .git is not file nor dir */
>>   				if (is_git_directory(dir->buf))
>>   					gitdirenv = DEFAULT_GIT_DIR_ENVIRONMENT;
>> -			} else if (error_code != READ_GITFILE_ERR_STAT_FAILED)
>> +			} else if (error_code == READ_GITFILE_ERR_NOT_A_REPO) {
>> +				return GIT_DIR_GITFILE_NOT_A_REPO;
>> +			} else if (error_code != READ_GITFILE_ERR_STAT_FAILED) {
>>   				return GIT_DIR_INVALID_GITFILE;
>> +			}
>>   		}
>>   		strbuf_setlen(dir, offset);
>>   		if (gitdirenv) {
>> @@ -1209,6 +1213,8 @@ const char *setup_git_directory_gently(int *nongit_ok)
>>   	struct strbuf dir = STRBUF_INIT, gitdir = STRBUF_INIT;
>>   	const char *prefix = NULL;
>>   	struct repository_format repo_fmt = REPOSITORY_FORMAT_INIT;
>> +	int die_on_error = !nongit_ok;
>> +	enum discovery_result discovery;
>>     	/*
>>   	 * We may have read an incomplete configuration before
>> @@ -1231,7 +1237,9 @@ const char *setup_git_directory_gently(int *nongit_ok)
>>   		die_errno(_("Unable to read current working directory"));
>>   	strbuf_addbuf(&dir, &cwd);
>>   -	switch (setup_git_directory_gently_1(&dir, &gitdir, 1)) {
>> +	discovery = setup_git_directory_gently_1(&dir, &gitdir, die_on_error);
>> +
>> +	switch (discovery) {
>>   	case GIT_DIR_EXPLICIT:
>>   		prefix = setup_explicit_git_dir(gitdir.buf, &cwd, &repo_fmt, nongit_ok);
>>   		break;
>> @@ -1259,6 +1267,16 @@ const char *setup_git_directory_gently(int *nongit_ok)
>>   			    dir.buf);
>>   		*nongit_ok = 1;
>>   		break;
>> +	case GIT_DIR_GITFILE_NOT_A_REPO:
>> +		if (!nongit_ok)
>> +			die(_("not a git repository: %s"), dir.buf);
>> +		*nongit_ok = 1;
>> +		break;
>> +	case GIT_DIR_INVALID_GITFILE:
>> +		if (!nongit_ok)
>
> Variable die_on_error could be used in two `if`s above.

Re-reading my own code I think it's better just to drop die_on_error
entirely and use !nongit_ok consistently, as the rest of the function
does. What do yo think?
Junio C Hamano July 23, 2021, 3:21 p.m. UTC | #7
Ævar Arnfjörð Bjarmason <avarab@gmail.com> writes:

>>> +	int die_on_error = !nongit_ok;
>>> +	enum discovery_result discovery;
>>> ...    	/*
>>> +	case GIT_DIR_GITFILE_NOT_A_REPO:
>>> +		if (!nongit_ok)
>>> +			die(_("not a git repository: %s"), dir.buf);
>>> +		*nongit_ok = 1;
>>> +		break;
>>> +	case GIT_DIR_INVALID_GITFILE:
>>> +		if (!nongit_ok)
>>
>> Variable die_on_error could be used in two `if`s above.
>
> Re-reading my own code I think it's better just to drop die_on_error
> entirely and use !nongit_ok consistently, as the rest of the function
> does. What do yo think?

I think "not X_ok" means we do not consider X is OK, and agree with
you that the code is clearer without an extra indirection (I do not
know if you meant to address me, though).
diff mbox series

Patch

diff --git a/setup.c b/setup.c
index eb9367ca5c..6ff145d58b 100644
--- a/setup.c
+++ b/setup.c
@@ -1033,7 +1033,8 @@  enum discovery_result {
 	/* these are errors */
 	GIT_DIR_HIT_CEILING = -1,
 	GIT_DIR_HIT_MOUNT_POINT = -2,
-	GIT_DIR_INVALID_GITFILE = -3
+	GIT_DIR_INVALID_GITFILE = -3,
+	GIT_DIR_GITFILE_NOT_A_REPO = -4,
 };
 
 /*
@@ -1118,8 +1119,11 @@  static enum discovery_result setup_git_directory_gently_1(struct strbuf *dir,
 				/* NEEDSWORK: fail if .git is not file nor dir */
 				if (is_git_directory(dir->buf))
 					gitdirenv = DEFAULT_GIT_DIR_ENVIRONMENT;
-			} else if (error_code != READ_GITFILE_ERR_STAT_FAILED)
+			} else if (error_code == READ_GITFILE_ERR_NOT_A_REPO) {
+				return GIT_DIR_GITFILE_NOT_A_REPO;
+			} else if (error_code != READ_GITFILE_ERR_STAT_FAILED) {
 				return GIT_DIR_INVALID_GITFILE;
+			}
 		}
 		strbuf_setlen(dir, offset);
 		if (gitdirenv) {
@@ -1209,6 +1213,8 @@  const char *setup_git_directory_gently(int *nongit_ok)
 	struct strbuf dir = STRBUF_INIT, gitdir = STRBUF_INIT;
 	const char *prefix = NULL;
 	struct repository_format repo_fmt = REPOSITORY_FORMAT_INIT;
+	int die_on_error = !nongit_ok;
+	enum discovery_result discovery;
 
 	/*
 	 * We may have read an incomplete configuration before
@@ -1231,7 +1237,9 @@  const char *setup_git_directory_gently(int *nongit_ok)
 		die_errno(_("Unable to read current working directory"));
 	strbuf_addbuf(&dir, &cwd);
 
-	switch (setup_git_directory_gently_1(&dir, &gitdir, 1)) {
+	discovery = setup_git_directory_gently_1(&dir, &gitdir, die_on_error);
+
+	switch (discovery) {
 	case GIT_DIR_EXPLICIT:
 		prefix = setup_explicit_git_dir(gitdir.buf, &cwd, &repo_fmt, nongit_ok);
 		break;
@@ -1259,6 +1267,16 @@  const char *setup_git_directory_gently(int *nongit_ok)
 			    dir.buf);
 		*nongit_ok = 1;
 		break;
+	case GIT_DIR_GITFILE_NOT_A_REPO:
+		if (!nongit_ok)
+			die(_("not a git repository: %s"), dir.buf);
+		*nongit_ok = 1;
+		break;
+	case GIT_DIR_INVALID_GITFILE:
+		if (!nongit_ok)
+			die(_("invalid .git file: %s"), dir.buf);
+		*nongit_ok = 1;
+		break;
 	case GIT_DIR_NONE:
 		/*
 		 * As a safeguard against setup_git_directory_gently_1 returning
@@ -1266,8 +1284,7 @@  const char *setup_git_directory_gently(int *nongit_ok)
 		 * set startup_info->have_repository to 1 when we did nothing to
 		 * find a repository.
 		 */
-	default:
-		BUG("unhandled setup_git_directory_1() result");
+		BUG("setup_git_directory_1() should not return GIT_DIR_NONE");
 	}
 
 	/*
diff --git a/t/t0002-gitfile.sh b/t/t0002-gitfile.sh
index 8440e6add1..176dc8c9dc 100755
--- a/t/t0002-gitfile.sh
+++ b/t/t0002-gitfile.sh
@@ -21,13 +21,17 @@  test_expect_success 'initial setup' '
 test_expect_success 'bad setup: invalid .git file format' '
 	echo "gitdir $REAL" >.git &&
 	test_must_fail git rev-parse 2>.err &&
-	test_i18ngrep "invalid gitfile format" .err
+	test_i18ngrep "invalid gitfile format" .err &&
+
+	git ls-remote "file://$REAL"
 '
 
 test_expect_success 'bad setup: invalid .git file path' '
 	echo "gitdir: $REAL.not" >.git &&
 	test_must_fail git rev-parse 2>.err &&
-	test_i18ngrep "not a git repository" .err
+	test_i18ngrep "not a git repository" .err &&
+
+	git ls-remote "file://$REAL"
 '
 
 test_expect_success 'final setup + check rev-parse --git-dir' '