diff mbox series

[v4,1/4] sparse index: enable only for git repos

Message ID 81e208cf454b61c761fa66e4f43a464ed439ba59.1637620958.git.gitgitgadget@gmail.com (mailing list archive)
State Superseded
Headers show
Series Sparse Index: diff and blame builtins | expand

Commit Message

Lessley Dennington Nov. 22, 2021, 10:42 p.m. UTC
From: Lessley Dennington <lessleydennington@gmail.com>

Check whether git dir exists before adding any repo settings. If it
does not exist, BUG with the message that one cannot add settings for an
uninitialized repository. If it does exist, proceed with adding repo
settings.

Additionally, ensure the above BUG is not triggered when users pass the -h
flag by adding a check for the repository to the checkout and pack-objects
builtins.

Finally, ensure the above BUG is not triggered for commit-graph by
returning early if the git directory does not exist.

Signed-off-by: Lessley Dennington <lessleydennington@gmail.com>
---
 builtin/checkout.c     | 6 ++++--
 builtin/pack-objects.c | 9 ++++++---
 commit-graph.c         | 5 ++++-
 repo-settings.c        | 3 +++
 4 files changed, 17 insertions(+), 6 deletions(-)

Comments

Elijah Newren Nov. 23, 2021, 7:41 a.m. UTC | #1
On Mon, Nov 22, 2021 at 2:42 PM Lessley Dennington via GitGitGadget
<gitgitgadget@gmail.com> wrote:
>
> From: Lessley Dennington <lessleydennington@gmail.com>
>
> Check whether git dir exists before adding any repo settings. If it
> does not exist, BUG with the message that one cannot add settings for an
> uninitialized repository. If it does exist, proceed with adding repo
> settings.
>
> Additionally, ensure the above BUG is not triggered when users pass the -h
> flag by adding a check for the repository to the checkout and pack-objects
> builtins.

Why only checkout and pack-objects?  Why don't the -h flags to all of
the following need it as well?:

$ git grep -l prepare_repo_settings | grep builtin/
builtin/add.c
builtin/blame.c
builtin/checkout.c
builtin/commit.c
builtin/diff.c
builtin/fetch.c
builtin/gc.c
builtin/merge.c
builtin/pack-objects.c
builtin/rebase.c
builtin/reset.c
builtin/revert.c
builtin/sparse-checkout.c
builtin/update-index.c

If none of these need it, was it because they put
prepare_repo_settings() calls after some other basic checks had been
done so more do not have to be added?  If so, is there a similar place
in checkout and pack-objects where their calls to
prepare_repo_settings() can be moved?  (Looking ahead, it appears you
moved some code in patch 2 to do something like this.  Are the similar
moves that could be done here?)

> Finally, ensure the above BUG is not triggered for commit-graph by
> returning early if the git directory does not exist.

If commit-graph needs a special case to avoid triggering the BUG,
wouldn't several of these need it too?:

$ git grep -l prepare_repo_settings | grep -v builtin/
commit-graph.c
fetch-negotiator.c
merge-recursive.c
midx.c
read-cache.c
repo-settings.c
repository.c
repository.h
sparse-index.c
t/helper/test-read-cache.c
t/helper/test-read-graph.c
unpack-trees.c

or are their calls to prepare_repo_settings() only done after gitdir
setup?  If the latter, perhaps the commit-graph function calls could
be moved after gitdir setup too to avoid the need to do extra checks
in it?

> Signed-off-by: Lessley Dennington <lessleydennington@gmail.com>
> ---
>  builtin/checkout.c     | 6 ++++--
>  builtin/pack-objects.c | 9 ++++++---
>  commit-graph.c         | 5 ++++-
>  repo-settings.c        | 3 +++
>  4 files changed, 17 insertions(+), 6 deletions(-)
>
> diff --git a/builtin/checkout.c b/builtin/checkout.c
> index 8c69dcdf72a..31632b07888 100644
> --- a/builtin/checkout.c
> +++ b/builtin/checkout.c
> @@ -1588,8 +1588,10 @@ static int checkout_main(int argc, const char **argv, const char *prefix,
>
>         git_config(git_checkout_config, opts);
>
> -       prepare_repo_settings(the_repository);
> -       the_repository->settings.command_requires_full_index = 0;
> +       if (startup_info->have_repository) {
> +               prepare_repo_settings(the_repository);
> +               the_repository->settings.command_requires_full_index = 0;
> +       }
>
>         opts->track = BRANCH_TRACK_UNSPECIFIED;
>
> diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
> index 1a3dd445f83..45dc2258dc7 100644
> --- a/builtin/pack-objects.c
> +++ b/builtin/pack-objects.c
> @@ -3976,9 +3976,12 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
>         read_replace_refs = 0;
>
>         sparse = git_env_bool("GIT_TEST_PACK_SPARSE", -1);
> -       prepare_repo_settings(the_repository);
> -       if (sparse < 0)
> -               sparse = the_repository->settings.pack_use_sparse;
> +
> +       if (startup_info->have_repository) {
> +               prepare_repo_settings(the_repository);
> +               if (sparse < 0)
> +                       sparse = the_repository->settings.pack_use_sparse;
> +       }
>
>         reset_pack_idx_option(&pack_idx_opts);
>         git_config(git_pack_config, NULL);
> diff --git a/commit-graph.c b/commit-graph.c
> index 2706683acfe..265c010122e 100644
> --- a/commit-graph.c
> +++ b/commit-graph.c
> @@ -632,10 +632,13 @@ static int prepare_commit_graph(struct repository *r)
>         struct object_directory *odb;
>
>         /*
> +        * Early return if there is no git dir or if the commit graph is
> +        * disabled.
> +        *
>          * This must come before the "already attempted?" check below, because
>          * we want to disable even an already-loaded graph file.
>          */
> -       if (r->commit_graph_disabled)
> +       if (!r->gitdir || r->commit_graph_disabled)
>                 return 0;
>
>         if (r->objects->commit_graph_attempted)
> diff --git a/repo-settings.c b/repo-settings.c
> index b93e91a212e..00ca5571a1a 100644
> --- a/repo-settings.c
> +++ b/repo-settings.c
> @@ -17,6 +17,9 @@ void prepare_repo_settings(struct repository *r)
>         char *strval;
>         int manyfiles;
>
> +       if (!r->gitdir)
> +               BUG("Cannot add settings for uninitialized repository");
> +
>         if (r->settings.initialized++)
>                 return;

I'm not what the BUG() is trying to help us catch, but I'm worried
that there are many additional places that now need workarounds to
avoid triggering bugs.
Lessley Dennington Nov. 23, 2021, 2:52 p.m. UTC | #2
On 11/22/21 11:41 PM, Elijah Newren wrote:
> On Mon, Nov 22, 2021 at 2:42 PM Lessley Dennington via GitGitGadget
> <gitgitgadget@gmail.com> wrote:
>>
>> From: Lessley Dennington <lessleydennington@gmail.com>
>>
>> Check whether git dir exists before adding any repo settings. If it
>> does not exist, BUG with the message that one cannot add settings for an
>> uninitialized repository. If it does exist, proceed with adding repo
>> settings.
>>
>> Additionally, ensure the above BUG is not triggered when users pass the -h
>> flag by adding a check for the repository to the checkout and pack-objects
>> builtins.
> 
> Why only checkout and pack-objects?  Why don't the -h flags to all of
> the following need it as well?:
> 
> $ git grep -l prepare_repo_settings | grep builtin/
> builtin/add.c
> builtin/blame.c
> builtin/checkout.c
> builtin/commit.c
> builtin/diff.c
> builtin/fetch.c
> builtin/gc.c
> builtin/merge.c
> builtin/pack-objects.c
> builtin/rebase.c
> builtin/reset.c
> builtin/revert.c
> builtin/sparse-checkout.c
> builtin/update-index.c
> 
> If none of these need it, was it because they put
> prepare_repo_settings() calls after some other basic checks had been
> done so more do not have to be added?  If so, is there a similar place
> in checkout and pack-objects where their calls to
> prepare_repo_settings() can be moved?  (Looking ahead, it appears you
> moved some code in patch 2 to do something like this.  Are the similar
> moves that could be done here?)
> 
Thank you for the quick feedback. Yes, I believe there are similar moves 
that can be done here. I was attempting to be explicit about the case I'm 
guarding against, but you're right - it shouldn't be done one way for some 
builtins and another way for others.
>> Finally, ensure the above BUG is not triggered for commit-graph by
>> returning early if the git directory does not exist.
> 
> If commit-graph needs a special case to avoid triggering the BUG,
> wouldn't several of these need it too?:
> 
> $ git grep -l prepare_repo_settings | grep -v builtin/
> commit-graph.c
> fetch-negotiator.c
> merge-recursive.c
> midx.c
> read-cache.c
> repo-settings.c
> repository.c
> repository.h
> sparse-index.c
> t/helper/test-read-cache.c
> t/helper/test-read-graph.c
> unpack-trees.c
> 
> or are their calls to prepare_repo_settings() only done after gitdir
> setup?  If the latter, perhaps the commit-graph function calls could
> be moved after gitdir setup too to avoid the need to do extra checks
> in it?
> 
Agreed, I can move this as well.
>> Signed-off-by: Lessley Dennington <lessleydennington@gmail.com>
>> ---
>>   builtin/checkout.c     | 6 ++++--
>>   builtin/pack-objects.c | 9 ++++++---
>>   commit-graph.c         | 5 ++++-
>>   repo-settings.c        | 3 +++
>>   4 files changed, 17 insertions(+), 6 deletions(-)
>>
>> diff --git a/builtin/checkout.c b/builtin/checkout.c
>> index 8c69dcdf72a..31632b07888 100644
>> --- a/builtin/checkout.c
>> +++ b/builtin/checkout.c
>> @@ -1588,8 +1588,10 @@ static int checkout_main(int argc, const char **argv, const char *prefix,
>>
>>          git_config(git_checkout_config, opts);
>>
>> -       prepare_repo_settings(the_repository);
>> -       the_repository->settings.command_requires_full_index = 0;
>> +       if (startup_info->have_repository) {
>> +               prepare_repo_settings(the_repository);
>> +               the_repository->settings.command_requires_full_index = 0;
>> +       }
>>
>>          opts->track = BRANCH_TRACK_UNSPECIFIED;
>>
>> diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
>> index 1a3dd445f83..45dc2258dc7 100644
>> --- a/builtin/pack-objects.c
>> +++ b/builtin/pack-objects.c
>> @@ -3976,9 +3976,12 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
>>          read_replace_refs = 0;
>>
>>          sparse = git_env_bool("GIT_TEST_PACK_SPARSE", -1);
>> -       prepare_repo_settings(the_repository);
>> -       if (sparse < 0)
>> -               sparse = the_repository->settings.pack_use_sparse;
>> +
>> +       if (startup_info->have_repository) {
>> +               prepare_repo_settings(the_repository);
>> +               if (sparse < 0)
>> +                       sparse = the_repository->settings.pack_use_sparse;
>> +       }
>>
>>          reset_pack_idx_option(&pack_idx_opts);
>>          git_config(git_pack_config, NULL);
>> diff --git a/commit-graph.c b/commit-graph.c
>> index 2706683acfe..265c010122e 100644
>> --- a/commit-graph.c
>> +++ b/commit-graph.c
>> @@ -632,10 +632,13 @@ static int prepare_commit_graph(struct repository *r)
>>          struct object_directory *odb;
>>
>>          /*
>> +        * Early return if there is no git dir or if the commit graph is
>> +        * disabled.
>> +        *
>>           * This must come before the "already attempted?" check below, because
>>           * we want to disable even an already-loaded graph file.
>>           */
>> -       if (r->commit_graph_disabled)
>> +       if (!r->gitdir || r->commit_graph_disabled)
>>                  return 0;
>>
>>          if (r->objects->commit_graph_attempted)
>> diff --git a/repo-settings.c b/repo-settings.c
>> index b93e91a212e..00ca5571a1a 100644
>> --- a/repo-settings.c
>> +++ b/repo-settings.c
>> @@ -17,6 +17,9 @@ void prepare_repo_settings(struct repository *r)
>>          char *strval;
>>          int manyfiles;
>>
>> +       if (!r->gitdir)
>> +               BUG("Cannot add settings for uninitialized repository");
>> +
>>          if (r->settings.initialized++)
>>                  return;
> 
> I'm not what the BUG() is trying to help us catch, but I'm worried
> that there are many additional places that now need workarounds to
> avoid triggering bugs.
> 
I see your point. We're trying to make sure we catch issues like the 
nongit scenario I overlooked in diff in earlier versions of this series. 
But if we feel this change is too disruptive and will likely cause issues 
beyond those I've fixed to ensure our tests pass, I can remove.
Junio C Hamano Nov. 23, 2021, 11:39 p.m. UTC | #3
"Lessley Dennington via GitGitGadget" <gitgitgadget@gmail.com>
writes:

> From: Lessley Dennington <lessleydennington@gmail.com>
>
> Check whether git dir exists before adding any repo settings. If it
> does not exist, BUG with the message that one cannot add settings for an
> uninitialized repository. If it does exist, proceed with adding repo
> settings.
>
> Additionally, ensure the above BUG is not triggered when users pass the -h
> flag by adding a check for the repository to the checkout and pack-objects
> builtins.
>
> Finally, ensure the above BUG is not triggered for commit-graph by
> returning early if the git directory does not exist.
>
> Signed-off-by: Lessley Dennington <lessleydennington@gmail.com>
> ---
>  builtin/checkout.c     | 6 ++++--
>  builtin/pack-objects.c | 9 ++++++---
>  commit-graph.c         | 5 ++++-
>  repo-settings.c        | 3 +++
>  4 files changed, 17 insertions(+), 6 deletions(-)
>
> diff --git a/builtin/checkout.c b/builtin/checkout.c
> index 8c69dcdf72a..31632b07888 100644
> --- a/builtin/checkout.c
> +++ b/builtin/checkout.c
> @@ -1588,8 +1588,10 @@ static int checkout_main(int argc, const char **argv, const char *prefix,
>  
>  	git_config(git_checkout_config, opts);
>  
> -	prepare_repo_settings(the_repository);
> -	the_repository->settings.command_requires_full_index = 0;
> +	if (startup_info->have_repository) {
> +		prepare_repo_settings(the_repository);
> +		the_repository->settings.command_requires_full_index = 0;
> +	}

I am kind-a surprised if the control reaches this deep if you are
not in a repository.  In git.c::commands[] list, all three primary
entry points that call checkout_main(), namely, cmd_checkout(),
cmd_restore(), and cmd_switch(), are marked with RUN_SETUP bit,
which makes us call setup_git_directory() even before we call the
cmd_X() function.  And setup_git_directory() dies with "not a git
repository (or any of the parent directories)" outside a repository.

So, how can startup_info->have_repository bit be false if the
control flow reaches here?  Or am I grossly misunderstanding what
you are trying to do?

> diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
> index 1a3dd445f83..45dc2258dc7 100644
> --- a/builtin/pack-objects.c
> +++ b/builtin/pack-objects.c
> @@ -3976,9 +3976,12 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
>  	read_replace_refs = 0;

Ditto wrt RUN_SETUP.

> diff --git a/commit-graph.c b/commit-graph.c
> index 2706683acfe..265c010122e 100644
> --- a/commit-graph.c
> +++ b/commit-graph.c
> @@ -632,10 +632,13 @@ static int prepare_commit_graph(struct repository *r)
>  	struct object_directory *odb;
>  	/*
> +	 * Early return if there is no git dir or if the commit graph is
> +	 * disabled.
> +	 *
>  	 * This must come before the "already attempted?" check below, because
>  	 * we want to disable even an already-loaded graph file.
>  	 */
> -	if (r->commit_graph_disabled)
> +	if (!r->gitdir || r->commit_graph_disabled)
>  		return 0;

I haven't followed the control flow, but this one probably is a good
addition (in other words, unlike cmd_pack_objects(), I cannot convince
myself that r->gitdir will never be NULL here).

> diff --git a/repo-settings.c b/repo-settings.c
> index b93e91a212e..00ca5571a1a 100644
> --- a/repo-settings.c
> +++ b/repo-settings.c
> @@ -17,6 +17,9 @@ void prepare_repo_settings(struct repository *r)
>  	char *strval;
>  	int manyfiles;
>  
> +	if (!r->gitdir)
> +		BUG("Cannot add settings for uninitialized repository");
> +

This is a very good idea.  If I recall correctly, I think I reviewed
a bugfix patch that would have been simplified if we had this check
here.
Lessley Dennington Nov. 24, 2021, 2:41 p.m. UTC | #4
On 11/23/21 3:39 PM, Junio C Hamano wrote:
> "Lessley Dennington via GitGitGadget" <gitgitgadget@gmail.com>
> writes:
> 
>> From: Lessley Dennington <lessleydennington@gmail.com>
>>
>> Check whether git dir exists before adding any repo settings. If it
>> does not exist, BUG with the message that one cannot add settings for an
>> uninitialized repository. If it does exist, proceed with adding repo
>> settings.
>>
>> Additionally, ensure the above BUG is not triggered when users pass the -h
>> flag by adding a check for the repository to the checkout and pack-objects
>> builtins.
>>
>> Finally, ensure the above BUG is not triggered for commit-graph by
>> returning early if the git directory does not exist.
>>
>> Signed-off-by: Lessley Dennington <lessleydennington@gmail.com>
>> ---
>>   builtin/checkout.c     | 6 ++++--
>>   builtin/pack-objects.c | 9 ++++++---
>>   commit-graph.c         | 5 ++++-
>>   repo-settings.c        | 3 +++
>>   4 files changed, 17 insertions(+), 6 deletions(-)
>>
>> diff --git a/builtin/checkout.c b/builtin/checkout.c
>> index 8c69dcdf72a..31632b07888 100644
>> --- a/builtin/checkout.c
>> +++ b/builtin/checkout.c
>> @@ -1588,8 +1588,10 @@ static int checkout_main(int argc, const char **argv, const char *prefix,
>>   
>>   	git_config(git_checkout_config, opts);
>>   
>> -	prepare_repo_settings(the_repository);
>> -	the_repository->settings.command_requires_full_index = 0;
>> +	if (startup_info->have_repository) {
>> +		prepare_repo_settings(the_repository);
>> +		the_repository->settings.command_requires_full_index = 0;
>> +	}
> 
> I am kind-a surprised if the control reaches this deep if you are
> not in a repository.  In git.c::commands[] list, all three primary
> entry points that call checkout_main(), namely, cmd_checkout(),
> cmd_restore(), and cmd_switch(), are marked with RUN_SETUP bit,
> which makes us call setup_git_directory() even before we call the
> cmd_X() function.  And setup_git_directory() dies with "not a git
> repository (or any of the parent directories)" outside a repository.
> 
> So, how can startup_info->have_repository bit be false if the
> control flow reaches here?  Or am I grossly misunderstanding what
> you are trying to do?
> 
This was in reaction to the t0012-help.sh tests failing with the
new BUG in prepare_repo_settings. However, Elijah pointed out
that it's a better idea to move prepare_repo_settings farther
down (after parse_options) instead. So I will be issuing that
change as part of v5.
>> diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
>> index 1a3dd445f83..45dc2258dc7 100644
>> --- a/builtin/pack-objects.c
>> +++ b/builtin/pack-objects.c
>> @@ -3976,9 +3976,12 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
>>   	read_replace_refs = 0;
> 
> Ditto wrt RUN_SETUP.
> 
Updating with the same change as checkout for v5.
>> diff --git a/commit-graph.c b/commit-graph.c
>> index 2706683acfe..265c010122e 100644
>> --- a/commit-graph.c
>> +++ b/commit-graph.c
>> @@ -632,10 +632,13 @@ static int prepare_commit_graph(struct repository *r)
>>   	struct object_directory *odb;
>>   	/*
>> +	 * Early return if there is no git dir or if the commit graph is
>> +	 * disabled.
>> +	 *
>>   	 * This must come before the "already attempted?" check below, because
>>   	 * we want to disable even an already-loaded graph file.
>>   	 */
>> -	if (r->commit_graph_disabled)
>> +	if (!r->gitdir || r->commit_graph_disabled)
>>   		return 0;
> 
> I haven't followed the control flow, but this one probably is a good
> addition (in other words, unlike cmd_pack_objects(), I cannot convince
> myself that r->gitdir will never be NULL here).
> 
>> diff --git a/repo-settings.c b/repo-settings.c
>> index b93e91a212e..00ca5571a1a 100644
>> --- a/repo-settings.c
>> +++ b/repo-settings.c
>> @@ -17,6 +17,9 @@ void prepare_repo_settings(struct repository *r)
>>   	char *strval;
>>   	int manyfiles;
>>   
>> +	if (!r->gitdir)
>> +		BUG("Cannot add settings for uninitialized repository");
>> +
> 
> This is a very good idea.  If I recall correctly, I think I reviewed
> a bugfix patch that would have been simplified if we had this check
> here.
>Lessley
Junio C Hamano Nov. 24, 2021, 6:23 p.m. UTC | #5
Lessley Dennington <lessleydennington@gmail.com> writes:

>>> @@ -1588,8 +1588,10 @@ static int checkout_main(int argc, const char **argv, const char *prefix,
>>>     	git_config(git_checkout_config, opts);
>>>   -	prepare_repo_settings(the_repository);
>>> -	the_repository->settings.command_requires_full_index = 0;
>>> +	if (startup_info->have_repository) {
>>> +		prepare_repo_settings(the_repository);
>>> +		the_repository->settings.command_requires_full_index = 0;
>>> +	}
>> I am kind-a surprised if the control reaches this deep if you are
>> not in a repository.  In git.c::commands[] list, all three primary
>> entry points that call checkout_main(), namely, cmd_checkout(),
>> cmd_restore(), and cmd_switch(), are marked with RUN_SETUP bit,
>> which makes us call setup_git_directory() even before we call the
>> cmd_X() function.  And setup_git_directory() dies with "not a git
>> repository (or any of the parent directories)" outside a repository.
>> So, how can startup_info->have_repository bit be false if the
>> control flow reaches here?  Or am I grossly misunderstanding what
>> you are trying to do?
>> 
> This was in reaction to the t0012-help.sh tests failing with the
> new BUG in prepare_repo_settings. However, Elijah pointed out that
> it's a better idea to move prepare_repo_settings farther down
> (after parse_options) instead. So I will be issuing that change as
> part of v5.

I forgot that "git foo -h" for a builtin command 'foo' calls
run_builtin() but bypasses the RUN_SETUP and NEED_WORK_TREE handling
before it in turn calls cmd_foo().  We expect a call in cmd_foo() to
parse_options() emit a short-help and exit.

So, yes, there is a way to reach this point in the codeflow without
being in a repository (or even when in a repository, we may have
chosen not to realize it).  Feels ugly.

Now a bit of tangent.

I wonder if it is a problem to completely bypass RUN_SETUP in such a
case.  In general, we read the configuration to tweak the hardcoded
default behaviour, and then further override it by parsing command
line options.  In order to read configuration fully, we'd need to
know where the repository is.  So the start-up sequence must be in
this order:

 - discover where the repository is (either gently or with a hard failure)
 - read the configuration files
 - call parse_options()

And by completly bypassing RUN_SETUP, we are not reading per-repo
settings from the configuration files.

Something along this line (note: there is an always-taken if block
to reduce the patch noise for this illustration), perhaps.




 git.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git i/git.c w/git.c
index 5ff21be21f..50e258508e 100644
--- i/git.c
+++ w/git.c
@@ -421,25 +421,30 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv)
 	int status, help;
 	struct stat st;
 	const char *prefix;
+	int run_setup = (p->option & (RUN_SETUP | RUN_SETUP_GENTLY));
 
 	prefix = NULL;
 	help = argc == 2 && !strcmp(argv[1], "-h");
-	if (!help) {
-		if (p->option & RUN_SETUP)
+	if (help && (run_setup & RUN_SETUP))
+		/* demote to GENTLY to allow 'git cmd -h' outside repo */
+		run_setup = RUN_SETUP_GENTLY;
+
+	if (1) {
+		if (run_setup & RUN_SETUP)
 			prefix = setup_git_directory();
-		else if (p->option & RUN_SETUP_GENTLY) {
+		else if (run_setup & RUN_SETUP_GENTLY) {
 			int nongit_ok;
 			prefix = setup_git_directory_gently(&nongit_ok);
 		}
 		precompose_argv_prefix(argc, argv, NULL);
-		if (use_pager == -1 && p->option & (RUN_SETUP | RUN_SETUP_GENTLY) &&
+		if (use_pager == -1 && run_setup &&
 		    !(p->option & DELAY_PAGER_CONFIG))
 			use_pager = check_pager_config(p->cmd);
 		if (use_pager == -1 && p->option & USE_PAGER)
 			use_pager = 1;
 
-		if ((p->option & (RUN_SETUP | RUN_SETUP_GENTLY)) &&
-		    startup_info->have_repository) /* get_git_dir() may set up repo, avoid that */
+		if (run_setup && startup_info->have_repository)
+			/* get_git_dir() may set up repo, avoid that */
 			trace_repo_setup(prefix);
 	}
 	commit_pager_choice();
Lessley Dennington Nov. 29, 2021, 11:38 p.m. UTC | #6
On 11/24/21 10:23 AM, Junio C Hamano wrote:
> Lessley Dennington <lessleydennington@gmail.com> writes:
> 
>>>> @@ -1588,8 +1588,10 @@ static int checkout_main(int argc, const char **argv, const char *prefix,
>>>>      	git_config(git_checkout_config, opts);
>>>>    -	prepare_repo_settings(the_repository);
>>>> -	the_repository->settings.command_requires_full_index = 0;
>>>> +	if (startup_info->have_repository) {
>>>> +		prepare_repo_settings(the_repository);
>>>> +		the_repository->settings.command_requires_full_index = 0;
>>>> +	}
>>> I am kind-a surprised if the control reaches this deep if you are
>>> not in a repository.  In git.c::commands[] list, all three primary
>>> entry points that call checkout_main(), namely, cmd_checkout(),
>>> cmd_restore(), and cmd_switch(), are marked with RUN_SETUP bit,
>>> which makes us call setup_git_directory() even before we call the
>>> cmd_X() function.  And setup_git_directory() dies with "not a git
>>> repository (or any of the parent directories)" outside a repository.
>>> So, how can startup_info->have_repository bit be false if the
>>> control flow reaches here?  Or am I grossly misunderstanding what
>>> you are trying to do?
>>>
>> This was in reaction to the t0012-help.sh tests failing with the
>> new BUG in prepare_repo_settings. However, Elijah pointed out that
>> it's a better idea to move prepare_repo_settings farther down
>> (after parse_options) instead. So I will be issuing that change as
>> part of v5.
> 
> I forgot that "git foo -h" for a builtin command 'foo' calls
> run_builtin() but bypasses the RUN_SETUP and NEED_WORK_TREE handling
> before it in turn calls cmd_foo().  We expect a call in cmd_foo() to
> parse_options() emit a short-help and exit.
> 
> So, yes, there is a way to reach this point in the codeflow without
> being in a repository (or even when in a repository, we may have
> chosen not to realize it).  Feels ugly.
> 
> Now a bit of tangent.
> 
> I wonder if it is a problem to completely bypass RUN_SETUP in such a
> case.  In general, we read the configuration to tweak the hardcoded
> default behaviour, and then further override it by parsing command
> line options.  In order to read configuration fully, we'd need to
> know where the repository is.  So the start-up sequence must be in
> this order:
> 
>   - discover where the repository is (either gently or with a hard failure)
>   - read the configuration files
>   - call parse_options()
> 
> And by completly bypassing RUN_SETUP, we are not reading per-repo
> settings from the configuration files.
> 
> Something along this line (note: there is an always-taken if block
> to reduce the patch noise for this illustration), perhaps.
> 
> 
> 
> 
>   git.c | 17 +++++++++++------
>   1 file changed, 11 insertions(+), 6 deletions(-)
> 
> diff --git i/git.c w/git.c
> index 5ff21be21f..50e258508e 100644
> --- i/git.c
> +++ w/git.c
> @@ -421,25 +421,30 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv)
>   	int status, help;
>   	struct stat st;
>   	const char *prefix;
> +	int run_setup = (p->option & (RUN_SETUP | RUN_SETUP_GENTLY));
>   
>   	prefix = NULL;
>   	help = argc == 2 && !strcmp(argv[1], "-h");
> -	if (!help) {
> -		if (p->option & RUN_SETUP)
> +	if (help && (run_setup & RUN_SETUP))
> +		/* demote to GENTLY to allow 'git cmd -h' outside repo */
> +		run_setup = RUN_SETUP_GENTLY;
> +
> +	if (1) {
> +		if (run_setup & RUN_SETUP)
>   			prefix = setup_git_directory();
> -		else if (p->option & RUN_SETUP_GENTLY) {
> +		else if (run_setup & RUN_SETUP_GENTLY) {
>   			int nongit_ok;
>   			prefix = setup_git_directory_gently(&nongit_ok);
>   		}
>   		precompose_argv_prefix(argc, argv, NULL);
> -		if (use_pager == -1 && p->option & (RUN_SETUP | RUN_SETUP_GENTLY) &&
> +		if (use_pager == -1 && run_setup &&
>   		    !(p->option & DELAY_PAGER_CONFIG))
>   			use_pager = check_pager_config(p->cmd);
>   		if (use_pager == -1 && p->option & USE_PAGER)
>   			use_pager = 1;
>   
> -		if ((p->option & (RUN_SETUP | RUN_SETUP_GENTLY)) &&
> -		    startup_info->have_repository) /* get_git_dir() may set up repo, avoid that */
> +		if (run_setup && startup_info->have_repository)
> +			/* get_git_dir() may set up repo, avoid that */
>   			trace_repo_setup(prefix);
>   	}
>   	commit_pager_choice();
> 
This is cool! I applied it locally, and it seems to be working well. I
will plan to replace my changes to checkout and pack-objects with this
for v5.

Lessley
Junio C Hamano Nov. 30, 2021, 6:32 a.m. UTC | #7
Lessley Dennington <lessleydennington@gmail.com> writes:

> This is cool! I applied it locally, and it seems to be working well. I
> will plan to replace my changes to checkout and pack-objects with this
> for v5.

I didn't write it to replace the changes you were preparing, though.

What the patch is meant to solve is that "git checkout -h" in a
repository, whose .git/config has some custom configuration
variables that affect how the short help text is shown, were
ignoring that per-repository settings by not even checking if we are
in a repository at all.

When "-h" is in effect, and if you are outside a repository, this
would cause the setup_git_directory_gently() function to be called,
and we'd reach checkout_main() without having discovered repository.
I do not think it removes the need for your "only when startup-info
says we have repository, do these" safety.  At least, I didn't write
the patch to remove that need.
Lessley Dennington Nov. 30, 2021, 11:25 p.m. UTC | #8
On 11/29/21 10:32 PM, Junio C Hamano wrote:
> Lessley Dennington <lessleydennington@gmail.com> writes:
> 
>> This is cool! I applied it locally, and it seems to be working well. I
>> will plan to replace my changes to checkout and pack-objects with this
>> for v5.
> 
> I didn't write it to replace the changes you were preparing, though.
> 
> What the patch is meant to solve is that "git checkout -h" in a
> repository, whose .git/config has some custom configuration
> variables that affect how the short help text is shown, were
> ignoring that per-repository settings by not even checking if we are
> in a repository at all.
> 
> When "-h" is in effect, and if you are outside a repository, this
> would cause the setup_git_directory_gently() function to be called,
> and we'd reach checkout_main() without having discovered repository.
> I do not think it removes the need for your "only when startup-info
> says we have repository, do these" safety.  At least, I didn't write
> the patch to remove that need.
> 
To be clear, I am not replacing the change in repo-settings.c to BUG
if we're trying to prepare settings for an uninitialized repository.
I just removed the conditional I had added around the
prepare_repo_settings method in checkout and pack-objects, since the
failing tests for those builtins started passing with the application
of this patch.
diff mbox series

Patch

diff --git a/builtin/checkout.c b/builtin/checkout.c
index 8c69dcdf72a..31632b07888 100644
--- a/builtin/checkout.c
+++ b/builtin/checkout.c
@@ -1588,8 +1588,10 @@  static int checkout_main(int argc, const char **argv, const char *prefix,
 
 	git_config(git_checkout_config, opts);
 
-	prepare_repo_settings(the_repository);
-	the_repository->settings.command_requires_full_index = 0;
+	if (startup_info->have_repository) {
+		prepare_repo_settings(the_repository);
+		the_repository->settings.command_requires_full_index = 0;
+	}
 
 	opts->track = BRANCH_TRACK_UNSPECIFIED;
 
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 1a3dd445f83..45dc2258dc7 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -3976,9 +3976,12 @@  int cmd_pack_objects(int argc, const char **argv, const char *prefix)
 	read_replace_refs = 0;
 
 	sparse = git_env_bool("GIT_TEST_PACK_SPARSE", -1);
-	prepare_repo_settings(the_repository);
-	if (sparse < 0)
-		sparse = the_repository->settings.pack_use_sparse;
+
+	if (startup_info->have_repository) {
+		prepare_repo_settings(the_repository);
+		if (sparse < 0)
+			sparse = the_repository->settings.pack_use_sparse;
+	}
 
 	reset_pack_idx_option(&pack_idx_opts);
 	git_config(git_pack_config, NULL);
diff --git a/commit-graph.c b/commit-graph.c
index 2706683acfe..265c010122e 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -632,10 +632,13 @@  static int prepare_commit_graph(struct repository *r)
 	struct object_directory *odb;
 
 	/*
+	 * Early return if there is no git dir or if the commit graph is
+	 * disabled.
+	 *
 	 * This must come before the "already attempted?" check below, because
 	 * we want to disable even an already-loaded graph file.
 	 */
-	if (r->commit_graph_disabled)
+	if (!r->gitdir || r->commit_graph_disabled)
 		return 0;
 
 	if (r->objects->commit_graph_attempted)
diff --git a/repo-settings.c b/repo-settings.c
index b93e91a212e..00ca5571a1a 100644
--- a/repo-settings.c
+++ b/repo-settings.c
@@ -17,6 +17,9 @@  void prepare_repo_settings(struct repository *r)
 	char *strval;
 	int manyfiles;
 
+	if (!r->gitdir)
+		BUG("Cannot add settings for uninitialized repository");
+
 	if (r->settings.initialized++)
 		return;