[7/9] submodule: fetch in submodules git directory instead of in worktree
diff mbox series

Message ID 20180911234951.14129-8-sbeller@google.com
State New
Headers show
Series
  • fetch: make sure submodule oids are fetched
Related show

Commit Message

Stefan Beller Sept. 11, 2018, 11:49 p.m. UTC
This patch started as a refactoring to make 'get_next_submodule' more
readable, but upon doing so, I realized that git-fetch actually doesn't
need to be run in the worktree. So let's run it in the git dir instead.

That should pave the way towards fetching submodules that are currently
not checked out.

Signed-off-by: Stefan Beller <sbeller@google.com>
---
 submodule.c                 | 43 ++++++++++++++++++++++++++-----------
 t/t5526-fetch-submodules.sh |  7 +++++-
 2 files changed, 37 insertions(+), 13 deletions(-)

Comments

Junio C Hamano Sept. 12, 2018, 6:36 p.m. UTC | #1
Stefan Beller <sbeller@google.com> writes:

> This patch started as a refactoring to make 'get_next_submodule' more
> readable, but upon doing so, I realized that git-fetch actually doesn't
> need to be run in the worktree. So let's run it in the git dir instead.

It may be clear to the author but not clear to the reader of the
above paragraph that "worktree", "fetch" and "git dir" all refer to
the recursively invoked operation that updates the submodules
repository.  s/git-fetch/"git fetch" for the submodule/ should be
sufficient to help the readers.

> That should pave the way towards fetching submodules that are currently
> not checked out.

Very good.

> +static void prepare_submodule_repo_env_in_gitdir(struct argv_array *out)
> +{
> +	prepare_submodule_repo_env_no_git_dir(out);
> +	argv_array_pushf(out, "%s=.", GIT_DIR_ENVIRONMENT);
> +}
> +
>  /* Helper function to display the submodule header line prior to the full
>   * summary output. If it can locate the submodule objects directory it will
>   * attempt to lookup both the left and right commits and put them into the
> @@ -1227,6 +1233,27 @@ static int get_fetch_recurse_config(const struct submodule *submodule,
>  	return spf->default_option;
>  }
>  
> +static const char *get_submodule_git_dir(struct repository *r, const char *path)
> +{
> +	struct repository subrepo;
> +	const char *ret;
> +
> +	if (repo_submodule_init(&subrepo, r, path)) {
> +		/* no entry in .gitmodules? */
> +		struct strbuf gitdir = STRBUF_INIT;
> +		strbuf_repo_worktree_path(&gitdir, r, "%s/.git", path);
> +		if (repo_init(&subrepo, gitdir.buf, NULL)) {
> +			strbuf_release(&gitdir);
> +			return NULL;
> +		}

This is for the modern "absorbed" layout?  Do we get a notice and
encouragement to migrate from the historical layout, or there is no
need to (e.g. the migration happens automatically in some other
codepaths)?

> +	}
> +
> +	ret = xstrdup(subrepo.gitdir);
> +	repo_clear(&subrepo);
> +
> +	return ret;
> +}

Returned value from this function is xstrdup()'ed so the caller
owns, not borrows.  There is no need to return "const char *" from
this function.  Also the caller needs to free it once done.

>  static int get_next_submodule(struct child_process *cp,
>  			      struct strbuf *err, void *data, void **task_cb)
>  {
> @@ -1234,8 +1261,6 @@ static int get_next_submodule(struct child_process *cp,
>  	struct submodule_parallel_fetch *spf = data;
>  
>  	for (; spf->count < spf->r->index->cache_nr; spf->count++) {
> -		struct strbuf submodule_path = STRBUF_INIT;
> -		struct strbuf submodule_git_dir = STRBUF_INIT;
>  		struct strbuf submodule_prefix = STRBUF_INIT;
>  		const struct cache_entry *ce = spf->r->index->cache[spf->count];
>  		const char *git_dir, *default_argv;
> @@ -1274,16 +1299,12 @@ static int get_next_submodule(struct child_process *cp,
>  			continue;
>  		}
>  
> -		strbuf_repo_worktree_path(&submodule_path, spf->r, "%s", ce->name);
> -		strbuf_addf(&submodule_git_dir, "%s/.git", submodule_path.buf);
>  		strbuf_addf(&submodule_prefix, "%s%s/", spf->prefix, ce->name);
> -		git_dir = read_gitfile(submodule_git_dir.buf);
> -		if (!git_dir)
> -			git_dir = submodule_git_dir.buf;
> -		if (is_directory(git_dir)) {

In the old code, git_dir came from read_gitfile() which borrowed.

> +		git_dir = get_submodule_git_dir(spf->r, ce->name);

In the new code, we own it, so we'd eventually need to get rid of
it.  How does it happen?

> +		if (git_dir) {
>  			child_process_init(cp);
> -			cp->dir = strbuf_detach(&submodule_path, NULL);
> -			prepare_submodule_repo_env(&cp->env_array);
> +			prepare_submodule_repo_env_in_gitdir(&cp->env_array);
> +			cp->dir = git_dir;

Does cp now own it and cp->dir gets freed once run_processes_parallel()
is done with this task?  Or is cp->dir simply leaking?  The old code
gave the result of strbuf_detach(), so even if cp->dir is leaking,
the leak is not new in this patch.

>  			cp->git_cmd = 1;
>  			if (!spf->quiet)
>  				strbuf_addf(err, "Fetching submodule %s%s\n",
> @@ -1295,8 +1316,6 @@ static int get_next_submodule(struct child_process *cp,
>  			argv_array_push(&cp->args, submodule_prefix.buf);
>  			ret = 1;
>  		}
> -		strbuf_release(&submodule_path);
> -		strbuf_release(&submodule_git_dir);

But if it is a leak, it is easily plugged by freeing git_dir here, I
think.

Thanks.


>  		strbuf_release(&submodule_prefix);
>  		if (ret) {
>  			spf->count++;
> diff --git a/t/t5526-fetch-submodules.sh b/t/t5526-fetch-submodules.sh
> index 6c2f9b2ba26..42692219a1a 100755
> --- a/t/t5526-fetch-submodules.sh
> +++ b/t/t5526-fetch-submodules.sh
> @@ -566,7 +566,12 @@ test_expect_success 'fetching submodule into a broken repository' '
>  
>  	test_must_fail git -C dst status &&
>  	test_must_fail git -C dst diff &&
> -	test_must_fail git -C dst fetch --recurse-submodules
> +
> +	# git-fetch cannot find the git directory of the submodule,
> +	# so it will do nothing, successfully, as it cannot distinguish between
> +	# this broken submodule and a submodule that was just set active but
> +	# not cloned yet
> +	git -C dst fetch --recurse-submodules
>  '
>  
>  test_expect_success "fetch new commits when submodule got renamed" '
Stefan Beller Sept. 13, 2018, 7:29 p.m. UTC | #2
On Wed, Sep 12, 2018 at 11:36 AM Junio C Hamano <gitster@pobox.com> wrote:
>
> Stefan Beller <sbeller@google.com> writes:
>
> > This patch started as a refactoring to make 'get_next_submodule' more
> > readable, but upon doing so, I realized that git-fetch actually doesn't
> > need to be run in the worktree. So let's run it in the git dir instead.
>
> It may be clear to the author but not clear to the reader of the
> above paragraph that "worktree", "fetch" and "git dir" all refer to
> the recursively invoked operation that updates the submodules
> repository.  s/git-fetch/"git fetch" for the submodule/ should be
> sufficient to help the readers.
>
> > That should pave the way towards fetching submodules that are currently
> > not checked out.
>
> Very good.
>
> > +static void prepare_submodule_repo_env_in_gitdir(struct argv_array *out)
> > +{
> > +     prepare_submodule_repo_env_no_git_dir(out);
> > +     argv_array_pushf(out, "%s=.", GIT_DIR_ENVIRONMENT);
> > +}
> > +
> >  /* Helper function to display the submodule header line prior to the full
> >   * summary output. If it can locate the submodule objects directory it will
> >   * attempt to lookup both the left and right commits and put them into the
> > @@ -1227,6 +1233,27 @@ static int get_fetch_recurse_config(const struct submodule *submodule,
> >       return spf->default_option;
> >  }
> >
> > +static const char *get_submodule_git_dir(struct repository *r, const char *path)
> > +{
> > +     struct repository subrepo;
> > +     const char *ret;
> > +
> > +     if (repo_submodule_init(&subrepo, r, path)) {
> > +             /* no entry in .gitmodules? */
> > +             struct strbuf gitdir = STRBUF_INIT;
> > +             strbuf_repo_worktree_path(&gitdir, r, "%s/.git", path);
> > +             if (repo_init(&subrepo, gitdir.buf, NULL)) {
> > +                     strbuf_release(&gitdir);
> > +                     return NULL;
> > +             }
>
> This is for the modern "absorbed" layout?  Do we get a notice and
> encouragement to migrate from the historical layout, or there is no
> need to (e.g. the migration happens automatically in some other
> codepaths)?

No, the absorbed would also be handled by repo_submodule_init.
I wrote a patch once to migrate repo_submodule_init to take a
"struct *submodule" instead of a path as the third argument, which
would fall in line with this patch as well, I'll dig it up.

Historically git-fetch supported repositories that are not submodules
(but have a gitlink and a working tree in place) as well. That is covered
here. (see comment /* no entry in .gitmodules? */)

> > -             strbuf_release(&submodule_path);
> > -             strbuf_release(&submodule_git_dir);
>
> But if it is a leak, it is easily plugged by freeing git_dir here, I
> think.

Thanks.

Patch
diff mbox series

diff --git a/submodule.c b/submodule.c
index 00a9a3c6b12..1e6781504f0 100644
--- a/submodule.c
+++ b/submodule.c
@@ -481,6 +481,12 @@  void prepare_submodule_repo_env(struct argv_array *out)
 			 DEFAULT_GIT_DIR_ENVIRONMENT);
 }
 
+static void prepare_submodule_repo_env_in_gitdir(struct argv_array *out)
+{
+	prepare_submodule_repo_env_no_git_dir(out);
+	argv_array_pushf(out, "%s=.", GIT_DIR_ENVIRONMENT);
+}
+
 /* Helper function to display the submodule header line prior to the full
  * summary output. If it can locate the submodule objects directory it will
  * attempt to lookup both the left and right commits and put them into the
@@ -1227,6 +1233,27 @@  static int get_fetch_recurse_config(const struct submodule *submodule,
 	return spf->default_option;
 }
 
+static const char *get_submodule_git_dir(struct repository *r, const char *path)
+{
+	struct repository subrepo;
+	const char *ret;
+
+	if (repo_submodule_init(&subrepo, r, path)) {
+		/* no entry in .gitmodules? */
+		struct strbuf gitdir = STRBUF_INIT;
+		strbuf_repo_worktree_path(&gitdir, r, "%s/.git", path);
+		if (repo_init(&subrepo, gitdir.buf, NULL)) {
+			strbuf_release(&gitdir);
+			return NULL;
+		}
+	}
+
+	ret = xstrdup(subrepo.gitdir);
+	repo_clear(&subrepo);
+
+	return ret;
+}
+
 static int get_next_submodule(struct child_process *cp,
 			      struct strbuf *err, void *data, void **task_cb)
 {
@@ -1234,8 +1261,6 @@  static int get_next_submodule(struct child_process *cp,
 	struct submodule_parallel_fetch *spf = data;
 
 	for (; spf->count < spf->r->index->cache_nr; spf->count++) {
-		struct strbuf submodule_path = STRBUF_INIT;
-		struct strbuf submodule_git_dir = STRBUF_INIT;
 		struct strbuf submodule_prefix = STRBUF_INIT;
 		const struct cache_entry *ce = spf->r->index->cache[spf->count];
 		const char *git_dir, *default_argv;
@@ -1274,16 +1299,12 @@  static int get_next_submodule(struct child_process *cp,
 			continue;
 		}
 
-		strbuf_repo_worktree_path(&submodule_path, spf->r, "%s", ce->name);
-		strbuf_addf(&submodule_git_dir, "%s/.git", submodule_path.buf);
 		strbuf_addf(&submodule_prefix, "%s%s/", spf->prefix, ce->name);
-		git_dir = read_gitfile(submodule_git_dir.buf);
-		if (!git_dir)
-			git_dir = submodule_git_dir.buf;
-		if (is_directory(git_dir)) {
+		git_dir = get_submodule_git_dir(spf->r, ce->name);
+		if (git_dir) {
 			child_process_init(cp);
-			cp->dir = strbuf_detach(&submodule_path, NULL);
-			prepare_submodule_repo_env(&cp->env_array);
+			prepare_submodule_repo_env_in_gitdir(&cp->env_array);
+			cp->dir = git_dir;
 			cp->git_cmd = 1;
 			if (!spf->quiet)
 				strbuf_addf(err, "Fetching submodule %s%s\n",
@@ -1295,8 +1316,6 @@  static int get_next_submodule(struct child_process *cp,
 			argv_array_push(&cp->args, submodule_prefix.buf);
 			ret = 1;
 		}
-		strbuf_release(&submodule_path);
-		strbuf_release(&submodule_git_dir);
 		strbuf_release(&submodule_prefix);
 		if (ret) {
 			spf->count++;
diff --git a/t/t5526-fetch-submodules.sh b/t/t5526-fetch-submodules.sh
index 6c2f9b2ba26..42692219a1a 100755
--- a/t/t5526-fetch-submodules.sh
+++ b/t/t5526-fetch-submodules.sh
@@ -566,7 +566,12 @@  test_expect_success 'fetching submodule into a broken repository' '
 
 	test_must_fail git -C dst status &&
 	test_must_fail git -C dst diff &&
-	test_must_fail git -C dst fetch --recurse-submodules
+
+	# git-fetch cannot find the git directory of the submodule,
+	# so it will do nothing, successfully, as it cannot distinguish between
+	# this broken submodule and a submodule that was just set active but
+	# not cloned yet
+	git -C dst fetch --recurse-submodules
 '
 
 test_expect_success "fetch new commits when submodule got renamed" '