diff mbox series

[v2,2/8] submodule--helper: get remote names from any repository

Message ID 20210916103241.62376-3-raykar.ath@gmail.com (mailing list archive)
State Superseded
Headers show
Series submodule: convert the rest of 'update' to C | expand

Commit Message

Atharva Raykar Sept. 16, 2021, 10:32 a.m. UTC
`get_default_remote()` retrieves the name of a remote by resolving the
refs from of the current repository's ref store.

Thus in order to use it for retrieving the remote name of a submodule,
we have to start a new subprocess which runs from the submodule
directory.

Let's instead introduce a function called `repo_get_default_remote()`
which takes any repository object and retrieves the remote accordingly.

`get_default_remote()` is then defined as a call to
`repo_get_default_remote()` with 'the_repository' passed to it.

Now that we have `repo_get_default_remote()`, we no longer have to start
a subprocess that called `submodule--helper get-default-remote` from
within the submodule directory.

So let's make a function called `get_default_remote_submodule()` which
takes a submodule path, and returns the default remote for that
submodule, all within the same process.

We can now use this function to save an unnecessary subprocess spawn in
`sync_submodule()`, and also in the next patch, which will require this
functionality.

Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Shourya Shukla <periperidip@gmail.com>
Signed-off-by: Atharva Raykar <raykar.ath@gmail.com>
---
 builtin/submodule--helper.c | 34 +++++++++++++++++++++++-----------
 1 file changed, 23 insertions(+), 11 deletions(-)

Comments

Junio C Hamano Sept. 20, 2021, 4:52 p.m. UTC | #1
Atharva Raykar <raykar.ath@gmail.com> writes:

> `get_default_remote()` retrieves the name of a remote by resolving the
> refs from of the current repository's ref store.
>
> Thus in order to use it for retrieving the remote name of a submodule,
> we have to start a new subprocess which runs from the submodule
> directory.
>
> Let's instead introduce a function called `repo_get_default_remote()`
> which takes any repository object and retrieves the remote accordingly.
>
> `get_default_remote()` is then defined as a call to
> `repo_get_default_remote()` with 'the_repository' passed to it.
>
> Now that we have `repo_get_default_remote()`, we no longer have to start
> a subprocess that called `submodule--helper get-default-remote` from
> within the submodule directory.
>
> So let's make a function called `get_default_remote_submodule()` which
> takes a submodule path, and returns the default remote for that
> submodule, all within the same process.
>
> We can now use this function to save an unnecessary subprocess spawn in
> `sync_submodule()`, and also in the next patch, which will require this
> functionality.
>
> Mentored-by: Christian Couder <christian.couder@gmail.com>
> Mentored-by: Shourya Shukla <periperidip@gmail.com>
> Signed-off-by: Atharva Raykar <raykar.ath@gmail.com>
> ---
>  builtin/submodule--helper.c | 34 +++++++++++++++++++++++-----------
>  1 file changed, 23 insertions(+), 11 deletions(-)

The above covers a lot of stuff, because this change does a lot of
things ;-)

The commit could be split into 3 logically distinct phases (two
independent, the third depends on the other two):

 - extract the part that uses the run-command() API to run
   "submodule--helper print-default-remote" from sync_submodule()
   and create get_default_remote_submodule() function out of it.

 - move bulk of get_default_remote() into repo_get_default_remote()
   and reimplement the former as a thin wrapper of the latter.

 - using the repo_get_default_remote() created in the second step,
   update the get_default_remote_submodule() created in the first
   step to work in-process without a subprocess.

but a bit larger granularity used here is probably OK.

> diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c
> index 97512ccf0b..f6c4fc349b 100644
> --- a/builtin/submodule--helper.c
> +++ b/builtin/submodule--helper.c
> @@ -29,11 +29,10 @@
>  typedef void (*each_submodule_fn)(const struct cache_entry *list_item,
>  				  void *cb_data);
>  
> -static char *get_default_remote(void)
> +static char *repo_get_default_remote(struct repository *repo, const char *refname)
>  {
>  	char *dest = NULL, *ret;
>  	struct strbuf sb = STRBUF_INIT;
> -	const char *refname = resolve_ref_unsafe("HEAD", 0, NULL, NULL);
>  
>  	if (!refname)
>  		die(_("No such ref: %s"), "HEAD");
> @@ -46,7 +45,7 @@ static char *get_default_remote(void)
>  		die(_("Expecting a full ref name, got %s"), refname);
>  
>  	strbuf_addf(&sb, "branch.%s.remote", refname);
> -	if (git_config_get_string(sb.buf, &dest))
> +	if (repo_config_get_string(repo, sb.buf, &dest))
>  		ret = xstrdup("origin");
>  	else
>  		ret = dest;
> @@ -55,6 +54,25 @@ static char *get_default_remote(void)
>  	return ret;
>  }
>  
> +static char *get_default_remote_submodule(const char *module_path)
> +{
> +	const char *refname;
> +	const struct submodule *sub;
> +	struct repository subrepo;
> +
> +	refname = refs_resolve_ref_unsafe(get_submodule_ref_store(module_path),
> +					  "HEAD", 0, NULL, NULL);
> +	sub = submodule_from_path(the_repository, null_oid(), module_path);
> +	repo_submodule_init(&subrepo, the_repository, sub);
> +	return repo_get_default_remote(&subrepo, refname);
> +}
> +
> +static char *get_default_remote(void)
> +{
> +	const char *refname = resolve_ref_unsafe("HEAD", 0, NULL, NULL);
> +	return repo_get_default_remote(the_repository, refname);
> +}
> +
>  static int print_default_remote(int argc, const char **argv, const char *prefix)
>  {
>  	char *remote;
> @@ -1369,7 +1387,6 @@ static void sync_submodule(const char *path, const char *prefix,
>  	char *remote_key = NULL;
>  	char *sub_origin_url, *super_config_url, *displaypath;
>  	struct strbuf sb = STRBUF_INIT;
> -	struct child_process cp = CHILD_PROCESS_INIT;
>  	char *sub_config_path = NULL;
>  
>  	if (!is_submodule_active(the_repository, path))
> @@ -1418,14 +1435,9 @@ static void sync_submodule(const char *path, const char *prefix,
>  	if (!is_submodule_populated_gently(path, NULL))
>  		goto cleanup;
>  
> -	prepare_submodule_repo_env(&cp.env_array);
> -	cp.git_cmd = 1;
> -	cp.dir = path;
> -	strvec_pushl(&cp.args, "submodule--helper",
> -		     "print-default-remote", NULL);
> -
>  	strbuf_reset(&sb);
> -	if (capture_command(&cp, &sb, 0))
> +	strbuf_addstr(&sb, get_default_remote_submodule(path));
> +	if (!sb.buf)
>  		die(_("failed to get the default remote for submodule '%s'"),
>  		      path);

There is this line after the post context presented here:

        strbuf_strip_suffix(&sb, "\n");
        remote_key = xstrfmt("remote.%s.url", sb.buf);

The LF was expected to come from the capture_command(), but it no
longer is needed after get_default_remote_submodule() switches to
use the in-process method.  In fact, the use of sb for the purpose
of forming remote_key is not needed.

    tmp = get_default_remote_submodule(path);
    if (!tmp)
	die(_("failed to get..."));
    remote_key = xstrfmt("remote.%s.url", tmp);
    free(tmp);

Yes, I think the new code leaks, and makes it impossible not to
leak, the returned value from get_default_remote_submodule() by
passing the call as an argument to strbuf_addstr().

Both of the two bugs pointed out here would have been easier to spot
if the commits were smaller, I would think, but as I said, a bit
larger granularity used here was still manageable.

Thanks.
Junio C Hamano Sept. 20, 2021, 9:28 p.m. UTC | #2
Atharva Raykar <raykar.ath@gmail.com> writes:

> +static char *get_default_remote_submodule(const char *module_path)
> +{
> +	const char *refname;
> +	const struct submodule *sub;
> +	struct repository subrepo;
> +
> +	refname = refs_resolve_ref_unsafe(get_submodule_ref_store(module_path),
> +					  "HEAD", 0, NULL, NULL);
> +	sub = submodule_from_path(the_repository, null_oid(), module_path);
> +	repo_submodule_init(&subrepo, the_repository, sub);
> +	return repo_get_default_remote(&subrepo, refname);
> +}

This call (and I think there is another call in this file) to
repo_submodule_init() is affected by what Jonathan's 8eb8dcf9
(repository: support unabsorbed in repo_submodule_init, 2021-09-09)
wants to do, namely to lose "struct submodule sub" as a parameter
and instead take the path to the module and the treeish name as
parameters to repo_submodule_init().

I _think_ I resolved the conflict correctly, but please double check
the result when it is pushed out later today, both of you.

Thanks.
Jonathan Tan Sept. 21, 2021, 4:33 p.m. UTC | #3
> Atharva Raykar <raykar.ath@gmail.com> writes:
> 
> > +static char *get_default_remote_submodule(const char *module_path)
> > +{
> > +	const char *refname;
> > +	const struct submodule *sub;
> > +	struct repository subrepo;
> > +
> > +	refname = refs_resolve_ref_unsafe(get_submodule_ref_store(module_path),
> > +					  "HEAD", 0, NULL, NULL);
> > +	sub = submodule_from_path(the_repository, null_oid(), module_path);
> > +	repo_submodule_init(&subrepo, the_repository, sub);
> > +	return repo_get_default_remote(&subrepo, refname);
> > +}
> 
> This call (and I think there is another call in this file) to
> repo_submodule_init() is affected by what Jonathan's 8eb8dcf9
> (repository: support unabsorbed in repo_submodule_init, 2021-09-09)
> wants to do, namely to lose "struct submodule sub" as a parameter
> and instead take the path to the module and the treeish name as
> parameters to repo_submodule_init().
> 
> I _think_ I resolved the conflict correctly, but please double check
> the result when it is pushed out later today, both of you.
> 
> Thanks.

Thanks for calling this out. There are indeed 2 such calls in 33cfc43433
("Merge branch 'ar/submodule-update' into seen", 2021-09-20) and both
look correct.
Atharva Raykar Oct. 3, 2021, 1:22 p.m. UTC | #4
Junio C Hamano <gitster@pobox.com> writes:

> Atharva Raykar <raykar.ath@gmail.com> writes:
>
>> `get_default_remote()` retrieves the name of a remote by resolving the
>> refs from of the current repository's ref store.
>>
>> Thus in order to use it for retrieving the remote name of a submodule,
>> we have to start a new subprocess which runs from the submodule
>> directory.
>>
>> Let's instead introduce a function called `repo_get_default_remote()`
>> which takes any repository object and retrieves the remote accordingly.
>>
>> `get_default_remote()` is then defined as a call to
>> `repo_get_default_remote()` with 'the_repository' passed to it.
>>
>> Now that we have `repo_get_default_remote()`, we no longer have to start
>> a subprocess that called `submodule--helper get-default-remote` from
>> within the submodule directory.
>>
>> So let's make a function called `get_default_remote_submodule()` which
>> takes a submodule path, and returns the default remote for that
>> submodule, all within the same process.
>>
>> We can now use this function to save an unnecessary subprocess spawn in
>> `sync_submodule()`, and also in the next patch, which will require this
>> functionality.
>>
>> Mentored-by: Christian Couder <christian.couder@gmail.com>
>> Mentored-by: Shourya Shukla <periperidip@gmail.com>
>> Signed-off-by: Atharva Raykar <raykar.ath@gmail.com>
>> ---
>>  builtin/submodule--helper.c | 34 +++++++++++++++++++++++-----------
>>  1 file changed, 23 insertions(+), 11 deletions(-)
>
> The above covers a lot of stuff, because this change does a lot of
> things ;-)
>
> The commit could be split into 3 logically distinct phases (two
> independent, the third depends on the other two):
>
>  - extract the part that uses the run-command() API to run
>    "submodule--helper print-default-remote" from sync_submodule()
>    and create get_default_remote_submodule() function out of it.
>
>  - move bulk of get_default_remote() into repo_get_default_remote()
>    and reimplement the former as a thin wrapper of the latter.
>
>  - using the repo_get_default_remote() created in the second step,
>    update the get_default_remote_submodule() created in the first
>    step to work in-process without a subprocess.
>
> but a bit larger granularity used here is probably OK.
>
[...]
>>  	strbuf_reset(&sb);
>> -	if (capture_command(&cp, &sb, 0))
>> +	strbuf_addstr(&sb, get_default_remote_submodule(path));
>> +	if (!sb.buf)
>>  		die(_("failed to get the default remote for submodule '%s'"),
>>  		      path);
>
> There is this line after the post context presented here:
>
>         strbuf_strip_suffix(&sb, "\n");
>         remote_key = xstrfmt("remote.%s.url", sb.buf);
>
> The LF was expected to come from the capture_command(), but it no
> longer is needed after get_default_remote_submodule() switches to
> use the in-process method.  In fact, the use of sb for the purpose
> of forming remote_key is not needed.
>
>     tmp = get_default_remote_submodule(path);
>     if (!tmp)
> 	die(_("failed to get..."));
>     remote_key = xstrfmt("remote.%s.url", tmp);
>     free(tmp);
>
> Yes, I think the new code leaks, and makes it impossible not to
> leak, the returned value from get_default_remote_submodule() by
> passing the call as an argument to strbuf_addstr().

Thanks, this was a good catch.

> Both of the two bugs pointed out here would have been easier to spot
> if the commits were smaller, I would think, but as I said, a bit
> larger granularity used here was still manageable.

My initial series had this patch split up similar to what you suggested,
but I ultimately chose to favour the larger granularity to make the
whole series a bit shorter. This patch definitely towed the line a bit.

> Thanks.
diff mbox series

Patch

diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c
index 97512ccf0b..f6c4fc349b 100644
--- a/builtin/submodule--helper.c
+++ b/builtin/submodule--helper.c
@@ -29,11 +29,10 @@ 
 typedef void (*each_submodule_fn)(const struct cache_entry *list_item,
 				  void *cb_data);
 
-static char *get_default_remote(void)
+static char *repo_get_default_remote(struct repository *repo, const char *refname)
 {
 	char *dest = NULL, *ret;
 	struct strbuf sb = STRBUF_INIT;
-	const char *refname = resolve_ref_unsafe("HEAD", 0, NULL, NULL);
 
 	if (!refname)
 		die(_("No such ref: %s"), "HEAD");
@@ -46,7 +45,7 @@  static char *get_default_remote(void)
 		die(_("Expecting a full ref name, got %s"), refname);
 
 	strbuf_addf(&sb, "branch.%s.remote", refname);
-	if (git_config_get_string(sb.buf, &dest))
+	if (repo_config_get_string(repo, sb.buf, &dest))
 		ret = xstrdup("origin");
 	else
 		ret = dest;
@@ -55,6 +54,25 @@  static char *get_default_remote(void)
 	return ret;
 }
 
+static char *get_default_remote_submodule(const char *module_path)
+{
+	const char *refname;
+	const struct submodule *sub;
+	struct repository subrepo;
+
+	refname = refs_resolve_ref_unsafe(get_submodule_ref_store(module_path),
+					  "HEAD", 0, NULL, NULL);
+	sub = submodule_from_path(the_repository, null_oid(), module_path);
+	repo_submodule_init(&subrepo, the_repository, sub);
+	return repo_get_default_remote(&subrepo, refname);
+}
+
+static char *get_default_remote(void)
+{
+	const char *refname = resolve_ref_unsafe("HEAD", 0, NULL, NULL);
+	return repo_get_default_remote(the_repository, refname);
+}
+
 static int print_default_remote(int argc, const char **argv, const char *prefix)
 {
 	char *remote;
@@ -1369,7 +1387,6 @@  static void sync_submodule(const char *path, const char *prefix,
 	char *remote_key = NULL;
 	char *sub_origin_url, *super_config_url, *displaypath;
 	struct strbuf sb = STRBUF_INIT;
-	struct child_process cp = CHILD_PROCESS_INIT;
 	char *sub_config_path = NULL;
 
 	if (!is_submodule_active(the_repository, path))
@@ -1418,14 +1435,9 @@  static void sync_submodule(const char *path, const char *prefix,
 	if (!is_submodule_populated_gently(path, NULL))
 		goto cleanup;
 
-	prepare_submodule_repo_env(&cp.env_array);
-	cp.git_cmd = 1;
-	cp.dir = path;
-	strvec_pushl(&cp.args, "submodule--helper",
-		     "print-default-remote", NULL);
-
 	strbuf_reset(&sb);
-	if (capture_command(&cp, &sb, 0))
+	strbuf_addstr(&sb, get_default_remote_submodule(path));
+	if (!sb.buf)
 		die(_("failed to get the default remote for submodule '%s'"),
 		      path);