diff mbox series

[v2,04/12] builtin/for-each-repo: remove unnecessary argv copy to plug leak

Message ID 20210725130830.5145-5-andrzej@ahunt.org (mailing list archive)
State Accepted
Commit 2b2999460c7e907dc80593483e1c23ce00b6745c
Headers show
Series [v2,01/12] fmt-merge-msg: free newly allocated temporary strings when done | expand

Commit Message

Andrzej Hunt July 25, 2021, 1:08 p.m. UTC
From: Andrzej Hunt <ajrhunt@google.com>

cmd_for_each_repo() copies argv into args (a strvec), which is later
passed into run_command_on_repo(), which in turn copies that strvec onto
the end of child.args. The initial copy is unnecessary (we never modify
args). We therefore choose to just pass argv directly into
run_command_on_repo(), which lets us avoid the copy and fixes the leak.

LSAN output from t0068:

Direct leak of 192 byte(s) in 1 object(s) allocated from:
    #0 0x7f63bd4ab8b0 in realloc (/usr/lib64/libasan.so.4+0xdc8b0)
    #1 0x98d7e6 in xrealloc wrapper.c:126
    #2 0x916914 in strvec_push_nodup strvec.c:19
    #3 0x916a6e in strvec_push strvec.c:26
    #4 0x4be4eb in cmd_for_each_repo builtin/for-each-repo.c:49
    #5 0x410dcd in run_builtin git.c:475
    #6 0x410dcd in handle_builtin git.c:729
    #7 0x414087 in run_argv git.c:818
    #8 0x414087 in cmd_main git.c:949
    #9 0x40e9ec in main common-main.c:52
    #10 0x7f63bc9fa349 in __libc_start_main (/lib64/libc.so.6+0x24349)

Indirect leak of 22 byte(s) in 2 object(s) allocated from:
    #0 0x7f63bd445e30 in __interceptor_strdup (/usr/lib64/libasan.so.4+0x76e30)
    #1 0x98d698 in xstrdup wrapper.c:29
    #2 0x916a63 in strvec_push strvec.c:26
    #3 0x4be4eb in cmd_for_each_repo builtin/for-each-repo.c:49
    #4 0x410dcd in run_builtin git.c:475
    #5 0x410dcd in handle_builtin git.c:729
    #6 0x414087 in run_argv git.c:818
    #7 0x414087 in cmd_main git.c:949
    #8 0x40e9ec in main common-main.c:52
    #9 0x7f63bc9fa349 in __libc_start_main (/lib64/libc.so.6+0x24349)

See also discussion about the original implementation below - this code
appears to have evolved from a callback explaining the double-strvec-copy
pattern, but there's no strong reason to keep that now:
  https://lore.kernel.org/git/68bbeca5-314b-08ee-ef36-040e3f3814e9@gmail.com/

Signed-off-by: Andrzej Hunt <andrzej@ahunt.org>
---
 builtin/for-each-repo.c | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

Comments

Junio C Hamano July 26, 2021, 8:02 p.m. UTC | #1
andrzej@ahunt.org writes:

> From: Andrzej Hunt <ajrhunt@google.com>
>
> cmd_for_each_repo() copies argv into args (a strvec), which is later
> passed into run_command_on_repo(), which in turn copies that strvec onto
> the end of child.args. The initial copy is unnecessary (we never modify
> args). We therefore choose to just pass argv directly into
> run_command_on_repo(), which lets us avoid the copy and fixes the leak.

Makes sense.  There is no reason to make these copies.

>
> LSAN output from t0068:
>
> Direct leak of 192 byte(s) in 1 object(s) allocated from:
>     #0 0x7f63bd4ab8b0 in realloc (/usr/lib64/libasan.so.4+0xdc8b0)
>     #1 0x98d7e6 in xrealloc wrapper.c:126
>     #2 0x916914 in strvec_push_nodup strvec.c:19
>     #3 0x916a6e in strvec_push strvec.c:26
>     #4 0x4be4eb in cmd_for_each_repo builtin/for-each-repo.c:49
>     #5 0x410dcd in run_builtin git.c:475
>     #6 0x410dcd in handle_builtin git.c:729
>     #7 0x414087 in run_argv git.c:818
>     #8 0x414087 in cmd_main git.c:949
>     #9 0x40e9ec in main common-main.c:52
>     #10 0x7f63bc9fa349 in __libc_start_main (/lib64/libc.so.6+0x24349)
>
> Indirect leak of 22 byte(s) in 2 object(s) allocated from:
>     #0 0x7f63bd445e30 in __interceptor_strdup (/usr/lib64/libasan.so.4+0x76e30)
>     #1 0x98d698 in xstrdup wrapper.c:29
>     #2 0x916a63 in strvec_push strvec.c:26
>     #3 0x4be4eb in cmd_for_each_repo builtin/for-each-repo.c:49
>     #4 0x410dcd in run_builtin git.c:475
>     #5 0x410dcd in handle_builtin git.c:729
>     #6 0x414087 in run_argv git.c:818
>     #7 0x414087 in cmd_main git.c:949
>     #8 0x40e9ec in main common-main.c:52
>     #9 0x7f63bc9fa349 in __libc_start_main (/lib64/libc.so.6+0x24349)
>
> See also discussion about the original implementation below - this code
> appears to have evolved from a callback explaining the double-strvec-copy
> pattern, but there's no strong reason to keep that now:
>   https://lore.kernel.org/git/68bbeca5-314b-08ee-ef36-040e3f3814e9@gmail.com/
>
> Signed-off-by: Andrzej Hunt <andrzej@ahunt.org>
> ---
>  builtin/for-each-repo.c | 14 ++++----------
>  1 file changed, 4 insertions(+), 10 deletions(-)
>
> diff --git a/builtin/for-each-repo.c b/builtin/for-each-repo.c
> index 52be64a437..fd86e5a861 100644
> --- a/builtin/for-each-repo.c
> +++ b/builtin/for-each-repo.c
> @@ -10,18 +10,16 @@ static const char * const for_each_repo_usage[] = {
>  	NULL
>  };
>  
> -static int run_command_on_repo(const char *path,
> -			       void *cbdata)
> +static int run_command_on_repo(const char *path, int argc, const char ** argv)
>  {
>  	int i;
>  	struct child_process child = CHILD_PROCESS_INIT;
> -	struct strvec *args = (struct strvec *)cbdata;
>  
>  	child.git_cmd = 1;
>  	strvec_pushl(&child.args, "-C", path, NULL);
>  
> -	for (i = 0; i < args->nr; i++)
> -		strvec_push(&child.args, args->v[i]);
> +	for (i = 0; i < argc; i++)
> +		strvec_push(&child.args, argv[i]);
>  
>  	return run_command(&child);
>  }
> @@ -31,7 +29,6 @@ int cmd_for_each_repo(int argc, const char **argv, const char *prefix)
>  	static const char *config_key = NULL;
>  	int i, result = 0;
>  	const struct string_list *values;
> -	struct strvec args = STRVEC_INIT;
>  
>  	const struct option options[] = {
>  		OPT_STRING(0, "config", &config_key, N_("config"),
> @@ -45,9 +42,6 @@ int cmd_for_each_repo(int argc, const char **argv, const char *prefix)
>  	if (!config_key)
>  		die(_("missing --config=<config>"));
>  
> -	for (i = 0; i < argc; i++)
> -		strvec_push(&args, argv[i]);
> -
>  	values = repo_config_get_value_multi(the_repository,
>  					     config_key);
>  
> @@ -59,7 +53,7 @@ int cmd_for_each_repo(int argc, const char **argv, const char *prefix)
>  		return 0;
>  
>  	for (i = 0; !result && i < values->nr; i++)
> -		result = run_command_on_repo(values->items[i].string, &args);
> +		result = run_command_on_repo(values->items[i].string, argc, argv);
>  
>  	return result;
>  }
diff mbox series

Patch

diff --git a/builtin/for-each-repo.c b/builtin/for-each-repo.c
index 52be64a437..fd86e5a861 100644
--- a/builtin/for-each-repo.c
+++ b/builtin/for-each-repo.c
@@ -10,18 +10,16 @@  static const char * const for_each_repo_usage[] = {
 	NULL
 };
 
-static int run_command_on_repo(const char *path,
-			       void *cbdata)
+static int run_command_on_repo(const char *path, int argc, const char ** argv)
 {
 	int i;
 	struct child_process child = CHILD_PROCESS_INIT;
-	struct strvec *args = (struct strvec *)cbdata;
 
 	child.git_cmd = 1;
 	strvec_pushl(&child.args, "-C", path, NULL);
 
-	for (i = 0; i < args->nr; i++)
-		strvec_push(&child.args, args->v[i]);
+	for (i = 0; i < argc; i++)
+		strvec_push(&child.args, argv[i]);
 
 	return run_command(&child);
 }
@@ -31,7 +29,6 @@  int cmd_for_each_repo(int argc, const char **argv, const char *prefix)
 	static const char *config_key = NULL;
 	int i, result = 0;
 	const struct string_list *values;
-	struct strvec args = STRVEC_INIT;
 
 	const struct option options[] = {
 		OPT_STRING(0, "config", &config_key, N_("config"),
@@ -45,9 +42,6 @@  int cmd_for_each_repo(int argc, const char **argv, const char *prefix)
 	if (!config_key)
 		die(_("missing --config=<config>"));
 
-	for (i = 0; i < argc; i++)
-		strvec_push(&args, argv[i]);
-
 	values = repo_config_get_value_multi(the_repository,
 					     config_key);
 
@@ -59,7 +53,7 @@  int cmd_for_each_repo(int argc, const char **argv, const char *prefix)
 		return 0;
 
 	for (i = 0; !result && i < values->nr; i++)
-		result = run_command_on_repo(values->items[i].string, &args);
+		result = run_command_on_repo(values->items[i].string, argc, argv);
 
 	return result;
 }