diff mbox series

[04/12] builtin/for-each-repo: remove unnecessary argv copy to plug leak

Message ID 20210620151204.19260-5-andrzej@ahunt.org (mailing list archive)
State New, archived
Headers show
Series Fix all leaks in tests t0002-t0099: Part 2 | expand

Commit Message

Andrzej Hunt June 20, 2021, 3:11 p.m. UTC
From: Andrzej Hunt <ajrhunt@google.com>

cmd_for_each_repo() copies argv into args (a strvec), which is later
passed into run_command_on_repo(), which in turn copies that strvec onto
the end of child.args. The initial copy is unnecessary (we never modify
args). We therefore choose to just pass argv directly into
run_command_on_repo(), which lets us avoid the copy and fixes the leak.

LSAN output from t0068:

Direct leak of 192 byte(s) in 1 object(s) allocated from:
    #0 0x7f63bd4ab8b0 in realloc (/usr/lib64/libasan.so.4+0xdc8b0)
    #1 0x98d7e6 in xrealloc wrapper.c:126
    #2 0x916914 in strvec_push_nodup strvec.c:19
    #3 0x916a6e in strvec_push strvec.c:26
    #4 0x4be4eb in cmd_for_each_repo builtin/for-each-repo.c:49
    #5 0x410dcd in run_builtin git.c:475
    #6 0x410dcd in handle_builtin git.c:729
    #7 0x414087 in run_argv git.c:818
    #8 0x414087 in cmd_main git.c:949
    #9 0x40e9ec in main common-main.c:52
    #10 0x7f63bc9fa349 in __libc_start_main (/lib64/libc.so.6+0x24349)

Indirect leak of 22 byte(s) in 2 object(s) allocated from:
    #0 0x7f63bd445e30 in __interceptor_strdup (/usr/lib64/libasan.so.4+0x76e30)
    #1 0x98d698 in xstrdup wrapper.c:29
    #2 0x916a63 in strvec_push strvec.c:26
    #3 0x4be4eb in cmd_for_each_repo builtin/for-each-repo.c:49
    #4 0x410dcd in run_builtin git.c:475
    #5 0x410dcd in handle_builtin git.c:729
    #6 0x414087 in run_argv git.c:818
    #7 0x414087 in cmd_main git.c:949
    #8 0x40e9ec in main common-main.c:52
    #9 0x7f63bc9fa349 in __libc_start_main (/lib64/libc.so.6+0x24349)

See also discussion about the original implementation below - this code
appears to have evolved from a callback explaining the double-strvec-copy
pattern, but there's no strong reason to keep that now:
  https://lore.kernel.org/git/68bbeca5-314b-08ee-ef36-040e3f3814e9@gmail.com/

Signed-off-by: Andrzej Hunt <andrzej@ahunt.org>
---
 builtin/for-each-repo.c | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

Comments

Elijah Newren June 21, 2021, 8:55 p.m. UTC | #1
Hi,

On Sun, Jun 20, 2021 at 8:14 AM <andrzej@ahunt.org> wrote:
>
> From: Andrzej Hunt <ajrhunt@google.com>
>
> cmd_for_each_repo() copies argv into args (a strvec), which is later
> passed into run_command_on_repo(), which in turn copies that strvec onto
> the end of child.args. The initial copy is unnecessary (we never modify
> args). We therefore choose to just pass argv directly into
> run_command_on_repo(), which lets us avoid the copy and fixes the leak.
>
> LSAN output from t0068:
>
> Direct leak of 192 byte(s) in 1 object(s) allocated from:
>     #0 0x7f63bd4ab8b0 in realloc (/usr/lib64/libasan.so.4+0xdc8b0)
>     #1 0x98d7e6 in xrealloc wrapper.c:126
>     #2 0x916914 in strvec_push_nodup strvec.c:19
>     #3 0x916a6e in strvec_push strvec.c:26
>     #4 0x4be4eb in cmd_for_each_repo builtin/for-each-repo.c:49
>     #5 0x410dcd in run_builtin git.c:475
>     #6 0x410dcd in handle_builtin git.c:729
>     #7 0x414087 in run_argv git.c:818
>     #8 0x414087 in cmd_main git.c:949
>     #9 0x40e9ec in main common-main.c:52
>     #10 0x7f63bc9fa349 in __libc_start_main (/lib64/libc.so.6+0x24349)
>
> Indirect leak of 22 byte(s) in 2 object(s) allocated from:
>     #0 0x7f63bd445e30 in __interceptor_strdup (/usr/lib64/libasan.so.4+0x76e30)
>     #1 0x98d698 in xstrdup wrapper.c:29
>     #2 0x916a63 in strvec_push strvec.c:26
>     #3 0x4be4eb in cmd_for_each_repo builtin/for-each-repo.c:49
>     #4 0x410dcd in run_builtin git.c:475
>     #5 0x410dcd in handle_builtin git.c:729
>     #6 0x414087 in run_argv git.c:818
>     #7 0x414087 in cmd_main git.c:949
>     #8 0x40e9ec in main common-main.c:52
>     #9 0x7f63bc9fa349 in __libc_start_main (/lib64/libc.so.6+0x24349)
>
> See also discussion about the original implementation below - this code
> appears to have evolved from a callback explaining the double-strvec-copy
> pattern, but there's no strong reason to keep that now:
>   https://lore.kernel.org/git/68bbeca5-314b-08ee-ef36-040e3f3814e9@gmail.com/

The link you give shows that Stolee was looking forward to reviewing
your patch to fix this issue, so cc'ing him so he can do so.

In general, it helps to cc folks who were involved in earlier
conversations about an area being updated, or who know the code area
well.

> Signed-off-by: Andrzej Hunt <andrzej@ahunt.org>
> ---
>  builtin/for-each-repo.c | 14 ++++----------
>  1 file changed, 4 insertions(+), 10 deletions(-)
>
> diff --git a/builtin/for-each-repo.c b/builtin/for-each-repo.c
> index 52be64a437..fd86e5a861 100644
> --- a/builtin/for-each-repo.c
> +++ b/builtin/for-each-repo.c
> @@ -10,18 +10,16 @@ static const char * const for_each_repo_usage[] = {
>         NULL
>  };
>
> -static int run_command_on_repo(const char *path,
> -                              void *cbdata)
> +static int run_command_on_repo(const char *path, int argc, const char ** argv)
>  {
>         int i;
>         struct child_process child = CHILD_PROCESS_INIT;
> -       struct strvec *args = (struct strvec *)cbdata;
>
>         child.git_cmd = 1;
>         strvec_pushl(&child.args, "-C", path, NULL);
>
> -       for (i = 0; i < args->nr; i++)
> -               strvec_push(&child.args, args->v[i]);
> +       for (i = 0; i < argc; i++)
> +               strvec_push(&child.args, argv[i]);
>
>         return run_command(&child);
>  }
> @@ -29,37 +27,33 @@ static int run_command_on_repo(const char *path,
>  int cmd_for_each_repo(int argc, const char **argv, const char *prefix)
>  {
>         static const char *config_key = NULL;
>         int i, result = 0;
>         const struct string_list *values;
> -       struct strvec args = STRVEC_INIT;
>
>         const struct option options[] = {
>                 OPT_STRING(0, "config", &config_key, N_("config"),
>                            N_("config key storing a list of repository paths")),
>                 OPT_END()
>         };
>
>         argc = parse_options(argc, argv, prefix, options, for_each_repo_usage,
>                              PARSE_OPT_STOP_AT_NON_OPTION);
>
>         if (!config_key)
>                 die(_("missing --config=<config>"));
>
> -       for (i = 0; i < argc; i++)
> -               strvec_push(&args, argv[i]);
> -
>         values = repo_config_get_value_multi(the_repository,
>                                              config_key);
>
>         /*
>          * Do nothing on an empty list, which is equivalent to the case
>          * where the config variable does not exist at all.
>          */
>         if (!values)
>                 return 0;
>
>         for (i = 0; !result && i < values->nr; i++)
> -               result = run_command_on_repo(values->items[i].string, &args);
> +               result = run_command_on_repo(values->items[i].string, argc, argv);
>
>         return result;
>  }
> --
> 2.26.2

Patch makes sense to me
diff mbox series

Patch

diff --git a/builtin/for-each-repo.c b/builtin/for-each-repo.c
index 52be64a437..fd86e5a861 100644
--- a/builtin/for-each-repo.c
+++ b/builtin/for-each-repo.c
@@ -10,18 +10,16 @@  static const char * const for_each_repo_usage[] = {
 	NULL
 };
 
-static int run_command_on_repo(const char *path,
-			       void *cbdata)
+static int run_command_on_repo(const char *path, int argc, const char ** argv)
 {
 	int i;
 	struct child_process child = CHILD_PROCESS_INIT;
-	struct strvec *args = (struct strvec *)cbdata;
 
 	child.git_cmd = 1;
 	strvec_pushl(&child.args, "-C", path, NULL);
 
-	for (i = 0; i < args->nr; i++)
-		strvec_push(&child.args, args->v[i]);
+	for (i = 0; i < argc; i++)
+		strvec_push(&child.args, argv[i]);
 
 	return run_command(&child);
 }
@@ -29,37 +27,33 @@  static int run_command_on_repo(const char *path,
 int cmd_for_each_repo(int argc, const char **argv, const char *prefix)
 {
 	static const char *config_key = NULL;
 	int i, result = 0;
 	const struct string_list *values;
-	struct strvec args = STRVEC_INIT;
 
 	const struct option options[] = {
 		OPT_STRING(0, "config", &config_key, N_("config"),
 			   N_("config key storing a list of repository paths")),
 		OPT_END()
 	};
 
 	argc = parse_options(argc, argv, prefix, options, for_each_repo_usage,
 			     PARSE_OPT_STOP_AT_NON_OPTION);
 
 	if (!config_key)
 		die(_("missing --config=<config>"));
 
-	for (i = 0; i < argc; i++)
-		strvec_push(&args, argv[i]);
-
 	values = repo_config_get_value_multi(the_repository,
 					     config_key);
 
 	/*
 	 * Do nothing on an empty list, which is equivalent to the case
 	 * where the config variable does not exist at all.
 	 */
 	if (!values)
 		return 0;
 
 	for (i = 0; !result && i < values->nr; i++)
-		result = run_command_on_repo(values->items[i].string, &args);
+		result = run_command_on_repo(values->items[i].string, argc, argv);
 
 	return result;
 }