diff mbox series

[v3,1/8] for-each-ref: add --stdin option

Message ID f9e80e233f1ca26f07f9db49261e54b7deaf83b8.1678902343.git.gitgitgadget@gmail.com (mailing list archive)
State New, archived
Headers show
Series ref-filter: ahead/behind counting, faster --merged option | expand

Commit Message

Derrick Stolee March 15, 2023, 5:45 p.m. UTC
From: Derrick Stolee <derrickstolee@github.com>

When a user wishes to input a large list of patterns to 'git
for-each-ref' (likely a long list of exact refs) there are frequently
system limits on the number of command-line arguments.

Add a new --stdin option to instead read the patterns from standard
input. Add tests that check that any unrecognized arguments are
considered an error when --stdin is provided. Also, an empty pattern
list is interpreted as the complete ref set.

When reading from stdin, we populate the filter.name_patterns array
dynamically as opposed to pointing to the 'argv' array directly. This is
simple when using a strvec, as it is NULL-terminated in the same way. We
then free the memory directly from the strvec.

Helped-by: Phillip Wood <phillip.wood123@gmail.com>
Signed-off-by: Derrick Stolee <derrickstolee@github.com>
---
 Documentation/git-for-each-ref.txt |  7 +++++-
 builtin/for-each-ref.c             | 21 ++++++++++++++++-
 t/t6300-for-each-ref.sh            | 37 ++++++++++++++++++++++++++++++
 3 files changed, 63 insertions(+), 2 deletions(-)

Comments

Jeff King March 15, 2023, 6:06 p.m. UTC | #1
On Wed, Mar 15, 2023 at 05:45:36PM +0000, Derrick Stolee via GitGitGadget wrote:

> @@ -75,7 +79,21 @@ int cmd_for_each_ref(int argc, const char **argv, const char *prefix)
>  	ref_sorting_set_sort_flags_all(sorting, REF_SORTING_ICASE, icase);
>  	filter.ignore_case = icase;
>  
> -	filter.name_patterns = argv;
> +	if (from_stdin) {
> +		struct strbuf line = STRBUF_INIT;
> +
> +		if (argv[0])
> +			die(_("unknown arguments supplied with --stdin"));
> +
> +		while (strbuf_getline(&line, stdin) != EOF)
> +			strvec_push(&vec, line.buf);
> +
> +		/* vec.v is NULL-terminated, just like 'argv'. */
> +		filter.name_patterns = vec.v;
> +	} else {
> +		filter.name_patterns = argv;
> +	}

Now that you aren't detaching the "line" strbuf in each iteration of the
loop, it needs to eventually be cleaned up. strbuf_getline() will
_reset() it, which is good, but at the end we'd need a strbuf_release()
or it will leak.

-Peff
Junio C Hamano March 15, 2023, 7:14 p.m. UTC | #2
Jeff King <peff@peff.net> writes:

> On Wed, Mar 15, 2023 at 05:45:36PM +0000, Derrick Stolee via GitGitGadget wrote:
>
>> @@ -75,7 +79,21 @@ int cmd_for_each_ref(int argc, const char **argv, const char *prefix)
>>  	ref_sorting_set_sort_flags_all(sorting, REF_SORTING_ICASE, icase);
>>  	filter.ignore_case = icase;
>>  
>> -	filter.name_patterns = argv;
>> +	if (from_stdin) {
>> +		struct strbuf line = STRBUF_INIT;
>> +
>> +		if (argv[0])
>> +			die(_("unknown arguments supplied with --stdin"));
>> +
>> +		while (strbuf_getline(&line, stdin) != EOF)
>> +			strvec_push(&vec, line.buf);
>> +
>> +		/* vec.v is NULL-terminated, just like 'argv'. */
>> +		filter.name_patterns = vec.v;
>> +	} else {
>> +		filter.name_patterns = argv;
>> +	}
>
> Now that you aren't detaching the "line" strbuf in each iteration of the
> loop, it needs to eventually be cleaned up. strbuf_getline() will
> _reset() it, which is good, but at the end we'd need a strbuf_release()
> or it will leak.

Nicely spotted.

Thanks.
Jonathan Tan March 15, 2023, 10:41 p.m. UTC | #3
"Derrick Stolee via GitGitGadget" <gitgitgadget@gmail.com> writes:
> @@ -75,7 +79,21 @@ int cmd_for_each_ref(int argc, const char **argv, const char *prefix)
>  	ref_sorting_set_sort_flags_all(sorting, REF_SORTING_ICASE, icase);
>  	filter.ignore_case = icase;
>  
> -	filter.name_patterns = argv;
> +	if (from_stdin) {
> +		struct strbuf line = STRBUF_INIT;
> +
> +		if (argv[0])
> +			die(_("unknown arguments supplied with --stdin"));

As a reference point, both fetch and send-pack accept input from stdin
too, and use them to augment what's provided on the CLI (and not only
accept them when nothing's provided on the CLI).

Having said that, it's also reasonable to start by prohibiting CLI
arguments when --stdin is given, and we can always relax this later
if needed.
diff mbox series

Patch

diff --git a/Documentation/git-for-each-ref.txt b/Documentation/git-for-each-ref.txt
index 6da899c6296..ccdc2911bb9 100644
--- a/Documentation/git-for-each-ref.txt
+++ b/Documentation/git-for-each-ref.txt
@@ -9,7 +9,8 @@  SYNOPSIS
 --------
 [verse]
 'git for-each-ref' [--count=<count>] [--shell|--perl|--python|--tcl]
-		   [(--sort=<key>)...] [--format=<format>] [<pattern>...]
+		   [(--sort=<key>)...] [--format=<format>]
+		   [ --stdin | <pattern>... ]
 		   [--points-at=<object>]
 		   [--merged[=<object>]] [--no-merged[=<object>]]
 		   [--contains[=<object>]] [--no-contains[=<object>]]
@@ -32,6 +33,10 @@  OPTIONS
 	literally, in the latter case matching completely or from the
 	beginning up to a slash.
 
+--stdin::
+	If `--stdin` is supplied, then the list of patterns is read from
+	standard input instead of from the argument list.
+
 --count=<count>::
 	By default the command shows all refs that match
 	`<pattern>`.  This option makes it stop after showing
diff --git a/builtin/for-each-ref.c b/builtin/for-each-ref.c
index 6f62f40d126..4c5f2324793 100644
--- a/builtin/for-each-ref.c
+++ b/builtin/for-each-ref.c
@@ -5,6 +5,7 @@ 
 #include "object.h"
 #include "parse-options.h"
 #include "ref-filter.h"
+#include "strvec.h"
 
 static char const * const for_each_ref_usage[] = {
 	N_("git for-each-ref [<options>] [<pattern>]"),
@@ -25,6 +26,8 @@  int cmd_for_each_ref(int argc, const char **argv, const char *prefix)
 	struct ref_format format = REF_FORMAT_INIT;
 	struct strbuf output = STRBUF_INIT;
 	struct strbuf err = STRBUF_INIT;
+	int from_stdin = 0;
+	struct strvec vec = STRVEC_INIT;
 
 	struct option opts[] = {
 		OPT_BIT('s', "shell", &format.quote_style,
@@ -49,6 +52,7 @@  int cmd_for_each_ref(int argc, const char **argv, const char *prefix)
 		OPT_CONTAINS(&filter.with_commit, N_("print only refs which contain the commit")),
 		OPT_NO_CONTAINS(&filter.no_commit, N_("print only refs which don't contain the commit")),
 		OPT_BOOL(0, "ignore-case", &icase, N_("sorting and filtering are case insensitive")),
+		OPT_BOOL(0, "stdin", &from_stdin, N_("read reference patterns from stdin")),
 		OPT_END(),
 	};
 
@@ -75,7 +79,21 @@  int cmd_for_each_ref(int argc, const char **argv, const char *prefix)
 	ref_sorting_set_sort_flags_all(sorting, REF_SORTING_ICASE, icase);
 	filter.ignore_case = icase;
 
-	filter.name_patterns = argv;
+	if (from_stdin) {
+		struct strbuf line = STRBUF_INIT;
+
+		if (argv[0])
+			die(_("unknown arguments supplied with --stdin"));
+
+		while (strbuf_getline(&line, stdin) != EOF)
+			strvec_push(&vec, line.buf);
+
+		/* vec.v is NULL-terminated, just like 'argv'. */
+		filter.name_patterns = vec.v;
+	} else {
+		filter.name_patterns = argv;
+	}
+
 	filter.match_as_path = 1;
 	filter_refs(&array, &filter, FILTER_REFS_ALL);
 	ref_array_sort(sorting, &array);
@@ -97,5 +115,6 @@  int cmd_for_each_ref(int argc, const char **argv, const char *prefix)
 	free_commit_list(filter.with_commit);
 	free_commit_list(filter.no_commit);
 	ref_sorting_release(sorting);
+	strvec_clear(&vec);
 	return 0;
 }
diff --git a/t/t6300-for-each-ref.sh b/t/t6300-for-each-ref.sh
index c466fd989f1..a58053a54c5 100755
--- a/t/t6300-for-each-ref.sh
+++ b/t/t6300-for-each-ref.sh
@@ -1464,4 +1464,41 @@  sig_crlf="$(printf "%s" "$sig" | append_cr; echo dummy)"
 sig_crlf=${sig_crlf%dummy}
 test_atom refs/tags/fake-sig-crlf contents:signature "$sig_crlf"
 
+test_expect_success 'git for-each-ref --stdin: empty' '
+	>in &&
+	git for-each-ref --format="%(refname)" --stdin <in >actual &&
+	git for-each-ref --format="%(refname)" >expect &&
+	test_cmp expect actual
+'
+
+test_expect_success 'git for-each-ref --stdin: fails if extra args' '
+	>in &&
+	test_must_fail git for-each-ref --format="%(refname)" \
+		--stdin refs/heads/extra <in 2>err &&
+	grep "unknown arguments supplied with --stdin" err
+'
+
+test_expect_success 'git for-each-ref --stdin: matches' '
+	cat >in <<-EOF &&
+	refs/tags/multi*
+	refs/heads/amb*
+	EOF
+
+	cat >expect <<-EOF &&
+	refs/heads/ambiguous
+	refs/tags/multi-ref1-100000-user1
+	refs/tags/multi-ref1-100000-user2
+	refs/tags/multi-ref1-200000-user1
+	refs/tags/multi-ref1-200000-user2
+	refs/tags/multi-ref2-100000-user1
+	refs/tags/multi-ref2-100000-user2
+	refs/tags/multi-ref2-200000-user1
+	refs/tags/multi-ref2-200000-user2
+	refs/tags/multiline
+	EOF
+
+	git for-each-ref --format="%(refname)" --stdin <in >actual &&
+	test_cmp expect actual
+'
+
 test_done