diff mbox series

[v2,1/8] for-each-ref: add --stdin option

Message ID a1d9e0f6ff6660c9264673be18bc24956f74eb9c.1678468864.git.gitgitgadget@gmail.com (mailing list archive)
State Superseded
Headers show
Series ref-filter: ahead/behind counting, faster --merged option | expand

Commit Message

Derrick Stolee March 10, 2023, 5:20 p.m. UTC
From: Derrick Stolee <derrickstolee@github.com>

When a user wishes to input a large list of patterns to 'git
for-each-ref' (likely a long list of exact refs) there are frequently
system limits on the number of command-line arguments.

Add a new --stdin option to instead read the patterns from standard
input. Add tests that check that any unrecognized arguments are
considered an error when --stdin is provided. Also, an empty pattern
list is interpreted as the complete ref set.

When reading from stdin, we populate the filter.name_patterns array
dynamically as opposed to pointing to the 'argv' array directly. This
requires a careful cast while freeing the individual strings,
conditioned on the --stdin option.

Signed-off-by: Derrick Stolee <derrickstolee@github.com>
---
 Documentation/git-for-each-ref.txt |  7 +++++-
 builtin/for-each-ref.c             | 29 ++++++++++++++++++++++-
 t/t6300-for-each-ref.sh            | 37 ++++++++++++++++++++++++++++++
 3 files changed, 71 insertions(+), 2 deletions(-)

Comments

Junio C Hamano March 10, 2023, 6:08 p.m. UTC | #1
"Derrick Stolee via GitGitGadget" <gitgitgadget@gmail.com> writes:

> When reading from stdin, we populate the filter.name_patterns array
> dynamically as opposed to pointing to the 'argv' array directly. This
> requires a careful cast while freeing the individual strings,
> conditioned on the --stdin option.

Indeed.  Thanks for carefully describing the concerns you had.

Looking good.  I'll read on.
Phillip Wood March 13, 2023, 10:31 a.m. UTC | #2
Hi Stolee

On 10/03/2023 17:20, Derrick Stolee via GitGitGadget wrote:
> From: Derrick Stolee <derrickstolee@github.com>
> 
> When a user wishes to input a large list of patterns to 'git
> for-each-ref' (likely a long list of exact refs) there are frequently
> system limits on the number of command-line arguments.
> 
> Add a new --stdin option to instead read the patterns from standard
> input. Add tests that check that any unrecognized arguments are
> considered an error when --stdin is provided. Also, an empty pattern
> list is interpreted as the complete ref set.
> 
> When reading from stdin, we populate the filter.name_patterns array
> dynamically as opposed to pointing to the 'argv' array directly. This
> requires a careful cast while freeing the individual strings,
> conditioned on the --stdin option.

I think what you've got here is fine, but if you wanted you could 
simplify it by using an strvec. Something like

	struct strvec vec = STRVEC_INIT;

	...

	if (from_stdin) {
		struct strbuf buf = STRBUF_INIT;

		if (argv[0])
			die(_("unknown arguments supplied with --stdin"));

		while (strbuf_getline(&line, stdin) != EOF)
			strvec_push(&vec, buf.buf);

		filter.name_patters = vec.v;
	} else {
		filter.name_patterns = argv;
	}

	...

	strvec_clear(&vec);

gets rid of the manual memory management with ALLOC_GROW() and the need 
to cast filter.name_patterns when free()ing. It is not immediately 
obvious from the name but struct strvec keeps the array NULL terminated.

Best Wishes

Phillip

> Signed-off-by: Derrick Stolee <derrickstolee@github.com>
> ---
>   Documentation/git-for-each-ref.txt |  7 +++++-
>   builtin/for-each-ref.c             | 29 ++++++++++++++++++++++-
>   t/t6300-for-each-ref.sh            | 37 ++++++++++++++++++++++++++++++
>   3 files changed, 71 insertions(+), 2 deletions(-)
> 
> diff --git a/Documentation/git-for-each-ref.txt b/Documentation/git-for-each-ref.txt
> index 6da899c6296..ccdc2911bb9 100644
> --- a/Documentation/git-for-each-ref.txt
> +++ b/Documentation/git-for-each-ref.txt
> @@ -9,7 +9,8 @@ SYNOPSIS
>   --------
>   [verse]
>   'git for-each-ref' [--count=<count>] [--shell|--perl|--python|--tcl]
> -		   [(--sort=<key>)...] [--format=<format>] [<pattern>...]
> +		   [(--sort=<key>)...] [--format=<format>]
> +		   [ --stdin | <pattern>... ]
>   		   [--points-at=<object>]
>   		   [--merged[=<object>]] [--no-merged[=<object>]]
>   		   [--contains[=<object>]] [--no-contains[=<object>]]
> @@ -32,6 +33,10 @@ OPTIONS
>   	literally, in the latter case matching completely or from the
>   	beginning up to a slash.
>   
> +--stdin::
> +	If `--stdin` is supplied, then the list of patterns is read from
> +	standard input instead of from the argument list.
> +
>   --count=<count>::
>   	By default the command shows all refs that match
>   	`<pattern>`.  This option makes it stop after showing
> diff --git a/builtin/for-each-ref.c b/builtin/for-each-ref.c
> index 6f62f40d126..e005a7ef3ce 100644
> --- a/builtin/for-each-ref.c
> +++ b/builtin/for-each-ref.c
> @@ -25,6 +25,7 @@ int cmd_for_each_ref(int argc, const char **argv, const char *prefix)
>   	struct ref_format format = REF_FORMAT_INIT;
>   	struct strbuf output = STRBUF_INIT;
>   	struct strbuf err = STRBUF_INIT;
> +	int from_stdin = 0;
>   
>   	struct option opts[] = {
>   		OPT_BIT('s', "shell", &format.quote_style,
> @@ -49,6 +50,7 @@ int cmd_for_each_ref(int argc, const char **argv, const char *prefix)
>   		OPT_CONTAINS(&filter.with_commit, N_("print only refs which contain the commit")),
>   		OPT_NO_CONTAINS(&filter.no_commit, N_("print only refs which don't contain the commit")),
>   		OPT_BOOL(0, "ignore-case", &icase, N_("sorting and filtering are case insensitive")),
> +		OPT_BOOL(0, "stdin", &from_stdin, N_("read reference patterns from stdin")),
>   		OPT_END(),
>   	};
>   
> @@ -75,7 +77,27 @@ int cmd_for_each_ref(int argc, const char **argv, const char *prefix)
>   	ref_sorting_set_sort_flags_all(sorting, REF_SORTING_ICASE, icase);
>   	filter.ignore_case = icase;
>   
> -	filter.name_patterns = argv;
> +	if (from_stdin) {
> +		struct strbuf line = STRBUF_INIT;
> +		size_t nr = 0, alloc = 16;
> +
> +		if (argv[0])
> +			die(_("unknown arguments supplied with --stdin"));
> +
> +		CALLOC_ARRAY(filter.name_patterns, alloc);
> +
> +		while (strbuf_getline(&line, stdin) != EOF) {
> +			ALLOC_GROW(filter.name_patterns, nr + 1, alloc);
> +			filter.name_patterns[nr++] = strbuf_detach(&line, NULL);
> +		}
> +
> +		/* Add a terminating NULL string. */
> +		ALLOC_GROW(filter.name_patterns, nr + 1, alloc);
> +		filter.name_patterns[nr + 1] = NULL;
> +	} else {
> +		filter.name_patterns = argv;
> +	}
> +
>   	filter.match_as_path = 1;
>   	filter_refs(&array, &filter, FILTER_REFS_ALL);
>   	ref_array_sort(sorting, &array);
> @@ -97,5 +119,10 @@ int cmd_for_each_ref(int argc, const char **argv, const char *prefix)
>   	free_commit_list(filter.with_commit);
>   	free_commit_list(filter.no_commit);
>   	ref_sorting_release(sorting);
> +	if (from_stdin) {
> +		for (size_t i = 0; filter.name_patterns[i]; i++)
> +			free((char *)filter.name_patterns[i]);
> +		free(filter.name_patterns);
> +	}
>   	return 0;
>   }
> diff --git a/t/t6300-for-each-ref.sh b/t/t6300-for-each-ref.sh
> index c466fd989f1..a58053a54c5 100755
> --- a/t/t6300-for-each-ref.sh
> +++ b/t/t6300-for-each-ref.sh
> @@ -1464,4 +1464,41 @@ sig_crlf="$(printf "%s" "$sig" | append_cr; echo dummy)"
>   sig_crlf=${sig_crlf%dummy}
>   test_atom refs/tags/fake-sig-crlf contents:signature "$sig_crlf"
>   
> +test_expect_success 'git for-each-ref --stdin: empty' '
> +	>in &&
> +	git for-each-ref --format="%(refname)" --stdin <in >actual &&
> +	git for-each-ref --format="%(refname)" >expect &&
> +	test_cmp expect actual
> +'
> +
> +test_expect_success 'git for-each-ref --stdin: fails if extra args' '
> +	>in &&
> +	test_must_fail git for-each-ref --format="%(refname)" \
> +		--stdin refs/heads/extra <in 2>err &&
> +	grep "unknown arguments supplied with --stdin" err
> +'
> +
> +test_expect_success 'git for-each-ref --stdin: matches' '
> +	cat >in <<-EOF &&
> +	refs/tags/multi*
> +	refs/heads/amb*
> +	EOF
> +
> +	cat >expect <<-EOF &&
> +	refs/heads/ambiguous
> +	refs/tags/multi-ref1-100000-user1
> +	refs/tags/multi-ref1-100000-user2
> +	refs/tags/multi-ref1-200000-user1
> +	refs/tags/multi-ref1-200000-user2
> +	refs/tags/multi-ref2-100000-user1
> +	refs/tags/multi-ref2-100000-user2
> +	refs/tags/multi-ref2-200000-user1
> +	refs/tags/multi-ref2-200000-user2
> +	refs/tags/multiline
> +	EOF
> +
> +	git for-each-ref --format="%(refname)" --stdin <in >actual &&
> +	test_cmp expect actual
> +'
> +
>   test_done
Derrick Stolee March 13, 2023, 1:33 p.m. UTC | #3
On 3/13/2023 6:31 AM, Phillip Wood wrote:
> Hi Stolee
> 
> On 10/03/2023 17:20, Derrick Stolee via GitGitGadget wrote:
>> From: Derrick Stolee <derrickstolee@github.com>
>>
>> When a user wishes to input a large list of patterns to 'git
>> for-each-ref' (likely a long list of exact refs) there are frequently
>> system limits on the number of command-line arguments.
>>
>> Add a new --stdin option to instead read the patterns from standard
>> input. Add tests that check that any unrecognized arguments are
>> considered an error when --stdin is provided. Also, an empty pattern
>> list is interpreted as the complete ref set.
>>
>> When reading from stdin, we populate the filter.name_patterns array
>> dynamically as opposed to pointing to the 'argv' array directly. This
>> requires a careful cast while freeing the individual strings,
>> conditioned on the --stdin option.
> 
> I think what you've got here is fine, but if you wanted you could simplify it by using an strvec. Something like
> 
>     struct strvec vec = STRVEC_INIT;
> 
>     ...
> 
>     if (from_stdin) {
>         struct strbuf buf = STRBUF_INIT;
> 
>         if (argv[0])
>             die(_("unknown arguments supplied with --stdin"));
> 
>         while (strbuf_getline(&line, stdin) != EOF)
>             strvec_push(&vec, buf.buf);
> 
>         filter.name_patters = vec.v;
>     } else {
>         filter.name_patterns = argv;
>     }
> 
>     ...
> 
>     strvec_clear(&vec);
> 
> gets rid of the manual memory management with ALLOC_GROW() and the need to cast filter.name_patterns when free()ing. It is not immediately obvious from the name but struct strvec keeps the array NULL terminated.

Thanks, Philip. I like your version a lot and will use
it in the next version.

-Stolee
Taylor Blau March 13, 2023, 9:10 p.m. UTC | #4
On Mon, Mar 13, 2023 at 09:33:29AM -0400, Derrick Stolee wrote:
> Thanks, Philip. I like your version a lot and will use
> it in the next version.

Ditto. Thanks, both.

Thanks,
Taylor
Ævar Arnfjörð Bjarmason March 15, 2023, 1:37 p.m. UTC | #5
On Fri, Mar 10 2023, Derrick Stolee via GitGitGadget wrote:

> From: Derrick Stolee <derrickstolee@github.com>
>
> When a user wishes to input a large list of patterns to 'git
> for-each-ref' (likely a long list of exact refs) there are frequently
> system limits on the number of command-line arguments.

Okey, and the current API assumes you just assign "argv" to this, but...

> When reading from stdin, we populate the filter.name_patterns array
> dynamically as opposed to pointing to the 'argv' array directly. This
> requires a careful cast while freeing the individual strings,
> conditioned on the --stdin option.

..sounds potentially nasty...

> @@ -75,7 +77,27 @@ int cmd_for_each_ref(int argc, const char **argv, const char *prefix)
>  	ref_sorting_set_sort_flags_all(sorting, REF_SORTING_ICASE, icase);
>  	filter.ignore_case = icase;
>  
> -	filter.name_patterns = argv;
> +	if (from_stdin) {
> +		struct strbuf line = STRBUF_INIT;
> +		size_t nr = 0, alloc = 16;
> +
> +		if (argv[0])
> +			die(_("unknown arguments supplied with --stdin"));
> +
> +		CALLOC_ARRAY(filter.name_patterns, alloc);
> +
> +		while (strbuf_getline(&line, stdin) != EOF) {
> +			ALLOC_GROW(filter.name_patterns, nr + 1, alloc);
> +			filter.name_patterns[nr++] = strbuf_detach(&line, NULL);
> +		}
> +
> +		/* Add a terminating NULL string. */
> +		ALLOC_GROW(filter.name_patterns, nr + 1, alloc);
> +		filter.name_patterns[nr + 1] = NULL;
> +	} else {
> +		filter.name_patterns = argv;
> +	}
> +
>  	filter.match_as_path = 1;
>  	filter_refs(&array, &filter, FILTER_REFS_ALL);
>  	ref_array_sort(sorting, &array);
> @@ -97,5 +119,10 @@ int cmd_for_each_ref(int argc, const char **argv, const char *prefix)
>  	free_commit_list(filter.with_commit);
>  	free_commit_list(filter.no_commit);
>  	ref_sorting_release(sorting);
> +	if (from_stdin) {
> +		for (size_t i = 0; filter.name_patterns[i]; i++)
> +			free((char *)filter.name_patterns[i]);
> +		free(filter.name_patterns);
> +	}
>  	return 0;
>  }

Why do we need to seemingly re-invent a "struct strvec" here? I tried to
simplify this on top of this (well, "seen"), and we can get rid of all
of this manual memory management & trailing NULL juggling as a result:
	
	diff --git a/builtin/for-each-ref.c b/builtin/for-each-ref.c
	index cf5ba6ffc12..13b75eff28c 100644
	--- a/builtin/for-each-ref.c
	+++ b/builtin/for-each-ref.c
	@@ -7,6 +7,7 @@
	 #include "parse-options.h"
	 #include "ref-filter.h"
	 #include "commit-reach.h"
	+#include "strvec.h"
	 
	 static char const * const for_each_ref_usage[] = {
	 	N_("git for-each-ref [<options>] [<pattern>]"),
	@@ -27,6 +28,7 @@ int cmd_for_each_ref(int argc, const char **argv, const char *prefix)
	 	struct ref_format format = REF_FORMAT_INIT;
	 	struct strbuf output = STRBUF_INIT;
	 	struct strbuf err = STRBUF_INIT;
	+	struct strvec stdin_pat = STRVEC_INIT;
	 	int from_stdin = 0;
	 
	 	struct option opts[] = {
	@@ -81,21 +83,13 @@ int cmd_for_each_ref(int argc, const char **argv, const char *prefix)
	 
	 	if (from_stdin) {
	 		struct strbuf line = STRBUF_INIT;
	-		size_t nr = 0, alloc = 16;
	 
	 		if (argv[0])
	 			die(_("unknown arguments supplied with --stdin"));
	 
	-		CALLOC_ARRAY(filter.name_patterns, alloc);
	-
	-		while (strbuf_getline(&line, stdin) != EOF) {
	-			ALLOC_GROW(filter.name_patterns, nr + 1, alloc);
	-			filter.name_patterns[nr++] = strbuf_detach(&line, NULL);
	-		}
	-
	-		/* Add a terminating NULL string. */
	-		ALLOC_GROW(filter.name_patterns, nr + 1, alloc);
	-		filter.name_patterns[nr + 1] = NULL;
	+		while (strbuf_getline(&line, stdin) != EOF)
	+			strvec_push(&stdin_pat, line.buf);
	+		filter.name_patterns = stdin_pat.v;
	 	} else {
	 		filter.name_patterns = argv;
	 	}
	@@ -123,10 +117,6 @@ int cmd_for_each_ref(int argc, const char **argv, const char *prefix)
	 	free_commit_list(filter.with_commit);
	 	free_commit_list(filter.no_commit);
	 	ref_sorting_release(sorting);
	-	if (from_stdin) {
	-		for (size_t i = 0; filter.name_patterns[i]; i++)
	-			free(filter.name_patterns[i]);
	-		free(filter.name_patterns);
	-	}
	+	strvec_clear(&stdin_pat);
	 	return 0;
	 }

It *is* an extra copy though, as your implementation re-uses the strbuf
we already allocated.

But presumably that's trivial in this case, and if we care I think we
should resurrect something like [1] instead, i.e. we could just teach
the strvec API to have a strvec_push_nodup(). But I doubt that in this
case it'll matter.

In any case, if you don't want to take this as-is, please fix this so
that we're not reaching into the "filter.name_patterns" and casting its
"const char" to "char".

If we're going to add a hack here that API should instead know how to
free its own resources (so we could clean up the free_commit_list() here
seen in the context), and we could carry some "my argv needs free-ing".

But none of that seems needed in this case, this is just another case
where we can pretend that we have a "normal" argv, and then clean up our
own strvec, no?

1. https://lore.kernel.org/git/65a620b08ef359e29d678497f1b529e3ce6477b1.1673475190.git.gitgitgadget@gmail.com/
Jeff King March 15, 2023, 5:17 p.m. UTC | #6
On Wed, Mar 15, 2023 at 02:37:39PM +0100, Ævar Arnfjörð Bjarmason wrote:

> 	-		CALLOC_ARRAY(filter.name_patterns, alloc);
> 	-
> 	-		while (strbuf_getline(&line, stdin) != EOF) {
> 	-			ALLOC_GROW(filter.name_patterns, nr + 1, alloc);
> 	-			filter.name_patterns[nr++] = strbuf_detach(&line, NULL);
> 	-		}
> 	-
> 	-		/* Add a terminating NULL string. */
> 	-		ALLOC_GROW(filter.name_patterns, nr + 1, alloc);
> 	-		filter.name_patterns[nr + 1] = NULL;
> 	+		while (strbuf_getline(&line, stdin) != EOF)
> 	+			strvec_push(&stdin_pat, line.buf);
> 	+		filter.name_patterns = stdin_pat.v;
> 	 	} else {
> 	 		filter.name_patterns = argv;
> 	 	}
> 	@@ -123,10 +117,6 @@ int cmd_for_each_ref(int argc, const char **argv, const char *prefix)
> 	 	free_commit_list(filter.with_commit);
> 	 	free_commit_list(filter.no_commit);
> 	 	ref_sorting_release(sorting);
> 	-	if (from_stdin) {
> 	-		for (size_t i = 0; filter.name_patterns[i]; i++)
> 	-			free(filter.name_patterns[i]);
> 	-		free(filter.name_patterns);
> 	-	}
> 	+	strvec_clear(&stdin_pat);
> 	 	return 0;
> 	 }
> 
> It *is* an extra copy though, as your implementation re-uses the strbuf
> we already allocated.

At first I thought you meant "extra allocation" here. But you really do
mean an extra copy of the bytes.

The number of allocations is the same either way. In the original, we
detach the strbuf in each iteration of the loop as it becomes the final
entry in the array, but then have to allocate a new strbuf for the next
iteration. With a strvec, we can reuse the same strbuf over and over,
but make a new allocation when we add it to the strvec.

So yes, we end up with an extra memcpy() of the bytes. But the flip side
is that the final allocations we store in the strvec are correctly
sized, without the extra slop that the strbuf added while reading.

> But presumably that's trivial in this case, and if we care I think we
> should resurrect something like [1] instead, i.e. we could just teach
> the strvec API to have a strvec_push_nodup(). But I doubt that in this
> case it'll matter.

Yeah, I'd agree it is not important either way in this case. But I
wanted to think it through above, just because it's not clear to me that
even in a tight loop, the "allocate buffer and then attach to the
strvec" approach would be the better tradeoff.

I guess it would make sense to wait for a case where it _does_ matter
and then we could experiment with the two approaches. ;)

-Peff
Jeff King March 15, 2023, 5:49 p.m. UTC | #7
On Fri, Mar 10, 2023 at 05:20:56PM +0000, Derrick Stolee via GitGitGadget wrote:

> When a user wishes to input a large list of patterns to 'git
> for-each-ref' (likely a long list of exact refs) there are frequently
> system limits on the number of command-line arguments.
> 
> Add a new --stdin option to instead read the patterns from standard
> input. Add tests that check that any unrecognized arguments are
> considered an error when --stdin is provided. Also, an empty pattern
> list is interpreted as the complete ref set.
> 
> When reading from stdin, we populate the filter.name_patterns array
> dynamically as opposed to pointing to the 'argv' array directly. This
> requires a careful cast while freeing the individual strings,
> conditioned on the --stdin option.

This is a nice feature to have, but I suspect like other pattern
features in Git (e.g., pathspecs), the matching is linear, and thus
pre-expanding the set of refs you're interested in becomes accidentally
quadratic.

And that seems to be the case here. If I have N refs and feed the whole
set as patterns via --stdin:

-- >8 --
for i in 4000 8000 16000 32000; do
  rm -rf repo
  git init -q repo
  (
    cd repo
    git commit --allow-empty -qm foo
    perl -e '
      my ($oid, $n) = @ARGV;
      print "create refs/heads/branch$_ $oid\n" for (1..$n);
    ' $(git rev-parse HEAD) $i |
    git update-ref --stdin
    git for-each-ref --format='%(refname)' >refs
    echo -n "$i: "
    command time -f %U \
      git.compile for-each-ref --stdin <refs 2>&1 >/dev/null
  )
done
-- 8< --

then the result quadruples for every doubling of the refs.

  4000: 0.32
  8000: 1.33
  16000: 5.10
  32000: 20.90

That may or may not be a show-stopper for your use case, and if not,
I don't think it's something we need to address immediately. But we may
want some kind of "literal" mode, that takes in a list of refs rather
than a list of patterns, and does a sorted-merge with the list of
available refs (or uses a hash table, I guess, but for-each-ref also
tries to avoid even being linear in the total number of refs, so you'd
still want to find the lowest/highest to bound the iteration).

-Peff
Junio C Hamano March 15, 2023, 7:24 p.m. UTC | #8
Jeff King <peff@peff.net> writes:

> ... we may
> want some kind of "literal" mode, that takes in a list of refs rather
> than a list of patterns, and does a sorted-merge with the list of
> available refs (or uses a hash table, I guess, but for-each-ref also
> tries to avoid even being linear in the total number of refs, so you'd
> still want to find the lowest/highest to bound the iteration).

Exactly.

I actually was wondering if "literal" mode can just take a list of
<things>, and when a <thing> is not a refname, use it as if it
were. I.e. %(refname) would parrot it, while %(refname:short) would
not shorten and still parrot it, if the <thing> is 73876f4861c, but
something like %(subject) would still work.

For that, I suspect ref-filter.c::filter_refs() would need to learn
a different kind fo finter->kind that iterates over the literal
"refs" that was fed from the standard input, instead of calling
for_each_fullref_in() for the given hierarchy, but the new iterator
should be able to reuse the ref_filter_hander() for the heavy
lifting.
Jeff King March 15, 2023, 7:44 p.m. UTC | #9
On Wed, Mar 15, 2023 at 12:24:18PM -0700, Junio C Hamano wrote:

> Jeff King <peff@peff.net> writes:
> 
> > ... we may
> > want some kind of "literal" mode, that takes in a list of refs rather
> > than a list of patterns, and does a sorted-merge with the list of
> > available refs (or uses a hash table, I guess, but for-each-ref also
> > tries to avoid even being linear in the total number of refs, so you'd
> > still want to find the lowest/highest to bound the iteration).
> 
> Exactly.
> 
> I actually was wondering if "literal" mode can just take a list of
> <things>, and when a <thing> is not a refname, use it as if it
> were. I.e. %(refname) would parrot it, while %(refname:short) would
> not shorten and still parrot it, if the <thing> is 73876f4861c, but
> something like %(subject) would still work.

Yeah, I think that would nicely solve the quadratic issue _and_ the
"we are stuck using only ref tips" issue. I like it.

> For that, I suspect ref-filter.c::filter_refs() would need to learn
> a different kind fo finter->kind that iterates over the literal
> "refs" that was fed from the standard input, instead of calling
> for_each_fullref_in() for the given hierarchy, but the new iterator
> should be able to reuse the ref_filter_hander() for the heavy
> lifting.

Yeah, that sounds about right from my recollection of the code. I
suspect there may be other sharp edges (e.g., asking for %(upstream)
isn't meaningful for a non-ref). But softening those is actually
something I think we want to do in the long run, as it helps with the
long-term goal of sharing pretty-printing code between ref-filter,
cat-file, and pretty.c.

-Peff
diff mbox series

Patch

diff --git a/Documentation/git-for-each-ref.txt b/Documentation/git-for-each-ref.txt
index 6da899c6296..ccdc2911bb9 100644
--- a/Documentation/git-for-each-ref.txt
+++ b/Documentation/git-for-each-ref.txt
@@ -9,7 +9,8 @@  SYNOPSIS
 --------
 [verse]
 'git for-each-ref' [--count=<count>] [--shell|--perl|--python|--tcl]
-		   [(--sort=<key>)...] [--format=<format>] [<pattern>...]
+		   [(--sort=<key>)...] [--format=<format>]
+		   [ --stdin | <pattern>... ]
 		   [--points-at=<object>]
 		   [--merged[=<object>]] [--no-merged[=<object>]]
 		   [--contains[=<object>]] [--no-contains[=<object>]]
@@ -32,6 +33,10 @@  OPTIONS
 	literally, in the latter case matching completely or from the
 	beginning up to a slash.
 
+--stdin::
+	If `--stdin` is supplied, then the list of patterns is read from
+	standard input instead of from the argument list.
+
 --count=<count>::
 	By default the command shows all refs that match
 	`<pattern>`.  This option makes it stop after showing
diff --git a/builtin/for-each-ref.c b/builtin/for-each-ref.c
index 6f62f40d126..e005a7ef3ce 100644
--- a/builtin/for-each-ref.c
+++ b/builtin/for-each-ref.c
@@ -25,6 +25,7 @@  int cmd_for_each_ref(int argc, const char **argv, const char *prefix)
 	struct ref_format format = REF_FORMAT_INIT;
 	struct strbuf output = STRBUF_INIT;
 	struct strbuf err = STRBUF_INIT;
+	int from_stdin = 0;
 
 	struct option opts[] = {
 		OPT_BIT('s', "shell", &format.quote_style,
@@ -49,6 +50,7 @@  int cmd_for_each_ref(int argc, const char **argv, const char *prefix)
 		OPT_CONTAINS(&filter.with_commit, N_("print only refs which contain the commit")),
 		OPT_NO_CONTAINS(&filter.no_commit, N_("print only refs which don't contain the commit")),
 		OPT_BOOL(0, "ignore-case", &icase, N_("sorting and filtering are case insensitive")),
+		OPT_BOOL(0, "stdin", &from_stdin, N_("read reference patterns from stdin")),
 		OPT_END(),
 	};
 
@@ -75,7 +77,27 @@  int cmd_for_each_ref(int argc, const char **argv, const char *prefix)
 	ref_sorting_set_sort_flags_all(sorting, REF_SORTING_ICASE, icase);
 	filter.ignore_case = icase;
 
-	filter.name_patterns = argv;
+	if (from_stdin) {
+		struct strbuf line = STRBUF_INIT;
+		size_t nr = 0, alloc = 16;
+
+		if (argv[0])
+			die(_("unknown arguments supplied with --stdin"));
+
+		CALLOC_ARRAY(filter.name_patterns, alloc);
+
+		while (strbuf_getline(&line, stdin) != EOF) {
+			ALLOC_GROW(filter.name_patterns, nr + 1, alloc);
+			filter.name_patterns[nr++] = strbuf_detach(&line, NULL);
+		}
+
+		/* Add a terminating NULL string. */
+		ALLOC_GROW(filter.name_patterns, nr + 1, alloc);
+		filter.name_patterns[nr + 1] = NULL;
+	} else {
+		filter.name_patterns = argv;
+	}
+
 	filter.match_as_path = 1;
 	filter_refs(&array, &filter, FILTER_REFS_ALL);
 	ref_array_sort(sorting, &array);
@@ -97,5 +119,10 @@  int cmd_for_each_ref(int argc, const char **argv, const char *prefix)
 	free_commit_list(filter.with_commit);
 	free_commit_list(filter.no_commit);
 	ref_sorting_release(sorting);
+	if (from_stdin) {
+		for (size_t i = 0; filter.name_patterns[i]; i++)
+			free((char *)filter.name_patterns[i]);
+		free(filter.name_patterns);
+	}
 	return 0;
 }
diff --git a/t/t6300-for-each-ref.sh b/t/t6300-for-each-ref.sh
index c466fd989f1..a58053a54c5 100755
--- a/t/t6300-for-each-ref.sh
+++ b/t/t6300-for-each-ref.sh
@@ -1464,4 +1464,41 @@  sig_crlf="$(printf "%s" "$sig" | append_cr; echo dummy)"
 sig_crlf=${sig_crlf%dummy}
 test_atom refs/tags/fake-sig-crlf contents:signature "$sig_crlf"
 
+test_expect_success 'git for-each-ref --stdin: empty' '
+	>in &&
+	git for-each-ref --format="%(refname)" --stdin <in >actual &&
+	git for-each-ref --format="%(refname)" >expect &&
+	test_cmp expect actual
+'
+
+test_expect_success 'git for-each-ref --stdin: fails if extra args' '
+	>in &&
+	test_must_fail git for-each-ref --format="%(refname)" \
+		--stdin refs/heads/extra <in 2>err &&
+	grep "unknown arguments supplied with --stdin" err
+'
+
+test_expect_success 'git for-each-ref --stdin: matches' '
+	cat >in <<-EOF &&
+	refs/tags/multi*
+	refs/heads/amb*
+	EOF
+
+	cat >expect <<-EOF &&
+	refs/heads/ambiguous
+	refs/tags/multi-ref1-100000-user1
+	refs/tags/multi-ref1-100000-user2
+	refs/tags/multi-ref1-200000-user1
+	refs/tags/multi-ref1-200000-user2
+	refs/tags/multi-ref2-100000-user1
+	refs/tags/multi-ref2-100000-user2
+	refs/tags/multi-ref2-200000-user1
+	refs/tags/multi-ref2-200000-user2
+	refs/tags/multiline
+	EOF
+
+	git for-each-ref --format="%(refname)" --stdin <in >actual &&
+	test_cmp expect actual
+'
+
 test_done