diff mbox series

[4/6,GSOC] ref-filter: add %(rest) atom and --rest option

Message ID ccdd18ad508824aa206a02c479229d0ede69522d.1622884415.git.gitgitgadget@gmail.com (mailing list archive)
State New, archived
Headers show
Series ref-filter: add %(raw:textconv) and %(raw:filters) | expand

Commit Message

ZheNing Hu June 5, 2021, 9:13 a.m. UTC
From: ZheNing Hu <adlternative@gmail.com>

In order to let "cat-file --batch=%(rest)" use the ref-filter
interface, add %(rest) atom for ref-filter and --rest option
for "git for-each-ref", "git branch", "git tag" and "git verify-tag".
`--rest` specify a string to replace %(rest) placeholders of
the --format option.

Signed-off-by: ZheNing Hu <adlternative@gmail.com>
---
 Documentation/git-branch.txt       |  6 +++++-
 Documentation/git-for-each-ref.txt | 10 +++++++++-
 Documentation/git-tag.txt          |  4 ++++
 Documentation/git-verify-tag.txt   |  6 +++++-
 builtin/branch.c                   |  5 +++++
 builtin/for-each-ref.c             |  6 ++++++
 builtin/tag.c                      |  6 ++++++
 builtin/verify-tag.c               |  1 +
 ref-filter.c                       | 21 +++++++++++++++++++++
 ref-filter.h                       |  5 ++++-
 t/t3203-branch-output.sh           | 14 ++++++++++++++
 t/t6300-for-each-ref.sh            | 24 ++++++++++++++++++++++++
 t/t7004-tag.sh                     | 10 ++++++++++
 t/t7030-verify-tag.sh              |  8 ++++++++
 14 files changed, 122 insertions(+), 4 deletions(-)

Comments

Hariom verma June 5, 2021, 3:20 p.m. UTC | #1
Hi,

On Sat, Jun 5, 2021 at 2:43 PM ZheNing Hu via GitGitGadget
<gitgitgadget@gmail.com> wrote:
>
> From: ZheNing Hu <adlternative@gmail.com>
>
> --- a/builtin/branch.c
> +++ b/builtin/branch.c
> @@ -670,6 +674,7 @@ int cmd_branch(int argc, const char **argv, const char *prefix)
>                         N_("print only branches of the object"), parse_opt_object_name),
>                 OPT_BOOL('i', "ignore-case", &icase, N_("sorting and filtering are case insensitive")),
>                 OPT_STRING(  0 , "format", &format.format, N_("format"), N_("format to use for the output")),
> +               OPT_STRING(0, "rest", &format.rest, N_("rest"), N_("specify %(rest) contents")),
>                 OPT_END(),
>         };
>

Although it's not related to this patch. But I just noticed an unusual
extra space(s) before the first argument of `OPT_STRING()`. (above the
line you added)

> --- a/builtin/for-each-ref.c
> +++ b/builtin/for-each-ref.c
> @@ -37,6 +37,7 @@ int cmd_for_each_ref(int argc, const char **argv, const char *prefix)
>
>                 OPT_GROUP(""),
>                 OPT_INTEGER( 0 , "count", &maxcount, N_("show only <n> matched refs")),
> +               OPT_STRING(  0 , "rest", &format.rest, N_("rest"), N_("specify %(rest) contents")),
>                 OPT_STRING(  0 , "format", &format.format, N_("format"), N_("format to use for the output")),
>                 OPT__COLOR(&format.use_color, N_("respect format colors")),
>                 OPT_REF_SORT(sorting_tail),

Here too in `OPT_INTEGER()` and `OPT_INTEGER()`.

Also, I don't think these extra space(s) are intended. So you don't
need to imitate them.

> --- a/builtin/tag.c
> +++ b/builtin/tag.c
> @@ -481,6 +486,7 @@ int cmd_tag(int argc, const char **argv, const char *prefix)
>                 OPT_STRING(  0 , "format", &format.format, N_("format"),
>                            N_("format to use for the output")),
>                 OPT__COLOR(&format.use_color, N_("respect format colors")),
> +               OPT_STRING(0, "rest", &format.rest, N_("rest"), N_("specify %(rest) contents")),
>                 OPT_BOOL('i', "ignore-case", &icase, N_("sorting and filtering are case insensitive")),
>                 OPT_END()
>         };

Here too in the first line.

Thanks,
Hariom
ZheNing Hu June 6, 2021, 4:58 a.m. UTC | #2
Hi,

Hariom verma <hariom18599@gmail.com> 于2021年6月5日周六 下午11:20写道:
>
> Hi,
>
> On Sat, Jun 5, 2021 at 2:43 PM ZheNing Hu via GitGitGadget
> <gitgitgadget@gmail.com> wrote:
> >
> > From: ZheNing Hu <adlternative@gmail.com>
> >
> > --- a/builtin/branch.c
> > +++ b/builtin/branch.c
> > @@ -670,6 +674,7 @@ int cmd_branch(int argc, const char **argv, const char *prefix)
> >                         N_("print only branches of the object"), parse_opt_object_name),
> >                 OPT_BOOL('i', "ignore-case", &icase, N_("sorting and filtering are case insensitive")),
> >                 OPT_STRING(  0 , "format", &format.format, N_("format"), N_("format to use for the output")),
> > +               OPT_STRING(0, "rest", &format.rest, N_("rest"), N_("specify %(rest) contents")),
> >                 OPT_END(),
> >         };
> >
>
> Although it's not related to this patch. But I just noticed an unusual
> extra space(s) before the first argument of `OPT_STRING()`. (above the
> line you added)
>

Yeah, I noticed it too.

> > --- a/builtin/for-each-ref.c
> > +++ b/builtin/for-each-ref.c
> > @@ -37,6 +37,7 @@ int cmd_for_each_ref(int argc, const char **argv, const char *prefix)
> >
> >                 OPT_GROUP(""),
> >                 OPT_INTEGER( 0 , "count", &maxcount, N_("show only <n> matched refs")),
> > +               OPT_STRING(  0 , "rest", &format.rest, N_("rest"), N_("specify %(rest) contents")),
> >                 OPT_STRING(  0 , "format", &format.format, N_("format"), N_("format to use for the output")),
> >                 OPT__COLOR(&format.use_color, N_("respect format colors")),
> >                 OPT_REF_SORT(sorting_tail),
>
> Here too in `OPT_INTEGER()` and `OPT_INTEGER()`.
>
> Also, I don't think these extra space(s) are intended. So you don't
> need to imitate them.
>

Maybe... I find it's begin at c3428da8 (Make builtin-for-each-ref.c
use parse-opts.)
This is something from 2007. So It may be wrong to imitate its format.
But this format fix work may be can put in good-first-issue. :)

> > --- a/builtin/tag.c
> > +++ b/builtin/tag.c
> > @@ -481,6 +486,7 @@ int cmd_tag(int argc, const char **argv, const char *prefix)
> >                 OPT_STRING(  0 , "format", &format.format, N_("format"),
> >                            N_("format to use for the output")),
> >                 OPT__COLOR(&format.use_color, N_("respect format colors")),
> > +               OPT_STRING(0, "rest", &format.rest, N_("rest"), N_("specify %(rest) contents")),
> >                 OPT_BOOL('i', "ignore-case", &icase, N_("sorting and filtering are case insensitive")),
> >                 OPT_END()
> >         };
>
> Here too in the first line.
>
> Thanks,
> Hariom

Thanks.
--
ZheNing Hu
Junio C Hamano June 7, 2021, 5:52 a.m. UTC | #3
"ZheNing Hu via GitGitGadget" <gitgitgadget@gmail.com> writes:

> From: ZheNing Hu <adlternative@gmail.com>
>
> In order to let "cat-file --batch=%(rest)" use the ref-filter
> interface, add %(rest) atom for ref-filter and --rest option
> for "git for-each-ref", "git branch", "git tag" and "git verify-tag".
> `--rest` specify a string to replace %(rest) placeholders of
> the --format option.

I cannot think of a sane reason why we need to allow "%(rest)" in
anithing but "cat-file --batch", where a natural source of %(rest)
exists in its input stream (i.e. each input record begins with an
object name to be processed, and the rest of the record can become
"%(rest)").

The "cat-file --batch" thing is much more understandable.  You could
for example:

    git ls-files -s |
    sed -e 's/^[0-7]* \([0-9a-f]*\) [0-3]	/\1 /' |
    git cat-file --batch='%(objectname) %(objecttype) %(rest)'

to massage output from "ls-files -s" like this

    100644 c2f5fe385af1bbc161f6c010bdcf0048ab6671ed 0	.cirrus.yml
    100644 c592dda681fecfaa6bf64fb3f539eafaf4123ed8 0	.clang-format
    100644 f9d819623d832113014dd5d5366e8ee44ac9666a 0	.editorconfig
    ...

into recods of "<objectname> <path>", and each output record will
replay the <path> part from each corresponding input record.

Unless for-each-ref family of commands read the list of refs that it
shows from their standard input (they do not, and I do not think it
makes any sense to teach them to), there is no place to feed the
"rest" information that is associated with each output record.  The
only thing the commands taught about %(rest) by this patch can do is
to parrot the same string into each and every output record.  I am
not seeing what this new feature is attempting to give us.

If anything, I would imagine that it would be a very useful addition
to teach the ref-filter machinery an ability to optionally error out
depending on the caller when the caller attempts to use certain
placeholder.  Then, we can reject "git branch --sort=rest" sensibly,
instead of accepting "git branch --sort=rest --rest=constant", which
is not technically wrong per-se, but smells like a total nonsense from
practical usefulness's point of view.

> -	[--list] [<pattern>...]
> +	[--list] [<pattern>...] [--rest=<rest>]
>  'git branch' [--track | --no-track] [-f] <branchname> [<start-point>]
>  'git branch' (--set-upstream-to=<upstream> | -u <upstream>) [<branchname>]
>  'git branch' --unset-upstream [<branchname>]
> @@ -298,6 +298,10 @@ start-point is either a local or remote-tracking branch.
>  	and the object it points at.  The format is the same as
>  	that of linkgit:git-for-each-ref[1].
>  
> +--rest=<rest>::
> +	If given, the `%(rest)` placeholders in the `--format` option
> +	will be replaced.

If not given, what happens?
ZheNing Hu June 7, 2021, 1:02 p.m. UTC | #4
Junio C Hamano <gitster@pobox.com> 于2021年6月7日周一 下午1:52写道:
>
> "ZheNing Hu via GitGitGadget" <gitgitgadget@gmail.com> writes:
>
> > From: ZheNing Hu <adlternative@gmail.com>
> >
> > In order to let "cat-file --batch=%(rest)" use the ref-filter
> > interface, add %(rest) atom for ref-filter and --rest option
> > for "git for-each-ref", "git branch", "git tag" and "git verify-tag".
> > `--rest` specify a string to replace %(rest) placeholders of
> > the --format option.
>
> I cannot think of a sane reason why we need to allow "%(rest)" in
> anithing but "cat-file --batch", where a natural source of %(rest)
> exists in its input stream (i.e. each input record begins with an
> object name to be processed, and the rest of the record can become
> "%(rest)").
>

First of all, although %(rest) is meaningless in ordinary circumstances,
ref-filter must learn %(rest), it is impossible for us to leave the parsing
of %(rest) in cat-file.c alone.

Then, `--rest` is a strategy that make %(rest) can use in `git for-each-ref`
or `git branch -l`. As you said, it is just a boring placeholder used for string
replacement. We can make it output only empty content, If we really don’t
need `--rest`.

> The "cat-file --batch" thing is much more understandable.  You could
> for example:
>
>     git ls-files -s |
>     sed -e 's/^[0-7]* \([0-9a-f]*\) [0-3]       /\1 /' |
>     git cat-file --batch='%(objectname) %(objecttype) %(rest)'
>

s/[0-3]       /[0-3]\t/

> to massage output from "ls-files -s" like this
>
>     100644 c2f5fe385af1bbc161f6c010bdcf0048ab6671ed 0   .cirrus.yml
>     100644 c592dda681fecfaa6bf64fb3f539eafaf4123ed8 0   .clang-format
>     100644 f9d819623d832113014dd5d5366e8ee44ac9666a 0   .editorconfig
>     ...
>
> into recods of "<objectname> <path>", and each output record will
> replay the <path> part from each corresponding input record.
>

Yeah, the <path> in the input will be treated as "rest".

> Unless for-each-ref family of commands read the list of refs that it
> shows from their standard input (they do not, and I do not think it
> makes any sense to teach them to), there is no place to feed the
> "rest" information that is associated with each output record.  The
> only thing the commands taught about %(rest) by this patch can do is
> to parrot the same string into each and every output record.  I am
> not seeing what this new feature is attempting to give us.
>

"parrot the same string"? I think we should use an empty string here,
"parrot the same string" more like what the "git log --format" family does.

> If anything, I would imagine that it would be a very useful addition
> to teach the ref-filter machinery an ability to optionally error out
> depending on the caller when the caller attempts to use certain
> placeholder.  Then, we can reject "git branch --sort=rest" sensibly,
> instead of accepting "git branch --sort=rest --rest=constant", which
> is not technically wrong per-se, but smells like a total nonsense from
> practical usefulness's point of view.
>

This sounds like it might help `cat-file` to reject some useless atoms
like %(refname). So something like:

$ git for-each-ref --format="%(objectname) %(objectsize)"
--refject-atoms="%(objectsize) %(objectname)"

will fail.

"git for-each-ref" family use hardcoded to reject %(rest).
I can try to achieve this function.

> > -     [--list] [<pattern>...]
> > +     [--list] [<pattern>...] [--rest=<rest>]
> >  'git branch' [--track | --no-track] [-f] <branchname> [<start-point>]
> >  'git branch' (--set-upstream-to=<upstream> | -u <upstream>) [<branchname>]
> >  'git branch' --unset-upstream [<branchname>]
> > @@ -298,6 +298,10 @@ start-point is either a local or remote-tracking branch.
> >       and the object it points at.  The format is the same as
> >       that of linkgit:git-for-each-ref[1].
> >
> > +--rest=<rest>::
> > +     If given, the `%(rest)` placeholders in the `--format` option
> > +     will be replaced.
>
> If not given, what happens?

Will output an empty string.

Hope we can reach an agreement:
delete `--rest` and add `--reject-atoms`. ;-)

Thanks.
--
ZheNing Hu
ZheNing Hu June 7, 2021, 1:18 p.m. UTC | #5
ZheNing Hu <adlternative@gmail.com> 于2021年6月7日周一 下午9:02写道:
>
> Hope we can reach an agreement:
> delete `--rest` and add `--reject-atoms`. ;-)
>

I forget one thing: %(raw:textconv) and %(raw:filters) can use
the value of "--rest" as their <path>. But now if we want delete --rest,
they can not be used for "for-each-ref" family, Git will die with
"missing path for 'xxx'".

> Thanks.
> --
> ZheNing Hu
ZheNing Hu June 8, 2021, 6:16 a.m. UTC | #6
ZheNing Hu <adlternative@gmail.com> 于2021年6月7日周一 下午9:18写道:
>
> ZheNing Hu <adlternative@gmail.com> 于2021年6月7日周一 下午9:02写道:
> >
> > Hope we can reach an agreement:
> > delete `--rest` and add `--reject-atoms`. ;-)
> >
>
> I forget one thing: %(raw:textconv) and %(raw:filters) can use
> the value of "--rest" as their <path>. But now if we want delete --rest,
> they can not be used for "for-each-ref" family, Git will die with
> "missing path for 'xxx'".
>

If we actually delete "--rest", we will have no way to test %(raw:textconv)
and %(raw:filters)... So now I think we can keep --rest (or use
another name --path)
and let "git for-each-ref" family reject %(rest) by default.

Thanks.
--
ZheNing Hu
Junio C Hamano June 8, 2021, 6:50 a.m. UTC | #7
ZheNing Hu <adlternative@gmail.com> writes:

> First of all, although %(rest) is meaningless in ordinary circumstances,
> ref-filter must learn %(rest), it is impossible for us to leave the parsing
> of %(rest) in cat-file.c alone.

Oh, there is no question about that.

> Then, `--rest` is a strategy that make %(rest) can use in `git for-each-ref`
> or `git branch -l`.

If there is no need to expose %(rest) to the users who write
--format for these two commands, it would be much better to detect
attempted use of %(rest) and error out.

> This sounds like it might help `cat-file` to reject some useless atoms
> like %(refname).

Yes.

> So something like:
>
> $ git for-each-ref --format="%(objectname) %(objectsize)"
> --refject-atoms="%(objectsize) %(objectname)"
>
> will fail.

I don't understand.  Why do you even need to add --reject?  Why
would any user would want to use it with for-each-ref?

Without any end-user input, %(rest) for for-each-ref would not make
sense, and %(refname) for cat-file --batch would not make sense, I
would imagine, so there is no need to be able to tell --reject=rest
to for-each-ref.  It is not like giving --no-reject=rest to for-each-ref
and make it interpolate to an empty string is a useful feature anyway,
so I do not see a need for such an option.
Junio C Hamano June 8, 2021, 6:59 a.m. UTC | #8
ZheNing Hu <adlternative@gmail.com> writes:

> ZheNing Hu <adlternative@gmail.com> 于2021年6月7日周一 下午9:18写道:
>>
>> ZheNing Hu <adlternative@gmail.com> 于2021年6月7日周一 下午9:02写道:
>> >
>> > Hope we can reach an agreement:
>> > delete `--rest` and add `--reject-atoms`. ;-)
>> >
>>
>> I forget one thing: %(raw:textconv) and %(raw:filters) can use
>> the value of "--rest" as their <path>. But now if we want delete --rest,
>> they can not be used for "for-each-ref" family, Git will die with
>> "missing path for 'xxx'".
>>
>
> If we actually delete "--rest", we will have no way to test %(raw:textconv)
> and %(raw:filters)... So now I think we can keep --rest (or use
> another name --path)
> and let "git for-each-ref" family reject %(rest) by default.

I didn't read beyond the %(rest) thing, but do we even need
%(raw:textconv) to begin with?  It is totally useless in the context
of for-each-ref because textconv by its nature is tied to attributes
that by definition needs a blob that is sitting at a path, but the
objects for-each-ref and friends visit are mostly commits and tags,
and even for refs that point at a blob, there isn't any "path"
information to pull attribute for.

Is that what you want to add to give "cat-file --batch"?  Even in
the context of "cat-file --batch", you can throw an object name for
a blob to the command, but there is no path for the blob (a blob can
appear at different places in different trees---think "rename), so I
am not sure what benefit you are trying to derive from it.

Thanks.
ZheNing Hu June 8, 2021, 12:32 p.m. UTC | #9
Junio C Hamano <gitster@pobox.com> 于2021年6月8日周二 下午2:50写道:
>
> ZheNing Hu <adlternative@gmail.com> writes:
>
> > First of all, although %(rest) is meaningless in ordinary circumstances,
> > ref-filter must learn %(rest), it is impossible for us to leave the parsing
> > of %(rest) in cat-file.c alone.
>
> Oh, there is no question about that.
>

OK.

>
> I don't understand.  Why do you even need to add --reject?  Why
> would any user would want to use it with for-each-ref?
>
> Without any end-user input, %(rest) for for-each-ref would not make
> sense, and %(refname) for cat-file --batch would not make sense, I
> would imagine, so there is no need to be able to tell --reject=rest
> to for-each-ref.  It is not like giving --no-reject=rest to for-each-ref
> and make it interpolate to an empty string is a useful feature anyway,
> so I do not see a need for such an option.

Yeah... I might have thought it complicated before. We can reject %(rest) in
verify_ref_format() easily. It's like we refused --<lang> before. :)

Thanks.
--
ZheNing Hu
ZheNing Hu June 8, 2021, 12:39 p.m. UTC | #10
Junio C Hamano <gitster@pobox.com> 于2021年6月8日周二 下午2:59写道:
>
> >
> > If we actually delete "--rest", we will have no way to test %(raw:textconv)
> > and %(raw:filters)... So now I think we can keep --rest (or use
> > another name --path)
> > and let "git for-each-ref" family reject %(rest) by default.
>
> I didn't read beyond the %(rest) thing, but do we even need
> %(raw:textconv) to begin with?  It is totally useless in the context
> of for-each-ref because textconv by its nature is tied to attributes
> that by definition needs a blob that is sitting at a path, but the
> objects for-each-ref and friends visit are mostly commits and tags,
> and even for refs that point at a blob, there isn't any "path"
> information to pull attribute for.
>

After thinking about your words, now I think maybe we can leave
%(raw:textconv) and %(raw:filter) after cat-file --batch start using
ref-filter logic, so that we can provide them with suitable tests,
and we don't need `--rest` anymore.

> Is that what you want to add to give "cat-file --batch"?  Even in
> the context of "cat-file --batch", you can throw an object name for
> a blob to the command, but there is no path for the blob (a blob can
> appear at different places in different trees---think "rename), so I
> am not sure what benefit you are trying to derive from it.
>

So I will remove the last two commits.

> Thanks.
>
>

Thanks.
--
ZheNing Hu
Junio C Hamano June 9, 2021, 7 a.m. UTC | #11
Junio C Hamano <gitster@pobox.com> writes:

> Is that what you want to add to give "cat-file --batch"?  Even in
> the context of "cat-file --batch", you can throw an object name for
> a blob to the command, but there is no path for the blob (a blob can
> appear at different places in different trees---think "rename), so I
> am not sure what benefit you are trying to derive from it.

I think I kind-of see what is going on here.  There is

    git cat-file blob --textconv --path="$path" "$blob_object_name"

that allows a blob to be fed to the command, pretend as if it
appears at $path in a tree object and grab attribute for it, and
show the blob contents converted using the textconv filter.  If we
were to mimic it by extending the format based substitutions, a
design consistent with the behaviour is to teach --format=%(raw)
to show the contents after applying the textconv filter instead of
the raw blob contents.

And there is a corresponding

    git cat-file --batch --textconv

The "--path=$path" parameter is omitted when using --batch, as each
object would sit at different path in the tree (so the input stream
would be given as a run of "<blob> <path>" to give each item its own
path).

So to answer my question in the previous message, yes, this is an
attempt to support the "cat-file --textconv".  So in the context of
that command, something may need to be added.  But I do not think it
makes any sense to expose that to for-each-ref and friends, even if
we were to share the internal machinery (after all, sharing of the
internal machinery is a mere means to an end that is to make it
easier to give the same syntax and same behaviour to end users and
is not a goal itself; "because we use the same machinery, the users
have to tolerate that irrelevant %(atoms) are accepted by the parser"
is not making a good excuse for a sloppy implementation).

Having said all that, I somehow doubt that the "--batch=<format>"
was designed to interact sensibly with the "--textconv" option.
builtin/cat-file.c::expand_atom() does not know anything at all that
the data could be modified from the raw contents of the blob, so
--batch="%(contents) %(size)" --textconv, if existed, may show the
conveted contents with size of blob before conversion, or something
incoherent like that.  And if your rewrite using the shared internal
machinery results in a more coherent behaviour, that would be
excellent.  For example, we could imagine that the machinery, when
textconv (or filter) is in use, would first grab the blob contents
and run the requested conversion, and then work solely on that
conveted contents when computing what to fill with %(raw:size) and
other blob-related atoms.

Thanks.
ZheNing Hu June 9, 2021, 12:47 p.m. UTC | #12
Junio C Hamano <gitster@pobox.com> 于2021年6月9日周三 下午3:00写道:
>
> I think I kind-of see what is going on here.  There is
>
>     git cat-file blob --textconv --path="$path" "$blob_object_name"
>
> that allows a blob to be fed to the command, pretend as if it
> appears at $path in a tree object and grab attribute for it, and
> show the blob contents converted using the textconv filter.  If we
> were to mimic it by extending the format based substitutions, a
> design consistent with the behaviour is to teach --format=%(raw)
> to show the contents after applying the textconv filter instead of
> the raw blob contents.
>

Yes, this is exactly what cat-file --textconv does.

> And there is a corresponding
>
>     git cat-file --batch --textconv
>
> The "--path=$path" parameter is omitted when using --batch, as each
> object would sit at different path in the tree (so the input stream
> would be given as a run of "<blob> <path>" to give each item its own
> path).
>

Just like let --batch omitted --path, --rest is meaningless for "for-ech-ref".

> So to answer my question in the previous message, yes, this is an
> attempt to support the "cat-file --textconv".  So in the context of
> that command, something may need to be added.  But I do not think it
> makes any sense to expose that to for-each-ref and friends, even if
> we were to share the internal machinery (after all, sharing of the
> internal machinery is a mere means to an end that is to make it
> easier to give the same syntax and same behaviour to end users and
> is not a goal itself; "because we use the same machinery, the users
> have to tolerate that irrelevant %(atoms) are accepted by the parser"
> is not making a good excuse for a sloppy implementation).
>

Because "git cat-file --batch" will only print the contents of the object once,
so when implements the function of textconv/filters in ref-filter,
we should really consider whether we should let something like
"%(raw) %(raw) %(raw) %(raw:size)" all pass the conversion of textconv/filters.
If it is my previous %(raw:textconv) or %(raw:filter), they can only print
the converted content separately, and we need:

$ git for-ecah-ref --format="%(raw:filters) %(raw:filters)
%(raw:filters) %(raw:filters:size)"

As you said it might be too complicated for the user....

> Having said all that, I somehow doubt that the "--batch=<format>"
> was designed to interact sensibly with the "--textconv" option.
> builtin/cat-file.c::expand_atom() does not know anything at all that
> the data could be modified from the raw contents of the blob, so
> --batch="%(contents) %(size)" --textconv, if existed, may show the
> conveted contents with size of blob before conversion, or something
> incoherent like that.  And if your rewrite using the shared internal
> machinery results in a more coherent behaviour, that would be
> excellent.  For example, we could imagine that the machinery, when
> textconv (or filter) is in use, would first grab the blob contents
> and run the requested conversion, and then work solely on that
> conveted contents when computing what to fill with %(raw:size) and
> other blob-related atoms.
>

And with your --textconv/--filters, we only need:

$ git for-ecah-ref --format="%(raw) %(raw) %(raw) %(raw:size)" --filters

This will be more concise for users. I will try to build --filters, --textconv
for ref-filter . But as stated in the previous reply, it needs to be placed
after the transplant of cat-file --batch (Because --path is useless for
for-each-ref, and at the same time we need proper testing for
--filter/--textconv.)

Thanks, your reply is very reasonable,
--
ZheNing Hu
diff mbox series

Patch

diff --git a/Documentation/git-branch.txt b/Documentation/git-branch.txt
index 94dc9a54f2d7..29fa579e3d8a 100644
--- a/Documentation/git-branch.txt
+++ b/Documentation/git-branch.txt
@@ -15,7 +15,7 @@  SYNOPSIS
 	[--contains [<commit>]] [--no-contains [<commit>]]
 	[--points-at <object>] [--format=<format>]
 	[(-r | --remotes) | (-a | --all)]
-	[--list] [<pattern>...]
+	[--list] [<pattern>...] [--rest=<rest>]
 'git branch' [--track | --no-track] [-f] <branchname> [<start-point>]
 'git branch' (--set-upstream-to=<upstream> | -u <upstream>) [<branchname>]
 'git branch' --unset-upstream [<branchname>]
@@ -298,6 +298,10 @@  start-point is either a local or remote-tracking branch.
 	and the object it points at.  The format is the same as
 	that of linkgit:git-for-each-ref[1].
 
+--rest=<rest>::
+	If given, the `%(rest)` placeholders in the `--format` option
+	will be replaced.
+
 CONFIGURATION
 -------------
 `pager.branch` is only respected when listing branches, i.e., when
diff --git a/Documentation/git-for-each-ref.txt b/Documentation/git-for-each-ref.txt
index 8f8d8cd1e04f..e85b7b19a530 100644
--- a/Documentation/git-for-each-ref.txt
+++ b/Documentation/git-for-each-ref.txt
@@ -10,7 +10,7 @@  SYNOPSIS
 [verse]
 'git for-each-ref' [--count=<count>] [--shell|--perl|--python|--tcl]
 		   [(--sort=<key>)...] [--format=<format>] [<pattern>...]
-		   [--points-at=<object>]
+		   [--points-at=<object>] [--rest=<rest>]
 		   [--merged[=<object>]] [--no-merged[=<object>]]
 		   [--contains[=<object>]] [--no-contains[=<object>]]
 
@@ -74,6 +74,10 @@  OPTIONS
 --points-at=<object>::
 	Only list refs which points at the given object.
 
+--rest=<rest>::
+	If given, the `%(rest)` placeholders in the `--format` option
+	will be replaced by its contents.
+
 --merged[=<object>]::
 	Only list refs whose tips are reachable from the
 	specified commit (HEAD if not specified).
@@ -235,6 +239,10 @@  and `date` to extract the named component.  For email fields (`authoremail`,
 without angle brackets, and `:localpart` to get the part before the `@` symbol
 out of the trimmed email.
 
+rest::
+	The placeholder for `--rest` specified string. If no `--rest`, nothing
+	is printed.
+
 The raw data in a object is `raw`.
 
 raw:size::
diff --git a/Documentation/git-tag.txt b/Documentation/git-tag.txt
index 31a97a1b6c5b..3bf6ae7216de 100644
--- a/Documentation/git-tag.txt
+++ b/Documentation/git-tag.txt
@@ -200,6 +200,10 @@  This option is only applicable when listing tags without annotation lines.
 	that of linkgit:git-for-each-ref[1].  When unspecified,
 	defaults to `%(refname:strip=2)`.
 
+--rest=<rest>::
+	If given, the `%(rest)` placeholders in the `--format` option
+	will be replaced.
+
 <tagname>::
 	The name of the tag to create, delete, or describe.
 	The new tag name must pass all checks defined by
diff --git a/Documentation/git-verify-tag.txt b/Documentation/git-verify-tag.txt
index 0b8075dad965..7ba4a6941ab1 100644
--- a/Documentation/git-verify-tag.txt
+++ b/Documentation/git-verify-tag.txt
@@ -8,7 +8,7 @@  git-verify-tag - Check the GPG signature of tags
 SYNOPSIS
 --------
 [verse]
-'git verify-tag' [--format=<format>] <tag>...
+'git verify-tag' [--format=<format>] [--rest=<rest>] <tag>...
 
 DESCRIPTION
 -----------
@@ -27,6 +27,10 @@  OPTIONS
 <tag>...::
 	SHA-1 identifiers of Git tag objects.
 
+--rest=<rest>::
+	If given, the `%(rest)` placeholders in the `--format` option
+	will be replaced.
+
 GIT
 ---
 Part of the linkgit:git[1] suite
diff --git a/builtin/branch.c b/builtin/branch.c
index b23b1d1752af..b03a4c49c1d9 100644
--- a/builtin/branch.c
+++ b/builtin/branch.c
@@ -439,6 +439,10 @@  static void print_ref_list(struct ref_filter *filter, struct ref_sorting *sortin
 	if (verify_ref_format(format))
 		die(_("unable to parse format string"));
 
+	if (format->use_rest)
+		for (i = 0; i < array.nr; i++)
+			array.items[i]->rest = format->rest;
+
 	ref_array_sort(sorting, &array);
 
 	for (i = 0; i < array.nr; i++) {
@@ -670,6 +674,7 @@  int cmd_branch(int argc, const char **argv, const char *prefix)
 			N_("print only branches of the object"), parse_opt_object_name),
 		OPT_BOOL('i', "ignore-case", &icase, N_("sorting and filtering are case insensitive")),
 		OPT_STRING(  0 , "format", &format.format, N_("format"), N_("format to use for the output")),
+		OPT_STRING(0, "rest", &format.rest, N_("rest"), N_("specify %(rest) contents")),
 		OPT_END(),
 	};
 
diff --git a/builtin/for-each-ref.c b/builtin/for-each-ref.c
index 89cb6307d46f..fac7777fd2c0 100644
--- a/builtin/for-each-ref.c
+++ b/builtin/for-each-ref.c
@@ -37,6 +37,7 @@  int cmd_for_each_ref(int argc, const char **argv, const char *prefix)
 
 		OPT_GROUP(""),
 		OPT_INTEGER( 0 , "count", &maxcount, N_("show only <n> matched refs")),
+		OPT_STRING(  0 , "rest", &format.rest, N_("rest"), N_("specify %(rest) contents")),
 		OPT_STRING(  0 , "format", &format.format, N_("format"), N_("format to use for the output")),
 		OPT__COLOR(&format.use_color, N_("respect format colors")),
 		OPT_REF_SORT(sorting_tail),
@@ -78,6 +79,11 @@  int cmd_for_each_ref(int argc, const char **argv, const char *prefix)
 	filter.name_patterns = argv;
 	filter.match_as_path = 1;
 	filter_refs(&array, &filter, FILTER_REFS_ALL | FILTER_REFS_INCLUDE_BROKEN);
+
+	if (format.use_rest)
+		for (i = 0; i < array.nr; i++)
+			array.items[i]->rest = format.rest;
+
 	ref_array_sort(sorting, &array);
 
 	if (!maxcount || array.nr < maxcount)
diff --git a/builtin/tag.c b/builtin/tag.c
index 452558ec9575..9e52b3eacb16 100644
--- a/builtin/tag.c
+++ b/builtin/tag.c
@@ -63,6 +63,11 @@  static int list_tags(struct ref_filter *filter, struct ref_sorting *sorting,
 		die(_("unable to parse format string"));
 	filter->with_commit_tag_algo = 1;
 	filter_refs(&array, filter, FILTER_REFS_TAGS);
+
+	if (format->use_rest)
+		for (i = 0; i < array.nr; i++)
+			array.items[i]->rest = format->rest;
+
 	ref_array_sort(sorting, &array);
 
 	for (i = 0; i < array.nr; i++) {
@@ -481,6 +486,7 @@  int cmd_tag(int argc, const char **argv, const char *prefix)
 		OPT_STRING(  0 , "format", &format.format, N_("format"),
 			   N_("format to use for the output")),
 		OPT__COLOR(&format.use_color, N_("respect format colors")),
+		OPT_STRING(0, "rest", &format.rest, N_("rest"), N_("specify %(rest) contents")),
 		OPT_BOOL('i', "ignore-case", &icase, N_("sorting and filtering are case insensitive")),
 		OPT_END()
 	};
diff --git a/builtin/verify-tag.c b/builtin/verify-tag.c
index f45136a06ba7..00be4899678d 100644
--- a/builtin/verify-tag.c
+++ b/builtin/verify-tag.c
@@ -36,6 +36,7 @@  int cmd_verify_tag(int argc, const char **argv, const char *prefix)
 		OPT__VERBOSE(&verbose, N_("print tag contents")),
 		OPT_BIT(0, "raw", &flags, N_("print raw gpg status output"), GPG_VERIFY_RAW),
 		OPT_STRING(0, "format", &format.format, N_("format"), N_("format to use for the output")),
+		OPT_STRING(0, "rest", &format.rest, N_("rest"), N_("specify %(rest) contents")),
 		OPT_END()
 	};
 
diff --git a/ref-filter.c b/ref-filter.c
index 608e38aa4160..695f6f55e3e3 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -157,6 +157,7 @@  enum atom_type {
 	ATOM_IF,
 	ATOM_THEN,
 	ATOM_ELSE,
+	ATOM_REST,
 };
 
 /*
@@ -559,6 +560,15 @@  static int if_atom_parser(struct ref_format *format, struct used_atom *atom,
 	return 0;
 }
 
+static int rest_atom_parser(struct ref_format *format, struct used_atom *atom,
+			    const char *arg, struct strbuf *err)
+{
+	if (arg)
+		return strbuf_addf_ret(err, -1, _("%%(rest) does not take arguments"));
+	format->use_rest = 1;
+	return 0;
+}
+
 static int head_atom_parser(struct ref_format *format, struct used_atom *atom,
 			    const char *arg, struct strbuf *unused_err)
 {
@@ -615,6 +625,7 @@  static struct {
 	[ATOM_IF] = { "if", SOURCE_NONE, FIELD_STR, if_atom_parser },
 	[ATOM_THEN] = { "then", SOURCE_NONE },
 	[ATOM_ELSE] = { "else", SOURCE_NONE },
+	[ATOM_REST] = { "rest", SOURCE_NONE, FIELD_STR, rest_atom_parser },
 	/*
 	 * Please update $__git_ref_fieldlist in git-completion.bash
 	 * when you add new atoms
@@ -1919,6 +1930,12 @@  static int populate_value(struct ref_array_item *ref, struct strbuf *err)
 			v->handler = else_atom_handler;
 			v->s = xstrdup("");
 			continue;
+		} else if (atom_type == ATOM_REST) {
+			if (ref->rest)
+				v->s = xstrdup(ref->rest);
+			else
+				v->s = xstrdup("");
+			continue;
 		} else
 			continue;
 
@@ -2136,6 +2153,7 @@  static struct ref_array_item *new_ref_array_item(const char *refname,
 
 	FLEX_ALLOC_STR(ref, refname, refname);
 	oidcpy(&ref->objectname, oid);
+	ref->rest = NULL;
 
 	return ref;
 }
@@ -2600,6 +2618,9 @@  void pretty_print_ref(const char *name, const struct object_id *oid,
 
 	ref_item = new_ref_array_item(name, oid);
 	ref_item->kind = ref_kind_from_refname(name);
+	if (format->use_rest)
+		ref_item->rest = format->rest;
+
 	if (format_ref_array_item(ref_item, format, &output, &err))
 		die("%s", err.buf);
 	fwrite(output.buf, 1, output.len, stdout);
diff --git a/ref-filter.h b/ref-filter.h
index 74fb423fc89f..9dc07476a584 100644
--- a/ref-filter.h
+++ b/ref-filter.h
@@ -38,6 +38,7 @@  struct ref_sorting {
 
 struct ref_array_item {
 	struct object_id objectname;
+	const char *rest;
 	int flag;
 	unsigned int kind;
 	const char *symref;
@@ -76,14 +77,16 @@  struct ref_format {
 	 * verify_ref_format() afterwards to finalize.
 	 */
 	const char *format;
+	const char *rest;
 	int quote_style;
+	int use_rest;
 	int use_color;
 
 	/* Internal state to ref-filter */
 	int need_color_reset_at_eol;
 };
 
-#define REF_FORMAT_INIT { NULL, 0, -1 }
+#define REF_FORMAT_INIT { NULL, NULL, 0, 0, -1 }
 
 /*  Macros for checking --merged and --no-merged options */
 #define _OPT_MERGED_NO_MERGED(option, filter, h) \
diff --git a/t/t3203-branch-output.sh b/t/t3203-branch-output.sh
index 5325b9f67a00..dc6bf2557088 100755
--- a/t/t3203-branch-output.sh
+++ b/t/t3203-branch-output.sh
@@ -340,6 +340,20 @@  test_expect_success 'git branch --format option' '
 	test_cmp expect actual
 '
 
+test_expect_success 'git branch with --format and --rest option' '
+	cat >expect <<-\EOF &&
+	@Refname is (HEAD detached from fromtag)
+	@Refname is refs/heads/ambiguous
+	@Refname is refs/heads/branch-one
+	@Refname is refs/heads/branch-two
+	@Refname is refs/heads/main
+	@Refname is refs/heads/ref-to-branch
+	@Refname is refs/heads/ref-to-remote
+	EOF
+	git branch --rest="@" --format="%(rest)Refname is %(refname)" >actual &&
+	test_cmp expect actual
+'
+
 test_expect_success 'worktree colors correct' '
 	cat >expect <<-EOF &&
 	* <GREEN>(HEAD detached from fromtag)<RESET>
diff --git a/t/t6300-for-each-ref.sh b/t/t6300-for-each-ref.sh
index 5f66d933ace0..e908b2ca0522 100755
--- a/t/t6300-for-each-ref.sh
+++ b/t/t6300-for-each-ref.sh
@@ -1187,6 +1187,30 @@  test_expect_success 'basic atom: head contents:trailers' '
 	test_cmp expect actual.clean
 '
 
+test_expect_success 'bacic atom: rest with --rest' '
+	git for-each-ref --format="###refname=%(refname)
+###oid=%(objectname)
+###type=%(objecttype)
+###size=%(objectsize)" refs/heads/main refs/tags/subject-body >expect &&
+	git for-each-ref --rest="###" --format="%(rest)refname=%(refname)
+%(rest)oid=%(objectname)
+%(rest)type=%(objecttype)
+%(rest)size=%(objectsize)" refs/heads/main refs/tags/subject-body >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'bacic atom: *rest with --rest' '
+	git for-each-ref --format="###refname=%(refname)
+###oid=%(objectname)
+###type=%(objecttype)
+###size=%(objectsize)" refs/heads/main refs/tags/subject-body >expect &&
+	git for-each-ref --rest="###" --format="%(*rest)refname=%(refname)
+%(rest)oid=%(objectname)
+%(rest)type=%(objecttype)
+%(rest)size=%(objectsize)" refs/heads/main refs/tags/subject-body >actual &&
+	test_cmp expect actual
+'
+
 test_expect_success 'trailer parsing not fooled by --- line' '
 	git commit --allow-empty -F - <<-\EOF &&
 	this is the subject
diff --git a/t/t7004-tag.sh b/t/t7004-tag.sh
index 2f72c5c6883e..93c4366ff52d 100755
--- a/t/t7004-tag.sh
+++ b/t/t7004-tag.sh
@@ -1998,6 +1998,16 @@  test_expect_success '--format should list tags as per format given' '
 	test_cmp expect actual
 '
 
+test_expect_success 'tag -l with --format and --rest' '
+	cat >expect <<-\EOF &&
+	#refname : refs/tags/v1.0
+	#refname : refs/tags/v1.0.1
+	#refname : refs/tags/v1.1.3
+	EOF
+	git tag -l --rest="#" --format="%(rest)refname : %(refname)" "v1*" >actual &&
+	test_cmp expect actual
+'
+
 test_expect_success "set up color tests" '
 	echo "<RED>v1.0<RESET>" >expect.color &&
 	echo "v1.0" >expect.bare &&
diff --git a/t/t7030-verify-tag.sh b/t/t7030-verify-tag.sh
index 3cefde9602bf..008df00c32d6 100755
--- a/t/t7030-verify-tag.sh
+++ b/t/t7030-verify-tag.sh
@@ -194,6 +194,14 @@  test_expect_success GPG 'verifying tag with --format' '
 	test_cmp expect actual
 '
 
+test_expect_success GPG 'verifying tag with --format and --rest' '
+	cat >expect <<-\EOF &&
+	#tagname : fourth-signed
+	EOF
+	git verify-tag --rest="#" --format="%(rest)tagname : %(tag)" "fourth-signed" >actual &&
+	test_cmp expect actual
+'
+
 test_expect_success GPG 'verifying a forged tag with --format should fail silently' '
 	test_must_fail git verify-tag --format="tagname : %(tag)" $(cat forged1.tag) >actual-forged &&
 	test_must_be_empty actual-forged