diff mbox series

[v3,03/10] trailer: unify trailer formatting machinery

Message ID 5c7a2354df0f4a29841f9ab8294ead0e1c3b9cf5.1706664145.git.gitgitgadget@gmail.com (mailing list archive)
State Superseded
Headers show
Series Enrich Trailer API | expand

Commit Message

Linus Arver Jan. 31, 2024, 1:22 a.m. UTC
From: Linus Arver <linusa@google.com>

Currently have two functions for formatting trailers exposed in
trailer.h:

    void format_trailers(FILE *outfile, struct list_head *head,
                        const struct process_trailer_options *opts);

    void format_trailers_from_commit(struct strbuf *out, const char *msg,
                                    const struct process_trailer_options *opts);

and previously these functions, although similar enough (even taking the
same process_trailer_options struct pointer), did not build on each
other.

Make format_trailers_from_commit() rely on format_trailers(). Teach
format_trailers() to process trailers with the additional
process_trailer_options fields like opts->key_only which is only used by
format_trailers_from_commit() and not builtin/interpret-trailers.c.
While we're at it, reorder parameters to put the trailer processing
options first, and the out parameter (strbuf we write into) at the end.

This unification will allow us to delete the format_trailer_info() and
print_tok_val() functions in the next patch. They are not deleted here
in order to keep the diff small.

Helped-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Linus Arver <linusa@google.com>
---
 builtin/interpret-trailers.c |   6 +-
 pretty.c                     |   2 +-
 ref-filter.c                 |   2 +-
 trailer.c                    | 176 +++++++++++++++++------------------
 trailer.h                    |  19 ++--
 5 files changed, 98 insertions(+), 107 deletions(-)

Comments

Josh Steadmon Jan. 31, 2024, 8:02 p.m. UTC | #1
On 2024.01.31 01:22, Linus Arver via GitGitGadget wrote:
> This unification will allow us to delete the format_trailer_info() and
> print_tok_val() functions in the next patch. They are not deleted here
> in order to keep the diff small.

Needs to be removed after squashing v2 patch 4 :)
Junio C Hamano Jan. 31, 2024, 8:13 p.m. UTC | #2
"Linus Arver via GitGitGadget" <gitgitgadget@gmail.com> writes:

> From: Linus Arver <linusa@google.com>
>
> Currently have two functions for formatting trailers exposed in
> trailer.h:
>
>     void format_trailers(FILE *outfile, struct list_head *head,
>                         const struct process_trailer_options *opts);
>
>     void format_trailers_from_commit(struct strbuf *out, const char *msg,
>                                     const struct process_trailer_options *opts);
>
> and previously these functions, although similar enough (even taking the
> same process_trailer_options struct pointer), did not build on each
> other.
>
> Make format_trailers_from_commit() rely on format_trailers(). Teach
> format_trailers() to process trailers with the additional
> process_trailer_options fields like opts->key_only which is only used by
> format_trailers_from_commit() and not builtin/interpret-trailers.c.
> While we're at it, reorder parameters to put the trailer processing
> options first, and the out parameter (strbuf we write into) at the end.
>
> This unification will allow us to delete the format_trailer_info() and
> print_tok_val() functions in the next patch. They are not deleted here
> in order to keep the diff small.
>
> Helped-by: Junio C Hamano <gitster@pobox.com>
> Signed-off-by: Linus Arver <linusa@google.com>
> ---

I am not sure how I helped this particularly (say, compared to all
the other patches on the list from everybody), though.

The updated format_trailers() does so much more than the original
and does so quite differently, it is hard to tell if the existing
caller is getting the same behaviour (modulo obviously it now needs
to prepare a strbuf and print the resulting string in the strbuf,
which is expected) as before.

The conversion for the sole existing caller looks like this, ...

>  	LIST_HEAD(head);
>  	struct strbuf sb = STRBUF_INIT;
> +	struct strbuf trailer_block = STRBUF_INIT;
>  	struct trailer_info info;
> ...
> -	format_trailers(opts, &head, outfile);
> +	/* Print trailer block. */
> +	format_trailers(opts, &head, &trailer_block);
> +	fwrite(trailer_block.buf, 1, trailer_block.len, outfile);
> +	strbuf_release(&trailer_block);

... and it is very straight-forward.


But what it called was this:

> -static void print_tok_val(FILE *outfile, const char *tok, const char *val)
> -{
> -	char c;
> -
> -	if (!tok) {
> -		fprintf(outfile, "%s\n", val);
> -		return;
> -	}
> -
> -	c = last_non_space_char(tok);
> -	if (!c)
> -		return;
> -	if (strchr(separators, c))
> -		fprintf(outfile, "%s%s\n", tok, val);
> -	else
> -		fprintf(outfile, "%s%c %s\n", tok, separators[0], val);
> -}
> -
> -void format_trailers(const struct process_trailer_options *opts,
> -		     struct list_head *trailers, FILE *outfile)
> -{
> -	struct list_head *pos;
> -	struct trailer_item *item;
> -	list_for_each(pos, trailers) {
> -		item = list_entry(pos, struct trailer_item, list);
> -		if ((!opts->trim_empty || strlen(item->value) > 0) &&
> -		    (!opts->only_trailers || item->token))
> -			print_tok_val(outfile, item->token, item->value);
> -	}
> -}

i.e. iterate over trailers, and sometimes call print_tok_val(),
which does not do all that much.  Print only the val, or when tok is
not empty, show "tok: val" while taking into account the possibility
that the last byte of tok may already be the separator.

But the new thing is way more complex.

> +void format_trailers(const struct process_trailer_options *opts,
> +		     struct list_head *trailers,
> +		     struct strbuf *out)
> +{
> +	struct list_head *pos;
> +	struct trailer_item *item;
> +	int need_separator = 0;
> +
> +	list_for_each(pos, trailers) {
> +		item = list_entry(pos, struct trailer_item, list);
> +		if (item->token) {
> +			char c;
> +
> +			struct strbuf tok = STRBUF_INIT;
> +			struct strbuf val = STRBUF_INIT;
> +			strbuf_addstr(&tok, item->token);
> +			strbuf_addstr(&val, item->value);
> +
> +			/*
> +			 * Skip key/value pairs where the value was empty. This
> +			 * can happen from trailers specified without a
> +			 * separator, like `--trailer "Reviewed-by"` (no
> +			 * corresponding value).
> +			 */
> +			if (opts->trim_empty && !strlen(item->value))
> +				continue;

OK, this matches the first condition to stay silent in the original.

> +			if (!opts->filter || opts->filter(&tok, opts->filter_data)) {

For the original caller of format_trailers(), which is the
interpret_trailers(), .filter member is never set, so we always come
here, I presume.  cmd_interpret_trailers(), before calling
interpret_trailers() could affect the following members:

    .in_place .trim_empty .only_trailers .only_input .unfold .no_divider

so we should make sure this new implementation does not change the
behaviour from the original with various combination of these
options.

> +				if (opts->unfold)
> +					unfold_value(&val);

So, ... how would this affect an invocation of

    $ git interpret-trailers --unfold

which would have set opts.unfold to true in cmd_interpret_trailers()
and eventually called process_trailers() with it, which eventually
called print_all(), which conditionally called print_tok_val() but
never gave the val to unfold_value()?

 ... goes and digs a bit more ...

Ahhhh, original process_trailers() did this by calling
parse_trailers() that munged the value.  So its print_all()
codepath only had to print what has already been munged.

But then in this new code, do we unfold only here?  I thought that
in the new code you moved around, the caller, whose name is now
interpret_trailers(), still calls parse_trailers() and that calls
the unfold_value().  So, are we doing redundant work that may merely
be unneeded (if we are lucky) or could be harmful (if things, other
than the unfold thing I just noticed, that are done both in
parse_trailers() and this function are not idempotent)?

It could be that a bit more preparatory refactoring would clarify.
I dunno.

> +				if (opts->separator && need_separator)
> +					strbuf_addbuf(out, opts->separator);
> +				if (!opts->value_only)
> +					strbuf_addbuf(out, &tok);
> +				if (!opts->key_only && !opts->value_only) {
> +					if (opts->key_value_separator)
> +						strbuf_addbuf(out, opts->key_value_separator);
> +					else {
> +						c = last_non_space_char(tok.buf);
> +						if (c) {
> +							if (!strchr(separators, c))
> +								strbuf_addf(out, "%c ", separators[0]);
> +						}
> +					}
> +				}
> +				if (!opts->key_only)
> +					strbuf_addbuf(out, &val);
> +				if (!opts->separator)
> +					strbuf_addch(out, '\n');
> +
> +				need_separator = 1;
> +			}
> +
> +			strbuf_release(&tok);
> +			strbuf_release(&val);
> +		} else if (!opts->only_trailers) {
> +			if (opts->separator && need_separator) {
> +				strbuf_addbuf(out, opts->separator);
> +			}
> +			strbuf_addstr(out, item->value);
> +			if (opts->separator)
> +				strbuf_rtrim(out);
> +			else
> +				strbuf_addch(out, '\n');
> +
> +			need_separator = 1;
> +		}
> +
> +	}
> +}
Junio C Hamano Jan. 31, 2024, 10:16 p.m. UTC | #3
Junio C Hamano <gitster@pobox.com> writes:

>> +				if (opts->unfold)
>> +					unfold_value(&val);
>
> So, ... how would this affect an invocation of
>
>     $ git interpret-trailers --unfold
>
> which would have set opts.unfold to true in cmd_interpret_trailers()
> and eventually called process_trailers() with it, which eventually
> called print_all(), which conditionally called print_tok_val() but
> never gave the val to unfold_value()?
>
>  ... goes and digs a bit more ...
>
> Ahhhh, original process_trailers() did this by calling
> parse_trailers() that munged the value.  So its print_all()
> codepath only had to print what has already been munged.
>
> But then in this new code, do we unfold only here?  I thought that
> in the new code you moved around, the caller, whose name is now
> interpret_trailers(), still calls parse_trailers() and that calls
> the unfold_value().  So, are we doing redundant work that may merely
> be unneeded (if we are lucky) or could be harmful (if things, other
> than the unfold thing I just noticed, that are done both in
> parse_trailers() and this function are not idempotent)?

I was too confused by the code flow and resorted to tracing what
happens when "git interpret-trailers --unfold" is run in a way
similar to how t7513 does in gdb.  Shame shame.

In any case, the updated code does try to call unfold_value() in
format_trailers() on "val" that has already been unfolded in
parse_trailers().  This may not produce an incorrect result if
unfold_value() is truly idempotent (I do not know), but even if it
is correct for its handling of .unfold bit, this episode lowered my
confidence level on the new code significantly, as I do not know
what unintended effects [*] all the other new things done in this
function have, or if none of these unintended effects are
error-free.

> It could be that a bit more preparatory refactoring would clarify.
> I dunno.

Thanks.

[Footnote] 

 * I am assuming that calling unfold twice on the same value and
   relying for its idempotent-ness for correctness was not your
   intention.
Linus Arver Jan. 31, 2024, 11:21 p.m. UTC | #4
Josh Steadmon <steadmon@google.com> writes:

> On 2024.01.31 01:22, Linus Arver via GitGitGadget wrote:
>> This unification will allow us to delete the format_trailer_info() and
>> print_tok_val() functions in the next patch. They are not deleted here
>> in order to keep the diff small.
>
> Needs to be removed after squashing v2 patch 4 :)

Oops. Will update in next reroll, thanks.
Linus Arver Jan. 31, 2024, 11:29 p.m. UTC | #5
Junio C Hamano <gitster@pobox.com> writes:

> "Linus Arver via GitGitGadget" <gitgitgadget@gmail.com> writes:
>
>> From: Linus Arver <linusa@google.com>
>>
>> Currently have two functions for formatting trailers exposed in
>> trailer.h:
>>
>>     void format_trailers(FILE *outfile, struct list_head *head,
>>                         const struct process_trailer_options *opts);
>>
>>     void format_trailers_from_commit(struct strbuf *out, const char *msg,
>>                                     const struct process_trailer_options *opts);
>>
>> and previously these functions, although similar enough (even taking the
>> same process_trailer_options struct pointer), did not build on each
>> other.
>>
>> Make format_trailers_from_commit() rely on format_trailers(). Teach
>> format_trailers() to process trailers with the additional
>> process_trailer_options fields like opts->key_only which is only used by
>> format_trailers_from_commit() and not builtin/interpret-trailers.c.
>> While we're at it, reorder parameters to put the trailer processing
>> options first, and the out parameter (strbuf we write into) at the end.
>>
>> This unification will allow us to delete the format_trailer_info() and
>> print_tok_val() functions in the next patch. They are not deleted here
>> in order to keep the diff small.
>>
>> Helped-by: Junio C Hamano <gitster@pobox.com>
>> Signed-off-by: Linus Arver <linusa@google.com>
>> ---
>
> I am not sure how I helped this particularly (say, compared to all
> the other patches on the list from everybody), though.

You made a suggestion to put trailer processing options first for
function parameters, which I've adopted in this series (but also in the
larger series).
Linus Arver Feb. 1, 2024, 12:46 a.m. UTC | #6
Junio C Hamano <gitster@pobox.com> writes:

> Junio C Hamano <gitster@pobox.com> writes:
>
>>> +				if (opts->unfold)
>>> +					unfold_value(&val);
>>
>> So, ... how would this affect an invocation of
>>
>>     $ git interpret-trailers --unfold
>>
>> which would have set opts.unfold to true in cmd_interpret_trailers()
>> and eventually called process_trailers() with it, which eventually
>> called print_all(), which conditionally called print_tok_val() but
>> never gave the val to unfold_value()?
>>
>>  ... goes and digs a bit more ...
>>
>> Ahhhh, original process_trailers() did this by calling
>> parse_trailers() that munged the value.  So its print_all()
>> codepath only had to print what has already been munged.
>>
>> But then in this new code, do we unfold only here?  I thought that
>> in the new code you moved around, the caller, whose name is now
>> interpret_trailers(), still calls parse_trailers() and that calls
>> the unfold_value().  So, are we doing redundant work that may merely
>> be unneeded (if we are lucky) or could be harmful (if things, other
>> than the unfold thing I just noticed, that are done both in
>> parse_trailers() and this function are not idempotent)?
>
> I was too confused by the code flow and resorted to tracing what
> happens when "git interpret-trailers --unfold" is run in a way
> similar to how t7513 does in gdb.  Shame shame.

In my larger series I've made the parser and formater much simpler
(removing nesting, calling helper functions, _only parsing once and only
once_, etc) to make both parsing and formatting much easier to
understand. While I am biased as the author of these refactors, I do
think they make the code simpler.

> In any case, the updated code does try to call unfold_value() in
> format_trailers() on "val" that has already been unfolded in
> parse_trailers().

Correct. But this was already the case before this series. IOW the
existing code assumes that this function is idempotent: we call
unfold_value() in parse_trailers() and again in format_trailer_info().

In my tests if I remove the redundant call to unfold_value() in
the new formatter (so that unfolding only happens during
parse_trailers()), all trailer-related tests still pass:

    prove -j47 t{7513,3507,7502,4205,3511,3428,6300}*.sh

FWIW in the larger series we prohibit the parser from making mutations
to the input (unfolding is one such mutation), and allow multiline
(folded) strings to be stored as is in "val", only unfolding the value
if we need to during formatting.

> This may not produce an incorrect result if
> unfold_value() is truly idempotent (I do not know), but even if it
> is correct for its handling of .unfold bit, this episode lowered my
> confidence level on the new code significantly, as I do not know
> what unintended effects [*] all the other new things done in this
> function have, or if none of these unintended effects are
> error-free.

I think dropping the redundant call to unfold_value() would help address
some of your concerns. Of course, all of the existing test cases pass
(with and without the redundant call). As for addressing _all_ (not just
some) concerns, I am not confident I can deliver that as an additional
patch or two to this series because ...

>> It could be that a bit more preparatory refactoring would clarify.
>> I dunno.

... this is basically what I've done in the larger series, because there
are many areas that could be simplified (not to mention addressing some
bugs some bugs are lurking around). Granted, those are more aggressive
refactorings, and are not "preparatory refactoring" at all.

I could just grow this series with another ~22 patches to include those
additional refactors, but I am hesitant about doing so, simply due to
the sheer number of them.

How would you like me to proceed, other than deleting the redundant
unfold_value() call in the next reroll?

For reference, here's a list of those 22 commits that build on this
series as a preview (roughly grouped on themes):

    trailer formatter: deprecate format_trailers()
    trailer formatter: introduce format_trailer_block()
    trailer parser: parse_trailer_v2() for '--trailer <...>' CLI arguments
    trailer parser: parse_trailer_v2() for configuration
    trailer parser: parse_trailer_v2() for trailer blocks
    trailer parser: introduce parse_trailer_v2()
    interpret-trailers: preserve trailers coming from the input
    trailer_block: remove trailer_strings member
    trailer_block: parse trailers in one place
    trailer_block: prepare to remove trailer_strings member
    trailer: make find_same_and_apply_arg() do one thing
    trailer: unify global configuration
    interpret-trailers: print error if given unrecognized arguments
    trailer: make entire iterator struct private
    trailer: rename terms for consistency
    trailer: use create_new_target_for() everywhere
    trailer: capture behavior of addIfDifferentNeighbor
    trailer: EXISTS_REPLACE preserves original position
    trailer: capture replacement behavior for multiple matches
    trailer: split template application into 2 steps
    trailer: free templates in one place
    trailer: template's final value does not depend on in_tok

and "trailer formatter: introduce format_trailer_block()" is the one
that refactors the unified formatter introduced in this patch to have
multiple smaller helper functions.
Junio C Hamano Feb. 1, 2024, 1:07 a.m. UTC | #7
Linus Arver <linusa@google.com> writes:

>> In any case, the updated code does try to call unfold_value() in
>> format_trailers() on "val" that has already been unfolded in
>> parse_trailers().
>
> Correct. But this was already the case before this series. IOW the
> existing code assumes that this function is idempotent: we call
> unfold_value() in parse_trailers() and again in format_trailer_info().

But not in format_trailers(), which is the theme of this step.  In
other words, the behaviour before and after this step of the
function are not the same (modulo that the new version stores the
output in a strbuf), and as the way the changes are presented here,
it is almost impossible to make sure that we are not introducing
regressions, without making such assumptions like "unfold_value() is
idempotent and we already rely on that fact elsewhere in the code".

It is not just "unfold", which was the first thing I happened to
notice, by the way.  There are many more lines in the new lines of
that function, doing things that the original version did not appear
to do, or doing in very different ways.

> I could just grow this series with another ~22 patches to include those
> additional refactors, but I am hesitant about doing so, simply due to
> the sheer number of them.

I actually do not mind at all if you started with a preliminary
clean-up series, and stopped the first batch somewhere in the middle
of the 20+ patches before even reaching any of these 10 patches we
see here, if that gives us a readable set of bite-sized changes that
prepare a solid foundation to rebuild things on top.  I am having a
feeling that not even a single person has reviewed them on list even
though we are already at the third iteration, which is quite
frustrating (and I would imagine that it would be frustrating for
you, too), and I suspect that the step like [v3 03/10] that makes
too large a change with too little explanation (and perhaps a bit of
"trust me, this does not change the behaviour") is one contributing
factor why people are afraid of touching it.
Junio C Hamano Feb. 1, 2024, 4:41 p.m. UTC | #8
Junio C Hamano <gitster@pobox.com> writes:

> ..., if that gives us a readable set of bite-sized changes that
> prepare a solid foundation to rebuild things on top.  I am having a
> feeling that not even a single person has reviewed them on list even
> though we are already at the third iteration, which is quite
> frustrating (and I would imagine that it would be frustrating for
> you, too), ...

I guess "not even a single person" was a bit unfair, given that we
did see some minor comments from me and also Josh during the first
round.

But neither of two rounds saw any in-depth reviews that question "is
the code doing the right thing?", which would take a real reading of
and a comparison between the code before and after the patches, with
some understanding of how things have been working, how things were
envisioned to evolve, and how the patch author would want to change
the course of the evolution of the code in the longer term.  

Christian, you've been touching the code in this area the longuest.
Can we have some of your time reviewing these?

Thanks.
Junio C Hamano Feb. 1, 2024, 5:48 p.m. UTC | #9
Linus Arver <linusa@google.com> writes:

> Josh Steadmon <steadmon@google.com> writes:
>
>> On 2024.01.31 01:22, Linus Arver via GitGitGadget wrote:
>>> This unification will allow us to delete the format_trailer_info() and
>>> print_tok_val() functions in the next patch. They are not deleted here
>>> in order to keep the diff small.
>>
>> Needs to be removed after squashing v2 patch 4 :)
>
> Oops. Will update in next reroll, thanks.

FWIW, by the way, having them in the same patch made it a lot easier
to compare what the original did (with these removed functions) and
what the updated code would do.  When a change is supposed to be a
clean-up of an existing code without changing the behaviour, it helps
to make the before and after versions visible in the patch.

Thanks.
Linus Arver Feb. 1, 2024, 6:22 p.m. UTC | #10
Junio C Hamano <gitster@pobox.com> writes:

> Linus Arver <linusa@google.com> writes:
>
>> Josh Steadmon <steadmon@google.com> writes:
>>
>>> On 2024.01.31 01:22, Linus Arver via GitGitGadget wrote:
>>>> This unification will allow us to delete the format_trailer_info() and
>>>> print_tok_val() functions in the next patch. They are not deleted here
>>>> in order to keep the diff small.
>>>
>>> Needs to be removed after squashing v2 patch 4 :)
>>
>> Oops. Will update in next reroll, thanks.
>
> FWIW, by the way, having them in the same patch made it a lot easier
> to compare what the original did (with these removed functions) and
> what the updated code would do.  When a change is supposed to be a
> clean-up of an existing code without changing the behaviour, it helps
> to make the before and after versions visible in the patch.
>
> Thanks.

Ack, will try to keep that in mind.
Linus Arver Feb. 1, 2024, 6:26 p.m. UTC | #11
Junio C Hamano <gitster@pobox.com> writes:

> Linus Arver <linusa@google.com> writes:
>
>> I could just grow this series with another ~22 patches to include those
>> additional refactors, but I am hesitant about doing so, simply due to
>> the sheer number of them.
>
> I actually do not mind at all if you started with a preliminary
> clean-up series, and stopped the first batch somewhere in the middle
> of the 20+ patches before even reaching any of these 10 patches we
> see here, if that gives us a readable set of bite-sized changes that
> prepare a solid foundation to rebuild things on top.  I am having a
> feeling that not even a single person has reviewed them on list even
> though we are already at the third iteration, which is quite
> frustrating (and I would imagine that it would be frustrating for
> you, too), and I suspect that the step like [v3 03/10] that makes
> too large a change with too little explanation (and perhaps a bit of
> "trust me, this does not change the behaviour") is one contributing
> factor why people are afraid of touching it.

I am planning to spend today trying to break up this patch into smaller
preparatory chunks that still end up at the same state as this patch.

Will post another update on how this goes by EOD. Thanks.
Junio C Hamano Feb. 1, 2024, 7:21 p.m. UTC | #12
Linus Arver <linusa@google.com> writes:

> I am planning to spend today trying to break up this patch into smaller
> preparatory chunks that still end up at the same state as this patch.
>
> Will post another update on how this goes by EOD. Thanks.

I stopped reading the function after noticing the double unfolding,
so there may be similar "why do we do this unexplained new thing in
the function that the original didn't?" issues in the "same state",

In any case, if I understood your plan I heard from you in the
discussion yesterday correctly, the unfolding should not be added to
format (to make it double), but would be moved from parse to format
in a single step.  It would avoid making it double, and would make
the parse step about purely parsing without modification, which is a
very worthwhile thing to do.  So I am not sure if we want to end up
with the same state in the first place, though.

THanks.
Linus Arver Feb. 2, 2024, 7:23 a.m. UTC | #13
Junio C Hamano <gitster@pobox.com> writes:

> Linus Arver <linusa@google.com> writes:
>
>> I am planning to spend today trying to break up this patch into smaller
>> preparatory chunks that still end up at the same state as this patch.
>>
>> Will post another update on how this goes by EOD. Thanks.

I'm happy to report that this breaking-up of things is going a lot
smoother than expected. The hard part is not so much the rearranging of
code but rather thinking up of good commit messages to explain the
intent of each smaller step. I have most of this patch broken up into
much smaller steps with all tests passing.

> I stopped reading the function after noticing the double unfolding,
> so there may be similar "why do we do this unexplained new thing in
> the function that the original didn't?" issues in the "same state",

Yep, and these unexplained new things are the things I am recalling
(from a few weeks back when I first started this series) and am now
trying to find the right commit messages for.

> In any case, if I understood your plan I heard from you in the
> discussion yesterday correctly, the unfolding should not be added to
> format (to make it double),

Yes, it will be done during parsing for now. This is because of
additional refactors that are coming in the later series, which ...

> but would be moved from parse to format
> in a single step.  It would avoid making it double, and would make
> the parse step about purely parsing without modification, which is a
> very worthwhile thing to do.

... achieve this exact thing (unfolding is done "lazily" during
formatting only, not "eagerly" as we do now during parsing).

> So I am not sure if we want to end up
> with the same state in the first place, though.
>
> THanks.

So far the breaking-up of this patch has not revealed a need to change
the end result significantly from what I have here. But I admit I am not
sure this will be the case for the other patches that need similar
treatment in this series.

Considering how it took me most of the day to break up this single
patch, I am not sure I can get a v4 ready by tomorrow. Might need a few
days for that.

Thank you for your very helpful review comments for this series so far.
They have been quite illuminating. :)
Junio C Hamano Feb. 2, 2024, 5:26 p.m. UTC | #14
Linus Arver <linusa@google.com> writes:

> The hard part is not so much the rearranging of
> code but rather thinking up of good commit messages to explain the
> intent of each smaller step.

Good.

Coming up with a good explanation of what you did forces you to
think it thorough again.  People often discover bugs in the code
they just wrote, or different and better ways to solve the problem,
while explaining the change to their colleagues, and I found it has
very similar effect to try to write a good explanation in the commit
log.  Time to write such a good explanation is time worth investing
in.

Incidentally, this is why we value clearly written proposed log
message while reviewing.  A commit that is a product of "I tried
this, which did not work, and then tried something else, which did
not work either, and after some more random tries, this seems to
work" would not have a clear thinking behind it, but if such
iterations of random tries happened to hit the best solution,
thinking backwards from the result to the original problem and
explaining how the solution works would result in a good
description.

Thanks.
diff mbox series

Patch

diff --git a/builtin/interpret-trailers.c b/builtin/interpret-trailers.c
index 8556acde4aa..5352ee65bd1 100644
--- a/builtin/interpret-trailers.c
+++ b/builtin/interpret-trailers.c
@@ -140,6 +140,7 @@  static void interpret_trailers(const struct process_trailer_options *opts,
 {
 	LIST_HEAD(head);
 	struct strbuf sb = STRBUF_INIT;
+	struct strbuf trailer_block = STRBUF_INIT;
 	struct trailer_info info;
 	FILE *outfile = stdout;
 
@@ -169,7 +170,10 @@  static void interpret_trailers(const struct process_trailer_options *opts,
 		process_trailers_lists(&head, &arg_head);
 	}
 
-	format_trailers(opts, &head, outfile);
+	/* Print trailer block. */
+	format_trailers(opts, &head, &trailer_block);
+	fwrite(trailer_block.buf, 1, trailer_block.len, outfile);
+	strbuf_release(&trailer_block);
 
 	free_trailers(&head);
 	trailer_info_release(&info);
diff --git a/pretty.c b/pretty.c
index cf964b060cd..bdbed4295aa 100644
--- a/pretty.c
+++ b/pretty.c
@@ -1759,7 +1759,7 @@  static size_t format_commit_one(struct strbuf *sb, /* in UTF-8 */
 				goto trailer_out;
 		}
 		if (*arg == ')') {
-			format_trailers_from_commit(sb, msg + c->subject_off, &opts);
+			format_trailers_from_commit(&opts, msg + c->subject_off, sb);
 			ret = arg - placeholder + 1;
 		}
 	trailer_out:
diff --git a/ref-filter.c b/ref-filter.c
index 35b989e1dfe..d358953b0ce 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -1985,7 +1985,7 @@  static void grab_sub_body_contents(struct atom_value *val, int deref, struct exp
 			struct strbuf s = STRBUF_INIT;
 
 			/* Format the trailer info according to the trailer_opts given */
-			format_trailers_from_commit(&s, subpos, &atom->u.contents.trailer_opts);
+			format_trailers_from_commit(&atom->u.contents.trailer_opts, subpos, &s);
 
 			v->s = strbuf_detach(&s, NULL);
 		} else if (atom->u.contents.option == C_BARE)
diff --git a/trailer.c b/trailer.c
index d3899195876..71ea2bb67f8 100644
--- a/trailer.c
+++ b/trailer.c
@@ -144,37 +144,6 @@  static char last_non_space_char(const char *s)
 	return '\0';
 }
 
-static void print_tok_val(FILE *outfile, const char *tok, const char *val)
-{
-	char c;
-
-	if (!tok) {
-		fprintf(outfile, "%s\n", val);
-		return;
-	}
-
-	c = last_non_space_char(tok);
-	if (!c)
-		return;
-	if (strchr(separators, c))
-		fprintf(outfile, "%s%s\n", tok, val);
-	else
-		fprintf(outfile, "%s%c %s\n", tok, separators[0], val);
-}
-
-void format_trailers(const struct process_trailer_options *opts,
-		     struct list_head *trailers, FILE *outfile)
-{
-	struct list_head *pos;
-	struct trailer_item *item;
-	list_for_each(pos, trailers) {
-		item = list_entry(pos, struct trailer_item, list);
-		if ((!opts->trim_empty || strlen(item->value) > 0) &&
-		    (!opts->only_trailers || item->token))
-			print_tok_val(outfile, item->token, item->value);
-	}
-}
-
 static struct trailer_item *trailer_from_arg(struct arg_item *arg_tok)
 {
 	struct trailer_item *new_item = xcalloc(1, sizeof(*new_item));
@@ -984,6 +953,78 @@  static void unfold_value(struct strbuf *val)
 	strbuf_release(&out);
 }
 
+void format_trailers(const struct process_trailer_options *opts,
+		     struct list_head *trailers,
+		     struct strbuf *out)
+{
+	struct list_head *pos;
+	struct trailer_item *item;
+	int need_separator = 0;
+
+	list_for_each(pos, trailers) {
+		item = list_entry(pos, struct trailer_item, list);
+		if (item->token) {
+			char c;
+
+			struct strbuf tok = STRBUF_INIT;
+			struct strbuf val = STRBUF_INIT;
+			strbuf_addstr(&tok, item->token);
+			strbuf_addstr(&val, item->value);
+
+			/*
+			 * Skip key/value pairs where the value was empty. This
+			 * can happen from trailers specified without a
+			 * separator, like `--trailer "Reviewed-by"` (no
+			 * corresponding value).
+			 */
+			if (opts->trim_empty && !strlen(item->value))
+				continue;
+
+			if (!opts->filter || opts->filter(&tok, opts->filter_data)) {
+				if (opts->unfold)
+					unfold_value(&val);
+
+				if (opts->separator && need_separator)
+					strbuf_addbuf(out, opts->separator);
+				if (!opts->value_only)
+					strbuf_addbuf(out, &tok);
+				if (!opts->key_only && !opts->value_only) {
+					if (opts->key_value_separator)
+						strbuf_addbuf(out, opts->key_value_separator);
+					else {
+						c = last_non_space_char(tok.buf);
+						if (c) {
+							if (!strchr(separators, c))
+								strbuf_addf(out, "%c ", separators[0]);
+						}
+					}
+				}
+				if (!opts->key_only)
+					strbuf_addbuf(out, &val);
+				if (!opts->separator)
+					strbuf_addch(out, '\n');
+
+				need_separator = 1;
+			}
+
+			strbuf_release(&tok);
+			strbuf_release(&val);
+		} else if (!opts->only_trailers) {
+			if (opts->separator && need_separator) {
+				strbuf_addbuf(out, opts->separator);
+			}
+			strbuf_addstr(out, item->value);
+			if (opts->separator)
+				strbuf_rtrim(out);
+			else
+				strbuf_addch(out, '\n');
+
+			need_separator = 1;
+		}
+
+	}
+}
+
 /*
  * Parse trailers in "str", populating the trailer info and "head"
  * linked list structure.
@@ -1083,74 +1124,25 @@  void trailer_info_release(struct trailer_info *info)
 	free(info->trailers);
 }
 
-static void format_trailer_info(struct strbuf *out,
-				const struct trailer_info *info,
-				const char *msg,
-				const struct process_trailer_options *opts)
+void format_trailers_from_commit(const struct process_trailer_options *opts,
+				 const char *msg,
+				 struct strbuf *out)
 {
-	size_t origlen = out->len;
-	size_t i;
+	LIST_HEAD(head);
+	struct trailer_info info;
+
+	parse_trailers(opts, &info, msg, &head);
 
 	/* If we want the whole block untouched, we can take the fast path. */
 	if (!opts->only_trailers && !opts->unfold && !opts->filter &&
 	    !opts->separator && !opts->key_only && !opts->value_only &&
 	    !opts->key_value_separator) {
-		strbuf_add(out, msg + info->trailer_block_start,
-			   info->trailer_block_end - info->trailer_block_start);
-		return;
-	}
-
-	for (i = 0; i < info->trailer_nr; i++) {
-		char *trailer = info->trailers[i];
-		ssize_t separator_pos = find_separator(trailer, separators);
-
-		if (separator_pos >= 1) {
-			struct strbuf tok = STRBUF_INIT;
-			struct strbuf val = STRBUF_INIT;
-
-			parse_trailer(&tok, &val, NULL, trailer, separator_pos);
-			if (!opts->filter || opts->filter(&tok, opts->filter_data)) {
-				if (opts->unfold)
-					unfold_value(&val);
-
-				if (opts->separator && out->len != origlen)
-					strbuf_addbuf(out, opts->separator);
-				if (!opts->value_only)
-					strbuf_addbuf(out, &tok);
-				if (!opts->key_only && !opts->value_only) {
-					if (opts->key_value_separator)
-						strbuf_addbuf(out, opts->key_value_separator);
-					else
-						strbuf_addstr(out, ": ");
-				}
-				if (!opts->key_only)
-					strbuf_addbuf(out, &val);
-				if (!opts->separator)
-					strbuf_addch(out, '\n');
-			}
-			strbuf_release(&tok);
-			strbuf_release(&val);
-
-		} else if (!opts->only_trailers) {
-			if (opts->separator && out->len != origlen) {
-				strbuf_addbuf(out, opts->separator);
-			}
-			strbuf_addstr(out, trailer);
-			if (opts->separator) {
-				strbuf_rtrim(out);
-			}
-		}
-	}
-
-}
-
-void format_trailers_from_commit(struct strbuf *out, const char *msg,
-				 const struct process_trailer_options *opts)
-{
-	struct trailer_info info;
+		strbuf_add(out, msg + info.trailer_block_start,
+			   info.trailer_block_end - info.trailer_block_start);
+	} else
+		format_trailers(opts, &head, out);
 
-	trailer_info_get(&info, msg, opts);
-	format_trailer_info(out, &info, msg, opts);
+	free_trailers(&head);
 	trailer_info_release(&info);
 }
 
diff --git a/trailer.h b/trailer.h
index ca701c04f3b..244f29fc91f 100644
--- a/trailer.h
+++ b/trailer.h
@@ -101,22 +101,17 @@  void trailer_info_release(struct trailer_info *info);
 
 void trailer_config_init(void);
 void format_trailers(const struct process_trailer_options *opts,
-		     struct list_head *trailers, FILE *outfile);
+		     struct list_head *trailers,
+		     struct strbuf *out);
 void free_trailers(struct list_head *);
 
 /*
- * Format the trailers from the commit msg "msg" into the strbuf "out".
- * Note two caveats about "opts":
- *
- *   - this is primarily a helper for pretty.c, and not
- *     all of the flags are supported.
- *
- *   - this differs from process_trailers slightly in that we always format
- *     only the trailer block itself, even if the "only_trailers" option is not
- *     set.
+ * Convenience function to format the trailers from the commit msg "msg" into
+ * the strbuf "out". Reuses format_trailers internally.
  */
-void format_trailers_from_commit(struct strbuf *out, const char *msg,
-				 const struct process_trailer_options *opts);
+void format_trailers_from_commit(const struct process_trailer_options *opts,
+				 const char *msg,
+				 struct strbuf *out);
 
 /*
  * An interface for iterating over the trailers found in a particular commit