diff mbox series

[v3,04/10] sequencer: use the trailer iterator

Message ID bf2b8e1a3c4bc77022fab1ebaa0fc89a7813b4c4.1706664145.git.gitgitgadget@gmail.com (mailing list archive)
State Superseded
Headers show
Series Enrich Trailer API | expand

Commit Message

Linus Arver Jan. 31, 2024, 1:22 a.m. UTC
From: Linus Arver <linusa@google.com>

This patch allows for the removal of "trailer_info_get()" from the
trailer.h API, which will be in the next patch.

Instead of calling "trailer_info_get()", which is a low-level function
in the trailers implementation (trailer.c), call
trailer_iterator_advance(), which was specifically designed for public
consumption in f0939a0eb1 (trailer: add interface for iterating over
commit trailers, 2020-09-27).

Avoiding "trailer_info_get()" means we don't have to worry about options
like "no_divider" (relevant for parsing trailers). We also don't have to
check for things like "info.trailer_start == info.trailer_end" to see
whether there were any trailers (instead we can just check to see
whether the iterator advanced at all).

Also, teach the iterator about non-trailer lines, by adding a new field
called "raw" to hold both trailer and non-trailer lines. This is
necessary because a "trailer block" is a list of trailer lines of at
least 25% trailers (see 146245063e (trailer: allow non-trailers in
trailer block, 2016-10-21)), such that it may hold non-trailer lines.

Signed-off-by: Linus Arver <linusa@google.com>
---
 builtin/shortlog.c |  7 +++++--
 sequencer.c        | 35 +++++++++++++++--------------------
 trailer.c          | 17 +++++++++--------
 trailer.h          | 13 +++++++++++++
 4 files changed, 42 insertions(+), 30 deletions(-)

Comments

Junio C Hamano Feb. 1, 2024, 6:06 p.m. UTC | #1
"Linus Arver via GitGitGadget" <gitgitgadget@gmail.com> writes:

> From: Linus Arver <linusa@google.com>
>
> This patch allows for the removal of "trailer_info_get()" from the
> trailer.h API, which will be in the next patch.

Hmph, do you mean "shortlog" and the sequencer were the only two
external callers and with this we can make it file-scope static to
trailer.c?  Or do you mean the next step will be more than a removal
of a declaration from trailer.h plus adding "static" in front of its
definition in trailer.c, because there need other adjustments before
that happens?

> Instead of calling "trailer_info_get()", which is a low-level function
> in the trailers implementation (trailer.c), call
> trailer_iterator_advance(), which was specifically designed for public
> consumption in f0939a0eb1 (trailer: add interface for iterating over
> commit trailers, 2020-09-27).

;-).

> Also, teach the iterator about non-trailer lines, by adding a new field
> called "raw" to hold both trailer and non-trailer lines. This is
> necessary because a "trailer block" is a list of trailer lines of at
> least 25% trailers (see 146245063e (trailer: allow non-trailers in
> trailer block, 2016-10-21)), such that it may hold non-trailer lines.

That sounds like a task larger than something we would want in a
patch that focuses on another task (e.g. update sequencer not to
call trailer_info_get()) while at it.  It seems from a casual glance
that the change to shortlog.c is to accomodate this change in the
semantics of what the iterator could return?  It smells that this
patch does two more or less unrelated things at the same time?

> Signed-off-by: Linus Arver <linusa@google.com>
> ---
>  builtin/shortlog.c |  7 +++++--
>  sequencer.c        | 35 +++++++++++++++--------------------
>  trailer.c          | 17 +++++++++--------
>  trailer.h          | 13 +++++++++++++
>  4 files changed, 42 insertions(+), 30 deletions(-)
>
> diff --git a/builtin/shortlog.c b/builtin/shortlog.c
> index 1307ed2b88a..dc8fd5a5532 100644
> --- a/builtin/shortlog.c
> +++ b/builtin/shortlog.c
> @@ -172,7 +172,7 @@ static void insert_records_from_trailers(struct shortlog *log,
>  					 const char *oneline)
>  {
>  	struct trailer_iterator iter;
> -	const char *commit_buffer, *body;
> +	const char *commit_buffer, *body, *value;
>  	struct strbuf ident = STRBUF_INIT;
>  
>  	if (!log->trailers.nr)
> @@ -190,7 +190,10 @@ static void insert_records_from_trailers(struct shortlog *log,
>  
>  	trailer_iterator_init(&iter, body);
>  	while (trailer_iterator_advance(&iter)) {
> -		const char *value = iter.val.buf;
> +		if (!iter.is_trailer)
> +			continue;
> +
> +		value = iter.val.buf;
>  
>  		if (!string_list_has_string(&log->trailers, iter.key.buf))
>  			continue;
> diff --git a/sequencer.c b/sequencer.c
> index 3cc88d8a800..bc7c82c5271 100644
> --- a/sequencer.c
> +++ b/sequencer.c
> @@ -319,37 +319,32 @@ static const char *get_todo_path(const struct replay_opts *opts)
>  static int has_conforming_footer(struct strbuf *sb, struct strbuf *sob,
>  	size_t ignore_footer)
>  {
> -	struct process_trailer_options opts = PROCESS_TRAILER_OPTIONS_INIT;
> -	struct trailer_info info;
> -	size_t i;
> -	int found_sob = 0, found_sob_last = 0;
> -	char saved_char;
> -
> -	opts.no_divider = 1;
> +	struct trailer_iterator iter;
> +	size_t i = 0, found_sob = 0;
> +	char saved_char = sb->buf[sb->len - ignore_footer];
>  
>  	if (ignore_footer) {
> -		saved_char = sb->buf[sb->len - ignore_footer];
>  		sb->buf[sb->len - ignore_footer] = '\0';
>  	}
>  
> -	trailer_info_get(&info, sb->buf, &opts);
> +	trailer_iterator_init(&iter, sb->buf);
> +	while (trailer_iterator_advance(&iter)) {
> +		i++;
> +		if (sob &&
> +		    iter.is_trailer &&
> +		    !strncmp(iter.raw, sob->buf, sob->len)) {
> +			found_sob = i;
> +		}
> +	}
> +	trailer_iterator_release(&iter);
>  
>  	if (ignore_footer)
>  		sb->buf[sb->len - ignore_footer] = saved_char;
>  
> -	if (info.trailer_block_start == info.trailer_block_end)
> +	if (!i)
>  		return 0;
>  
> -	for (i = 0; i < info.trailer_nr; i++)
> -		if (sob && !strncmp(info.trailers[i], sob->buf, sob->len)) {
> -			found_sob = 1;
> -			if (i == info.trailer_nr - 1)
> -				found_sob_last = 1;
> -		}
> -
> -	trailer_info_release(&info);
> -
> -	if (found_sob_last)
> +	if (found_sob == i)
>  		return 3;
>  	if (found_sob)
>  		return 2;
> diff --git a/trailer.c b/trailer.c
> index 71ea2bb67f8..5bcc9b0006c 100644
> --- a/trailer.c
> +++ b/trailer.c
> @@ -1158,17 +1158,18 @@ void trailer_iterator_init(struct trailer_iterator *iter, const char *msg)
>  
>  int trailer_iterator_advance(struct trailer_iterator *iter)
>  {
> -	while (iter->internal.cur < iter->internal.info.trailer_nr) {
> -		char *trailer = iter->internal.info.trailers[iter->internal.cur++];
> -		int separator_pos = find_separator(trailer, separators);
> -
> -		if (separator_pos < 1)
> -			continue; /* not a real trailer */
> -
> +	char *line;
> +	int separator_pos;
> +	if (iter->internal.cur < iter->internal.info.trailer_nr) {
> +		line = iter->internal.info.trailers[iter->internal.cur++];
> +		separator_pos = find_separator(line, separators);
> +		iter->is_trailer = (separator_pos > 0);
> +
> +		iter->raw = line;
>  		strbuf_reset(&iter->key);
>  		strbuf_reset(&iter->val);
>  		parse_trailer(&iter->key, &iter->val, NULL,
> -			      trailer, separator_pos);
> +			      line, separator_pos);
>  		unfold_value(&iter->val);
>  		return 1;
>  	}
> diff --git a/trailer.h b/trailer.h
> index 244f29fc91f..a7599067acc 100644
> --- a/trailer.h
> +++ b/trailer.h
> @@ -127,6 +127,19 @@ struct trailer_iterator {
>  	struct strbuf key;
>  	struct strbuf val;
>  
> +	/*
> +	 * Raw line (e.g., "foo: bar baz") before being parsed as a trailer
> +	 * key/val pair as part of a trailer block. A trailer block can be
> +	 * either 100% trailer lines, or mixed in with non-trailer lines (in
> +	 * which case at least 25% must be trailer lines).
> +	 */
> +	const char *raw;
> +
> +	/*
> +	 * 1 if the raw line was parsed as a trailer line (key/val pair).
> +	 */
> +	int is_trailer;
> +
>  	/* private */
>  	struct {
>  		struct trailer_info info;
Linus Arver Feb. 1, 2024, 7:14 p.m. UTC | #2
Junio C Hamano <gitster@pobox.com> writes:

> "Linus Arver via GitGitGadget" <gitgitgadget@gmail.com> writes:
>
>> From: Linus Arver <linusa@google.com>
>>
>> This patch allows for the removal of "trailer_info_get()" from the
>> trailer.h API, which will be in the next patch.
>
> Hmph, do you mean "shortlog" and the sequencer were the only two
> external callers and with this we can make it file-scope static to
> trailer.c?

This was what I meant (originally ...

> Or do you mean the next step will be more than a removal
> of a declaration from trailer.h plus adding "static" in front of its
> definition in trailer.c, because there need other adjustments before
> that happens?

... but now I realize that the operation adds a few small tweaks, such
as tweaking the parameters it expects and also what it returns). In the
spirit of breaking up patch 3, I will also break this up into
preparatory patches as well.

>> Also, teach the iterator about non-trailer lines, by adding a new field
>> called "raw" to hold both trailer and non-trailer lines. This is
>> necessary because a "trailer block" is a list of trailer lines of at
>> least 25% trailers (see 146245063e (trailer: allow non-trailers in
>> trailer block, 2016-10-21)), such that it may hold non-trailer lines.
>
> That sounds like a task larger than something we would want in a
> patch that focuses on another task (e.g. update sequencer not to
> call trailer_info_get()) while at it.  It seems from a casual glance
> that the change to shortlog.c is to accomodate this change in the
> semantics of what the iterator could return?  It smells that this
> patch does two more or less unrelated things at the same time?

I think you're correct. Hopefully breaking this up will make things
easier to review.

I am learning very quickly from your review comments in patch 3 and in
here that, in the absence of area experts, the existing tests/CI tests
cannot be trusted alone (after all some tests could be missing), which
makes it more important to do so-called "micro-commits".

But overall, breaking things up is a good thing anyway as a general
practice, so, I think this is a good lesson. TBH I have a (bad) habit of
saying "is the diff ~100 lines?" and if so I don't spend any time
thinking of breaking these up. X-<

Thanks.
Linus Arver Feb. 3, 2024, 12:39 a.m. UTC | #3
Linus Arver <linusa@google.com> writes:

> Junio C Hamano <gitster@pobox.com> writes:
>
>> "Linus Arver via GitGitGadget" <gitgitgadget@gmail.com> writes:
>>
>>> From: Linus Arver <linusa@google.com>
>>>
>>> Also, teach the iterator about non-trailer lines, by adding a new field
>>> called "raw" to hold both trailer and non-trailer lines. This is
>>> necessary because a "trailer block" is a list of trailer lines of at
>>> least 25% trailers (see 146245063e (trailer: allow non-trailers in
>>> trailer block, 2016-10-21)), such that it may hold non-trailer lines.
>>
>> That sounds like a task larger than something we would want in a
>> patch that focuses on another task (e.g. update sequencer not to
>> call trailer_info_get()) while at it.  It seems from a casual glance
>> that the change to shortlog.c is to accomodate this change in the
>> semantics of what the iterator could return?  It smells that this
>> patch does two more or less unrelated things at the same time?
>
> I think you're correct. Hopefully breaking this up will make things
> easier to review.

And now that I've broken it up locally, I can say that the change I made
to shortlog was unnecessary (shortlog already has a check to see if the
trailer key is empty) which makes the "is_trailer" check I added to it
here redundant (because non-trailer lines, which the new iterator can
iterate over, have empty keys).

Will remove the shortlog change in v4. Thanks.
diff mbox series

Patch

diff --git a/builtin/shortlog.c b/builtin/shortlog.c
index 1307ed2b88a..dc8fd5a5532 100644
--- a/builtin/shortlog.c
+++ b/builtin/shortlog.c
@@ -172,7 +172,7 @@  static void insert_records_from_trailers(struct shortlog *log,
 					 const char *oneline)
 {
 	struct trailer_iterator iter;
-	const char *commit_buffer, *body;
+	const char *commit_buffer, *body, *value;
 	struct strbuf ident = STRBUF_INIT;
 
 	if (!log->trailers.nr)
@@ -190,7 +190,10 @@  static void insert_records_from_trailers(struct shortlog *log,
 
 	trailer_iterator_init(&iter, body);
 	while (trailer_iterator_advance(&iter)) {
-		const char *value = iter.val.buf;
+		if (!iter.is_trailer)
+			continue;
+
+		value = iter.val.buf;
 
 		if (!string_list_has_string(&log->trailers, iter.key.buf))
 			continue;
diff --git a/sequencer.c b/sequencer.c
index 3cc88d8a800..bc7c82c5271 100644
--- a/sequencer.c
+++ b/sequencer.c
@@ -319,37 +319,32 @@  static const char *get_todo_path(const struct replay_opts *opts)
 static int has_conforming_footer(struct strbuf *sb, struct strbuf *sob,
 	size_t ignore_footer)
 {
-	struct process_trailer_options opts = PROCESS_TRAILER_OPTIONS_INIT;
-	struct trailer_info info;
-	size_t i;
-	int found_sob = 0, found_sob_last = 0;
-	char saved_char;
-
-	opts.no_divider = 1;
+	struct trailer_iterator iter;
+	size_t i = 0, found_sob = 0;
+	char saved_char = sb->buf[sb->len - ignore_footer];
 
 	if (ignore_footer) {
-		saved_char = sb->buf[sb->len - ignore_footer];
 		sb->buf[sb->len - ignore_footer] = '\0';
 	}
 
-	trailer_info_get(&info, sb->buf, &opts);
+	trailer_iterator_init(&iter, sb->buf);
+	while (trailer_iterator_advance(&iter)) {
+		i++;
+		if (sob &&
+		    iter.is_trailer &&
+		    !strncmp(iter.raw, sob->buf, sob->len)) {
+			found_sob = i;
+		}
+	}
+	trailer_iterator_release(&iter);
 
 	if (ignore_footer)
 		sb->buf[sb->len - ignore_footer] = saved_char;
 
-	if (info.trailer_block_start == info.trailer_block_end)
+	if (!i)
 		return 0;
 
-	for (i = 0; i < info.trailer_nr; i++)
-		if (sob && !strncmp(info.trailers[i], sob->buf, sob->len)) {
-			found_sob = 1;
-			if (i == info.trailer_nr - 1)
-				found_sob_last = 1;
-		}
-
-	trailer_info_release(&info);
-
-	if (found_sob_last)
+	if (found_sob == i)
 		return 3;
 	if (found_sob)
 		return 2;
diff --git a/trailer.c b/trailer.c
index 71ea2bb67f8..5bcc9b0006c 100644
--- a/trailer.c
+++ b/trailer.c
@@ -1158,17 +1158,18 @@  void trailer_iterator_init(struct trailer_iterator *iter, const char *msg)
 
 int trailer_iterator_advance(struct trailer_iterator *iter)
 {
-	while (iter->internal.cur < iter->internal.info.trailer_nr) {
-		char *trailer = iter->internal.info.trailers[iter->internal.cur++];
-		int separator_pos = find_separator(trailer, separators);
-
-		if (separator_pos < 1)
-			continue; /* not a real trailer */
-
+	char *line;
+	int separator_pos;
+	if (iter->internal.cur < iter->internal.info.trailer_nr) {
+		line = iter->internal.info.trailers[iter->internal.cur++];
+		separator_pos = find_separator(line, separators);
+		iter->is_trailer = (separator_pos > 0);
+
+		iter->raw = line;
 		strbuf_reset(&iter->key);
 		strbuf_reset(&iter->val);
 		parse_trailer(&iter->key, &iter->val, NULL,
-			      trailer, separator_pos);
+			      line, separator_pos);
 		unfold_value(&iter->val);
 		return 1;
 	}
diff --git a/trailer.h b/trailer.h
index 244f29fc91f..a7599067acc 100644
--- a/trailer.h
+++ b/trailer.h
@@ -127,6 +127,19 @@  struct trailer_iterator {
 	struct strbuf key;
 	struct strbuf val;
 
+	/*
+	 * Raw line (e.g., "foo: bar baz") before being parsed as a trailer
+	 * key/val pair as part of a trailer block. A trailer block can be
+	 * either 100% trailer lines, or mixed in with non-trailer lines (in
+	 * which case at least 25% must be trailer lines).
+	 */
+	const char *raw;
+
+	/*
+	 * 1 if the raw line was parsed as a trailer line (key/val pair).
+	 */
+	int is_trailer;
+
 	/* private */
 	struct {
 		struct trailer_info info;