diff mbox series

[05/10] sequencer: use the trailer iterator

Message ID fd4a9d54d9522973a4c22e43cb1d7964033d4837.1704869487.git.gitgitgadget@gmail.com (mailing list archive)
State New, archived
Headers show
Series Enrich Trailer API | expand

Commit Message

Linus Arver Jan. 10, 2024, 6:51 a.m. UTC
From: Linus Arver <linusa@google.com>

This patch allows for the removal of "trailer_info_get()" from the
trailer.h API, which will be in the next patch.

Instead of calling "trailer_info_get()", which is a low-level function
in the trailers implementation (trailer.c), call
trailer_iterator_advance(), which was specifically designed for public
consumption in f0939a0eb1 (trailer: add interface for iterating over
commit trailers, 2020-09-27).

Avoiding "trailer_info_get()" means we don't have to worry about options
like "no_divider" (relevant for parsing trailers). We also don't have to
check for things like "info.trailer_start == info.trailer_end" to see
whether there were any trailers (instead we can just check to see
whether the iterator advanced at all).

Also, teach the iterator about non-trailer lines, by adding a new field
called "raw" to hold both trailer and non-trailer lines. This is
necessary because a "trailer block" is a list of trailer lines of at
least 25% trailers (see 146245063e (trailer: allow non-trailers in
trailer block, 2016-10-21)), such that it may hold non-trailer lines.

Signed-off-by: Linus Arver <linusa@google.com>
---
 builtin/shortlog.c |  7 +++++--
 sequencer.c        | 35 +++++++++++++++--------------------
 trailer.c          | 20 ++++++++++++--------
 trailer.h          | 13 +++++++++++++
 4 files changed, 45 insertions(+), 30 deletions(-)

Comments

Junio C Hamano Jan. 19, 2024, 12:45 a.m. UTC | #1
"Linus Arver via GitGitGadget" <gitgitgadget@gmail.com> writes:

> From: Linus Arver <linusa@google.com>
>
> This patch allows for the removal of "trailer_info_get()" from the
> trailer.h API, which will be in the next patch.
>
> Instead of calling "trailer_info_get()", which is a low-level function
> in the trailers implementation (trailer.c), call
> trailer_iterator_advance(), which was specifically designed for public
> consumption in f0939a0eb1 (trailer: add interface for iterating over
> commit trailers, 2020-09-27).
>
> Avoiding "trailer_info_get()" means we don't have to worry about options
> like "no_divider" (relevant for parsing trailers). We also don't have to
> check for things like "info.trailer_start == info.trailer_end" to see
> whether there were any trailers (instead we can just check to see
> whether the iterator advanced at all).

Nice.

> Also, teach the iterator about non-trailer lines, by adding a new field
> called "raw" to hold both trailer and non-trailer lines. This is
> necessary because a "trailer block" is a list of trailer lines of at
> least 25% trailers (see 146245063e (trailer: allow non-trailers in
> trailer block, 2016-10-21)), such that it may hold non-trailer lines.

OK.  This would change behaviour, wouldn't it, in the sense that we
used to yield a non-trailer line from the old iterator but the new
one skips them?  Is that something we can demonstrate and protect in
tests (presumably once we conclude these refactoring, we would be
able to call into this machinery from unit-testing framework)?

Thanks.

> Signed-off-by: Linus Arver <linusa@google.com>
> ---
>  builtin/shortlog.c |  7 +++++--
>  sequencer.c        | 35 +++++++++++++++--------------------
>  trailer.c          | 20 ++++++++++++--------
>  trailer.h          | 13 +++++++++++++
>  4 files changed, 45 insertions(+), 30 deletions(-)
>
> diff --git a/builtin/shortlog.c b/builtin/shortlog.c
> index 1307ed2b88a..dc8fd5a5532 100644
> --- a/builtin/shortlog.c
> +++ b/builtin/shortlog.c
> @@ -172,7 +172,7 @@ static void insert_records_from_trailers(struct shortlog *log,
>  					 const char *oneline)
>  {
>  	struct trailer_iterator iter;
> -	const char *commit_buffer, *body;
> +	const char *commit_buffer, *body, *value;
>  	struct strbuf ident = STRBUF_INIT;
>  
>  	if (!log->trailers.nr)
> @@ -190,7 +190,10 @@ static void insert_records_from_trailers(struct shortlog *log,
>  
>  	trailer_iterator_init(&iter, body);
>  	while (trailer_iterator_advance(&iter)) {
> -		const char *value = iter.val.buf;
> +		if (!iter.is_trailer)
> +			continue;
> +
> +		value = iter.val.buf;
>  
>  		if (!string_list_has_string(&log->trailers, iter.key.buf))
>  			continue;
> diff --git a/sequencer.c b/sequencer.c
> index 3cc88d8a800..d199869cda9 100644
> --- a/sequencer.c
> +++ b/sequencer.c
> @@ -319,37 +319,32 @@ static const char *get_todo_path(const struct replay_opts *opts)
>  static int has_conforming_footer(struct strbuf *sb, struct strbuf *sob,
>  	size_t ignore_footer)
>  {
> -	struct process_trailer_options opts = PROCESS_TRAILER_OPTIONS_INIT;
> -	struct trailer_info info;
> -	size_t i;
> -	int found_sob = 0, found_sob_last = 0;
> -	char saved_char;
> -
> -	opts.no_divider = 1;
> +	struct trailer_iterator iter;
> +	size_t i = 0, found_sob = 0;
> +	char saved_char = sb->buf[sb->len - ignore_footer];
>  
>  	if (ignore_footer) {
> -		saved_char = sb->buf[sb->len - ignore_footer];
>  		sb->buf[sb->len - ignore_footer] = '\0';
>  	}
>  
> -	trailer_info_get(&info, sb->buf, &opts);
> +	trailer_iterator_init(&iter, sb->buf);
> +	while (trailer_iterator_advance(&iter)) {
> +		i++;
> +		if (sob &&
> +		    iter.is_trailer &&
> +		    !strncmp(iter.raw.buf, sob->buf, sob->len)) {
> +			found_sob = i;
> +		}
> +	}
> +	trailer_iterator_release(&iter);
>  
>  	if (ignore_footer)
>  		sb->buf[sb->len - ignore_footer] = saved_char;
>  
> -	if (info.trailer_block_start == info.trailer_block_end)
> +	if (!i)
>  		return 0;
>  
> -	for (i = 0; i < info.trailer_nr; i++)
> -		if (sob && !strncmp(info.trailers[i], sob->buf, sob->len)) {
> -			found_sob = 1;
> -			if (i == info.trailer_nr - 1)
> -				found_sob_last = 1;
> -		}
> -
> -	trailer_info_release(&info);
> -
> -	if (found_sob_last)
> +	if (found_sob == i)
>  		return 3;
>  	if (found_sob)
>  		return 2;
> diff --git a/trailer.c b/trailer.c
> index 132f22b3dd7..593717fd56c 100644
> --- a/trailer.c
> +++ b/trailer.c
> @@ -1151,6 +1151,7 @@ void trailer_iterator_init(struct trailer_iterator *iter, const char *msg)
>  	struct process_trailer_options opts = PROCESS_TRAILER_OPTIONS_INIT;
>  	strbuf_init(&iter->key, 0);
>  	strbuf_init(&iter->val, 0);
> +	strbuf_init(&iter->raw, 0);
>  	opts.no_divider = 1;
>  	trailer_info_get(&iter->internal.info, msg, &opts);
>  	iter->internal.cur = 0;
> @@ -1158,17 +1159,19 @@ void trailer_iterator_init(struct trailer_iterator *iter, const char *msg)
>  
>  int trailer_iterator_advance(struct trailer_iterator *iter)
>  {
> -	while (iter->internal.cur < iter->internal.info.trailer_nr) {
> -		char *trailer = iter->internal.info.trailers[iter->internal.cur++];
> -		int separator_pos = find_separator(trailer, separators);
> -
> -		if (separator_pos < 1)
> -			continue; /* not a real trailer */
> -
> +	char *line;
> +	int separator_pos;
> +	if (iter->internal.cur < iter->internal.info.trailer_nr) {
> +		line = iter->internal.info.trailers[iter->internal.cur++];
> +		separator_pos = find_separator(line, separators);
> +		iter->is_trailer = (separator_pos > 0);
> +
> +		strbuf_reset(&iter->raw);
> +		strbuf_addstr(&iter->raw, line);
>  		strbuf_reset(&iter->key);
>  		strbuf_reset(&iter->val);
>  		parse_trailer(&iter->key, &iter->val, NULL,
> -			      trailer, separator_pos);
> +			      line, separator_pos);
>  		unfold_value(&iter->val);
>  		return 1;
>  	}
> @@ -1180,4 +1183,5 @@ void trailer_iterator_release(struct trailer_iterator *iter)
>  	trailer_info_release(&iter->internal.info);
>  	strbuf_release(&iter->val);
>  	strbuf_release(&iter->key);
> +	strbuf_release(&iter->raw);
>  }
> diff --git a/trailer.h b/trailer.h
> index 50f70556302..d50c9fd79b2 100644
> --- a/trailer.h
> +++ b/trailer.h
> @@ -127,6 +127,19 @@ struct trailer_iterator {
>  	struct strbuf key;
>  	struct strbuf val;
>  
> +	/*
> +	 * Raw line (e.g., "foo: bar baz") before being parsed as a trailer
> +	 * key/val pair. This field can contain non-trailer lines because it's
> +	 * valid for a trailer block to contain such lines (i.e., we only
> +	 * require 25% of the lines in a trailer block to be trailer lines).
> +	 */
> +	struct strbuf raw;
> +
> +	/*
> +	 * 1 if the raw line was parsed as a separate key/val pair.
> +	 */
> +	int is_trailer;
> +
>  	/* private */
>  	struct {
>  		struct trailer_info info;
Linus Arver Jan. 20, 2024, 8:04 p.m. UTC | #2
Junio C Hamano <gitster@pobox.com> writes:

>> From: Linus Arver <linusa@google.com>
>> Also, teach the iterator about non-trailer lines, by adding a new field
>> called "raw" to hold both trailer and non-trailer lines. This is
>> necessary because a "trailer block" is a list of trailer lines of at
>> least 25% trailers (see 146245063e (trailer: allow non-trailers in
>> trailer block, 2016-10-21)), such that it may hold non-trailer lines.
>
> OK.  This would change behaviour, wouldn't it, in the sense that we
> used to yield a non-trailer line from the old iterator but the new
> one skips them?

I think it's the other way; the old iterator only iterated over trailer
lines, skipping over non-trailer lines (see the "not a real trailer"
deleted bit for trailer_iterator_advance()). The new one iterates over
all lines found in the trailer block, whether they are trailer or
non-trailer lines.

The function insert_records_from_trailers() from shortlog.c uses the new
iterator, but has to be careful because the new iterator goes over
non-trailer lines too. That's why it now does

    if (!iter.is_trailer)
            continue;

to do the skipping itself.

> Is that something we can demonstrate and protect in
> tests (presumably once we conclude these refactoring, we would be
> able to call into this machinery from unit-testing framework)?

Yup, that is exactly what I want to do once the dust around the trailer
API settles down after this series (and also the larger series). :)
Linus Arver Jan. 22, 2024, 11:22 p.m. UTC | #3
"Linus Arver via GitGitGadget" <gitgitgadget@gmail.com> writes:

> diff --git a/trailer.h b/trailer.h
> index 50f70556302..d50c9fd79b2 100644
> --- a/trailer.h
> +++ b/trailer.h
> @@ -127,6 +127,19 @@ struct trailer_iterator {
>  	struct strbuf key;
>  	struct strbuf val;
>  
> +	/*
> +	 * Raw line (e.g., "foo: bar baz") before being parsed as a trailer
> +	 * key/val pair. This field can contain non-trailer lines because it's
> +	 * valid for a trailer block to contain such lines (i.e., we only
> +	 * require 25% of the lines in a trailer block to be trailer lines).
> +	 */
> +	struct strbuf raw;

Originally I used a strbuf here for consistency with the other strbufs
used in the iterator for the key and val members. But now I've realized
that there's no need to make "raw" a strbuf at all, because iterator
users will never need to manipulate the string that this points to. Will
change to just "const char *" in the reroll.
diff mbox series

Patch

diff --git a/builtin/shortlog.c b/builtin/shortlog.c
index 1307ed2b88a..dc8fd5a5532 100644
--- a/builtin/shortlog.c
+++ b/builtin/shortlog.c
@@ -172,7 +172,7 @@  static void insert_records_from_trailers(struct shortlog *log,
 					 const char *oneline)
 {
 	struct trailer_iterator iter;
-	const char *commit_buffer, *body;
+	const char *commit_buffer, *body, *value;
 	struct strbuf ident = STRBUF_INIT;
 
 	if (!log->trailers.nr)
@@ -190,7 +190,10 @@  static void insert_records_from_trailers(struct shortlog *log,
 
 	trailer_iterator_init(&iter, body);
 	while (trailer_iterator_advance(&iter)) {
-		const char *value = iter.val.buf;
+		if (!iter.is_trailer)
+			continue;
+
+		value = iter.val.buf;
 
 		if (!string_list_has_string(&log->trailers, iter.key.buf))
 			continue;
diff --git a/sequencer.c b/sequencer.c
index 3cc88d8a800..d199869cda9 100644
--- a/sequencer.c
+++ b/sequencer.c
@@ -319,37 +319,32 @@  static const char *get_todo_path(const struct replay_opts *opts)
 static int has_conforming_footer(struct strbuf *sb, struct strbuf *sob,
 	size_t ignore_footer)
 {
-	struct process_trailer_options opts = PROCESS_TRAILER_OPTIONS_INIT;
-	struct trailer_info info;
-	size_t i;
-	int found_sob = 0, found_sob_last = 0;
-	char saved_char;
-
-	opts.no_divider = 1;
+	struct trailer_iterator iter;
+	size_t i = 0, found_sob = 0;
+	char saved_char = sb->buf[sb->len - ignore_footer];
 
 	if (ignore_footer) {
-		saved_char = sb->buf[sb->len - ignore_footer];
 		sb->buf[sb->len - ignore_footer] = '\0';
 	}
 
-	trailer_info_get(&info, sb->buf, &opts);
+	trailer_iterator_init(&iter, sb->buf);
+	while (trailer_iterator_advance(&iter)) {
+		i++;
+		if (sob &&
+		    iter.is_trailer &&
+		    !strncmp(iter.raw.buf, sob->buf, sob->len)) {
+			found_sob = i;
+		}
+	}
+	trailer_iterator_release(&iter);
 
 	if (ignore_footer)
 		sb->buf[sb->len - ignore_footer] = saved_char;
 
-	if (info.trailer_block_start == info.trailer_block_end)
+	if (!i)
 		return 0;
 
-	for (i = 0; i < info.trailer_nr; i++)
-		if (sob && !strncmp(info.trailers[i], sob->buf, sob->len)) {
-			found_sob = 1;
-			if (i == info.trailer_nr - 1)
-				found_sob_last = 1;
-		}
-
-	trailer_info_release(&info);
-
-	if (found_sob_last)
+	if (found_sob == i)
 		return 3;
 	if (found_sob)
 		return 2;
diff --git a/trailer.c b/trailer.c
index 132f22b3dd7..593717fd56c 100644
--- a/trailer.c
+++ b/trailer.c
@@ -1151,6 +1151,7 @@  void trailer_iterator_init(struct trailer_iterator *iter, const char *msg)
 	struct process_trailer_options opts = PROCESS_TRAILER_OPTIONS_INIT;
 	strbuf_init(&iter->key, 0);
 	strbuf_init(&iter->val, 0);
+	strbuf_init(&iter->raw, 0);
 	opts.no_divider = 1;
 	trailer_info_get(&iter->internal.info, msg, &opts);
 	iter->internal.cur = 0;
@@ -1158,17 +1159,19 @@  void trailer_iterator_init(struct trailer_iterator *iter, const char *msg)
 
 int trailer_iterator_advance(struct trailer_iterator *iter)
 {
-	while (iter->internal.cur < iter->internal.info.trailer_nr) {
-		char *trailer = iter->internal.info.trailers[iter->internal.cur++];
-		int separator_pos = find_separator(trailer, separators);
-
-		if (separator_pos < 1)
-			continue; /* not a real trailer */
-
+	char *line;
+	int separator_pos;
+	if (iter->internal.cur < iter->internal.info.trailer_nr) {
+		line = iter->internal.info.trailers[iter->internal.cur++];
+		separator_pos = find_separator(line, separators);
+		iter->is_trailer = (separator_pos > 0);
+
+		strbuf_reset(&iter->raw);
+		strbuf_addstr(&iter->raw, line);
 		strbuf_reset(&iter->key);
 		strbuf_reset(&iter->val);
 		parse_trailer(&iter->key, &iter->val, NULL,
-			      trailer, separator_pos);
+			      line, separator_pos);
 		unfold_value(&iter->val);
 		return 1;
 	}
@@ -1180,4 +1183,5 @@  void trailer_iterator_release(struct trailer_iterator *iter)
 	trailer_info_release(&iter->internal.info);
 	strbuf_release(&iter->val);
 	strbuf_release(&iter->key);
+	strbuf_release(&iter->raw);
 }
diff --git a/trailer.h b/trailer.h
index 50f70556302..d50c9fd79b2 100644
--- a/trailer.h
+++ b/trailer.h
@@ -127,6 +127,19 @@  struct trailer_iterator {
 	struct strbuf key;
 	struct strbuf val;
 
+	/*
+	 * Raw line (e.g., "foo: bar baz") before being parsed as a trailer
+	 * key/val pair. This field can contain non-trailer lines because it's
+	 * valid for a trailer block to contain such lines (i.e., we only
+	 * require 25% of the lines in a trailer block to be trailer lines).
+	 */
+	struct strbuf raw;
+
+	/*
+	 * 1 if the raw line was parsed as a separate key/val pair.
+	 */
+	int is_trailer;
+
 	/* private */
 	struct {
 		struct trailer_info info;