diff mbox series

[v5,1/4] revision: improve commit_rewrite_person()

Message ID 20220716074055.1786231-2-siddharthasthana31@gmail.com (mailing list archive)
State Superseded
Headers show
Series Add support for mailmap in cat-file | expand

Commit Message

Siddharth Asthana July 16, 2022, 7:40 a.m. UTC
The function, commit_rewrite_person(), is designed to find and replace
an ident string in the header part, and the way it avoids a random
occurrence of "author A U Thor <author@example.com" in the text is by
insisting "author" to appear at the beginning of line by passing
"\nauthor " as "what".

The implementation also doesn't make any effort to limit itself to the
commit header by locating the blank line that appears after the header
part and stopping the search there. Also, the interface forces the
caller to make multiple calls if it wants to rewrite idents on multiple
headers. It shouldn't be the case.

To support the existing caller better, update commit_rewrite_person()
to:
- Make a single pass in the input buffer to locate headers named
  "author" and "committer" and replace idents on them.
- Stop at the end of the header, ensuring that nothing in the body of
  the commit object is modified.

The return type of the function commit_rewrite_person() has also been
changed from int to void. This has been done because the caller of the
function doesn't do anything with the return value of the function.

By simplifying the interface of the commit_rewrite_person(), we also
intend to expose it as a public function. We will also be renaming the
function in a future commit to a different name which clearly tells that
the function replaces idents in the header of the commit buffer.

Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: John Cai <johncai86@gmail.com>
Helped-by: Đoàn Trần Công Danh <congdanhqx@gmail.com>
Helped-by: Johannes Schindelin <Johannes.Schindelin@gmx.de>
Signed-off-by: Siddharth Asthana <siddharthasthana31@gmail.com>
---
 revision.c | 51 +++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 41 insertions(+), 10 deletions(-)

Comments

Junio C Hamano July 17, 2022, 10:11 p.m. UTC | #1
Siddharth Asthana <siddharthasthana31@gmail.com> writes:

> +/*
> + * Returns the difference between the new and old length of the ident line.
> + */
> +static ssize_t rewrite_ident_line(const char *person, struct strbuf *buf,
> +								  struct string_list *mailmap)

Line-folding is a good idea, but why do we use such a deep
indentation?  In this project, tab-width is 8.

> +static void commit_rewrite_person(struct strbuf *buf, const char **header,
> +								  struct string_list *mailmap)

Likewise.

> +{
> +	size_t buf_offset = 0;
> +
> +	if (!mailmap)
> +		return;
> +
> +	for (;;) {
> +		const char *person, *line;
> +		size_t i;
> +
> +		line = buf->buf + buf_offset;
> +		if (!*line || *line == '\n')
> +			return; /* End of header */
> +
> +		for (i = 0; header[i]; i++)
> +			if (skip_prefix(line, header[i], &person)) {
> +				rewrite_ident_line(person, buf, mailmap);

If the return value of rewrite_ident_line() is never used, perhaps
stop computing the return value in that function and make it return
"void".  I personally thought it was clever to return "how much does
the ident part grew/shrunk?" from the helper and use it to adjust,
but I do not mind to scrap the clever-ness if some folks may find it
harder to understand.

> +				break;
> +			}
> +
> +		buf_offset = strchrnul(buf->buf + buf_offset, '\n') - buf->buf;

And this is a "easier-to-understand but need to scan the buffer once
again, only to figure out what we ought to already know" version.

> +		if (buf->buf[buf_offset] == '\n')
> +			++buf_offset;

Prefer post-increment when there is no reason to favor pre-increment
over post-increment, i.e. write this as "buf_offset++".

> +	}
> +}
> +
>  static int commit_match(struct commit *commit, struct rev_info *opt)
>  {
>  	int retval;
> @@ -3832,11 +3862,12 @@ static int commit_match(struct commit *commit, struct rev_info *opt)
>  		strbuf_addstr(&buf, message);
>  
>  	if (opt->grep_filter.header_list && opt->mailmap) {
> +		const char *commit_headers[] = { "author ", "committer ", NULL };
> +
>  		if (!buf.len)
>  			strbuf_addstr(&buf, message);
>  
> -		commit_rewrite_person(&buf, "\nauthor ", opt->mailmap);
> -		commit_rewrite_person(&buf, "\ncommitter ", opt->mailmap);
> +		commit_rewrite_person(&buf, commit_headers, opt->mailmap);
>  	}
>  
>  	/* Append "fake" message parts as needed */
diff mbox series

Patch

diff --git a/revision.c b/revision.c
index 211352795c..9909da928e 100644
--- a/revision.c
+++ b/revision.c
@@ -3755,19 +3755,18 @@  int rewrite_parents(struct rev_info *revs, struct commit *commit,
 	return 0;
 }
 
-static int commit_rewrite_person(struct strbuf *buf, const char *what, struct string_list *mailmap)
+/*
+ * Returns the difference between the new and old length of the ident line.
+ */
+static ssize_t rewrite_ident_line(const char *person, struct strbuf *buf,
+								  struct string_list *mailmap)
 {
-	char *person, *endp;
+	char *endp;
 	size_t len, namelen, maillen;
 	const char *name;
 	const char *mail;
 	struct ident_split ident;
 
-	person = strstr(buf->buf, what);
-	if (!person)
-		return 0;
-
-	person += strlen(what);
 	endp = strchr(person, '\n');
 	if (!endp)
 		return 0;
@@ -3784,6 +3783,7 @@  static int commit_rewrite_person(struct strbuf *buf, const char *what, struct st
 
 	if (map_user(mailmap, &mail, &maillen, &name, &namelen)) {
 		struct strbuf namemail = STRBUF_INIT;
+		size_t newlen;
 
 		strbuf_addf(&namemail, "%.*s <%.*s>",
 			    (int)namelen, name, (int)maillen, mail);
@@ -3792,14 +3792,44 @@  static int commit_rewrite_person(struct strbuf *buf, const char *what, struct st
 			      ident.mail_end - ident.name_begin + 1,
 			      namemail.buf, namemail.len);
 
+		newlen = namemail.len;
+
 		strbuf_release(&namemail);
 
-		return 1;
+		return newlen - (ident.mail_end - ident.name_begin + 1);
 	}
 
 	return 0;
 }
 
+static void commit_rewrite_person(struct strbuf *buf, const char **header,
+								  struct string_list *mailmap)
+{
+	size_t buf_offset = 0;
+
+	if (!mailmap)
+		return;
+
+	for (;;) {
+		const char *person, *line;
+		size_t i;
+
+		line = buf->buf + buf_offset;
+		if (!*line || *line == '\n')
+			return; /* End of header */
+
+		for (i = 0; header[i]; i++)
+			if (skip_prefix(line, header[i], &person)) {
+				rewrite_ident_line(person, buf, mailmap);
+				break;
+			}
+
+		buf_offset = strchrnul(buf->buf + buf_offset, '\n') - buf->buf;
+		if (buf->buf[buf_offset] == '\n')
+			++buf_offset;
+	}
+}
+
 static int commit_match(struct commit *commit, struct rev_info *opt)
 {
 	int retval;
@@ -3832,11 +3862,12 @@  static int commit_match(struct commit *commit, struct rev_info *opt)
 		strbuf_addstr(&buf, message);
 
 	if (opt->grep_filter.header_list && opt->mailmap) {
+		const char *commit_headers[] = { "author ", "committer ", NULL };
+
 		if (!buf.len)
 			strbuf_addstr(&buf, message);
 
-		commit_rewrite_person(&buf, "\nauthor ", opt->mailmap);
-		commit_rewrite_person(&buf, "\ncommitter ", opt->mailmap);
+		commit_rewrite_person(&buf, commit_headers, opt->mailmap);
 	}
 
 	/* Append "fake" message parts as needed */