diff mbox series

[v2,1/4] revision: improve commit_rewrite_person()

Message ID 20220707161554.6900-2-siddharthasthana31@gmail.com (mailing list archive)
State Superseded
Headers show
Series Add support for mailmap in cat-file | expand

Commit Message

Siddharth Asthana July 7, 2022, 4:15 p.m. UTC
The function, commit_rewrite_person(), is designed to find and replace
an ident string in the header part, and the way it avoids a random
occuranace of "author A U Thor <author@example.com" in the text is by
insisting "author" to appear at the beginning of line by passing
"\nauthor " as "what".

The implementation also doesn't make any effort to limit itself to the
commit header by locating the blank line that appears after the header
part and stopping the search there. Also, the interface forces the
caller to make multiple calls if it wants to rewrite idents on multiple
headers. It shouldn't be the case.

To support the existing caller better, update commit_rewrite_person()
to:
- Make a single pass in the input buffer to locate headers named
  "author" and "committer" and replace idents on them.
- Stop at the end of the header, ensuring that nothing in the body of
  the commit object is modified.

The return type of the function commit_rewrite_person() has also been
changed from int to void. This has been done because the caller of the
function doesn't do anything with the return value of the function.

By simplyfying the interface of the commit_rewrite_person(), we also
intend to expose it as a public function. We will also be renaming the
function in a future commit to a different name which clearly tells that
the function replaces idents in the header of the commit buffer.

Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: John Cai <johncai86@gmail.com>
Signed-off-by: Siddharth Asthana <siddharthasthana31@gmail.com>
---
 revision.c | 44 ++++++++++++++++++++++++++++++++++----------
 1 file changed, 34 insertions(+), 10 deletions(-)

Comments

Junio C Hamano July 7, 2022, 9:52 p.m. UTC | #1
Siddharth Asthana <siddharthasthana31@gmail.com> writes:

> @@ -3835,8 +3859,8 @@ static int commit_match(struct commit *commit, struct rev_info *opt)
>  		if (!buf.len)
>  			strbuf_addstr(&buf, message);
>  
> -		commit_rewrite_person(&buf, "\nauthor ", opt->mailmap);
> -		commit_rewrite_person(&buf, "\ncommitter ", opt->mailmap);
> +		const char *commit_headers[] = { "author ", "committer ", NULL };

This is decl-after-statement our codebase avoids.

> +		commit_rewrite_person(&buf, commit_headers, opt->mailmap);
>  	}
>  
>  	/* Append "fake" message parts as needed */
Đoàn Trần Công Danh July 8, 2022, 2:50 p.m. UTC | #2
On 2022-07-07 21:45:51+0530, Siddharth Asthana <siddharthasthana31@gmail.com> wrote:
> The function, commit_rewrite_person(), is designed to find and replace
> an ident string in the header part, and the way it avoids a random
> occuranace of "author A U Thor <author@example.com" in the text is by

s/occuranace/occurrence/

> insisting "author" to appear at the beginning of line by passing
> "\nauthor " as "what".
> 
> The implementation also doesn't make any effort to limit itself to the
> commit header by locating the blank line that appears after the header
> part and stopping the search there. Also, the interface forces the
> caller to make multiple calls if it wants to rewrite idents on multiple
> headers. It shouldn't be the case.
> 
> To support the existing caller better, update commit_rewrite_person()
> to:
> - Make a single pass in the input buffer to locate headers named
>   "author" and "committer" and replace idents on them.
> - Stop at the end of the header, ensuring that nothing in the body of
>   the commit object is modified.
> 
> The return type of the function commit_rewrite_person() has also been
> changed from int to void. This has been done because the caller of the
> function doesn't do anything with the return value of the function.
> 
> By simplyfying the interface of the commit_rewrite_person(), we also

s/simplyfying/simplifying/

> intend to expose it as a public function. We will also be renaming the
> function in a future commit to a different name which clearly tells that
> the function replaces idents in the header of the commit buffer.
> 
> Mentored-by: Christian Couder <christian.couder@gmail.com>
> Mentored-by: John Cai <johncai86@gmail.com>
> Signed-off-by: Siddharth Asthana <siddharthasthana31@gmail.com>
> ---
>  revision.c | 44 ++++++++++++++++++++++++++++++++++----------
>  1 file changed, 34 insertions(+), 10 deletions(-)
> 
> diff --git a/revision.c b/revision.c
> index 211352795c..83e68c1f97 100644
> --- a/revision.c
> +++ b/revision.c
> @@ -3755,19 +3755,17 @@ int rewrite_parents(struct rev_info *revs, struct commit *commit,
>  	return 0;
>  }
>  
> -static int commit_rewrite_person(struct strbuf *buf, const char *what, struct string_list *mailmap)
> +/*
> + * Returns the difference between the new and old length of the ident line.
> + */
> +static ssize_t rewrite_ident_line(const char* person, struct strbuf *buf, struct string_list *mailmap)
>  {
> -	char *person, *endp;
> +	char *endp;
>  	size_t len, namelen, maillen;
>  	const char *name;
>  	const char *mail;
>  	struct ident_split ident;
>  
> -	person = strstr(buf->buf, what);
> -	if (!person)
> -		return 0;
> -
> -	person += strlen(what);
>  	endp = strchr(person, '\n');
>  	if (!endp)
>  		return 0;
> @@ -3784,6 +3782,7 @@ static int commit_rewrite_person(struct strbuf *buf, const char *what, struct st
>  
>  	if (map_user(mailmap, &mail, &maillen, &name, &namelen)) {
>  		struct strbuf namemail = STRBUF_INIT;
> +		size_t newlen;
>  
>  		strbuf_addf(&namemail, "%.*s <%.*s>",
>  			    (int)namelen, name, (int)maillen, mail);
> @@ -3792,14 +3791,39 @@ static int commit_rewrite_person(struct strbuf *buf, const char *what, struct st
>  			      ident.mail_end - ident.name_begin + 1,
>  			      namemail.buf, namemail.len);
>  
> +		newlen = namemail.len;
> +
>  		strbuf_release(&namemail);
>  
> -		return 1;
> +		return newlen - (ident.mail_end - ident.name_begin + 1);
>  	}
>  
>  	return 0;
>  }
>  
> +static void commit_rewrite_person(struct strbuf *buf, const char **headers, struct string_list *mailmap)
> +{
> +	size_t buf_offset = 0;
> +
> +	if (!mailmap)
> +		return;
> +
> +	for (;;) {
> +		const char *person, *line = buf->buf + buf_offset;
> +		int i, linelen = strchrnul(line, '\n') - line + 1;

Would you mind to change those lines to avoid mixed of declaration and
expression. Also, I think i and linelen should be ssize_t instead.
Something like:

		const char *person, *line;
		ssize_t i, linelen;

		line = buf->buf + buf_offset;
		linelen = strchrnul(line, '\n') - line + 1;

> +
> +		if (!linelen || linelen == 1)
> +			/* End of header */
> +			return;

And I think linelen will never be 0 or negative,
even if linelen could be 0, I think we want "linelen != 0"
for integer comparision.

> +
> +		buf_offset += linelen;
> +
> +		for (i = 0; headers[i]; i++)
> +			if (skip_prefix(line, headers[i], &person))
> +				buf_offset += rewrite_ident_line(person, buf, mailmap);
> +	}
> +}
> +
>  static int commit_match(struct commit *commit, struct rev_info *opt)
>  {
>  	int retval;
> @@ -3835,8 +3859,8 @@ static int commit_match(struct commit *commit, struct rev_info *opt)
>  		if (!buf.len)
>  			strbuf_addstr(&buf, message);
>  
> -		commit_rewrite_person(&buf, "\nauthor ", opt->mailmap);
> -		commit_rewrite_person(&buf, "\ncommitter ", opt->mailmap);
> +		const char *commit_headers[] = { "author ", "committer ", NULL };
> +		commit_rewrite_person(&buf, commit_headers, opt->mailmap);
>  	}
>  
>  	/* Append "fake" message parts as needed */
> -- 
> 2.37.0.6.ga6a61a26c1.dirty
>
Đoàn Trần Công Danh July 9, 2022, 1:02 a.m. UTC | #3
Add list back to cc

On 2022-07-08 23:23:07+0200, Christian Couder <christian.couder@gmail.com> wrote:
> On Fri, Jul 8, 2022 at 4:50 PM Đoàn Trần Công Danh <congdanhqx@gmail.com> wrote:
> >
> > On 2022-07-07 21:45:51+0530, Siddharth Asthana <siddharthasthana31@gmail.com> wrote:
>
> > > By simplyfying the interface of the commit_rewrite_person(), we also
> >
> > s/simplyfying/simplifying/
>
> Thanks for noticing the typos!
>
> > > +static void commit_rewrite_person(struct strbuf *buf, const char **headers, struct string_list *mailmap)
> > > +{
> > > +     size_t buf_offset = 0;
> > > +
> > > +     if (!mailmap)
> > > +             return;
> > > +
> > > +     for (;;) {
> > > +             const char *person, *line = buf->buf + buf_offset;
> > > +             int i, linelen = strchrnul(line, '\n') - line + 1;
> >
> > Would you mind to change those lines to avoid mixed of declaration and
> > expression.
>
> I am not sure we have some clear guidelines on this.

Yes, we don't have a clear guidelines on this, but this would sure
matches into mixed declaration and expression. And some variables are
initialized and some aren't in the same line. I was confused in my
first glance.

>
> > Also, I think i and linelen should be ssize_t instead.
>
> Could you explain why?
>
> I think 'i' is changed only in:
>
> for (i = 0; headers[i]; i++)
>
> and therefore cannot be négative.
>
> While linelen is set only in:
>
> linelen = strchrnul(line, '\n') - line + 1;
>
> and therefore cannot be négative either.


Yes, both of them can't be negative. As I explained in the part you
removed.  However, I choose ssize_t in my reply because it's
a ptrdiff_t.

So, size_t is an obviously a better choice.
Either size_t and ssize_t could be used in this case, but not int.


>>> +
>>> +             if (!linelen || linelen == 1)
>>> +                     /* End of header */
>>> +                     return;
>>
>>And I think linelen will never be 0 or negative,
>>even if linelen could be 0, I think we want "linelen != 0"
>>for integer comparision.
Christian Couder July 9, 2022, 5:04 a.m. UTC | #4
On Sat, Jul 9, 2022 at 3:02 AM Đoàn Trần Công Danh <congdanhqx@gmail.com> wrote:
>
> Add list back to cc

Sorry for not keeping the list in Cc by mistake.

> On 2022-07-08 23:23:07+0200, Christian Couder <christian.couder@gmail.com> wrote:
> > On Fri, Jul 8, 2022 at 4:50 PM Đoàn Trần Công Danh <congdanhqx@gmail.com> wrote:
> > >
> > > On 2022-07-07 21:45:51+0530, Siddharth Asthana <siddharthasthana31@gmail.com> wrote:
> >
> > > > By simplyfying the interface of the commit_rewrite_person(), we also
> > >
> > > s/simplyfying/simplifying/
> >
> > Thanks for noticing the typos!
> >
> > > > +static void commit_rewrite_person(struct strbuf *buf, const char **headers, struct string_list *mailmap)
> > > > +{
> > > > +     size_t buf_offset = 0;
> > > > +
> > > > +     if (!mailmap)
> > > > +             return;
> > > > +
> > > > +     for (;;) {
> > > > +             const char *person, *line = buf->buf + buf_offset;
> > > > +             int i, linelen = strchrnul(line, '\n') - line + 1;
> > >
> > > Would you mind to change those lines to avoid mixed of declaration and
> > > expression.
> >
> > I am not sure we have some clear guidelines on this.
>
> Yes, we don't have a clear guidelines on this, but this would sure
> matches into mixed declaration and expression. And some variables are
> initialized and some aren't in the same line. I was confused in my
> first glance.

Yeah, it might be clearer to avoid having some variables both declared
and initialized while others are only declared on the same line.

> > > Also, I think i and linelen should be ssize_t instead.
> >
> > Could you explain why?
> >
> > I think 'i' is changed only in:
> >
> > for (i = 0; headers[i]; i++)
> >
> > and therefore cannot be négative.
> >
> > While linelen is set only in:
> >
> > linelen = strchrnul(line, '\n') - line + 1;
> >
> > and therefore cannot be négative either.
>
> Yes, both of them can't be negative. As I explained in the part you
> removed.  However, I choose ssize_t in my reply because it's
> a ptrdiff_t.
>
> So, size_t is an obviously a better choice.
> Either size_t and ssize_t could be used in this case, but not int.

I am Ok with size_t. Thanks for the explanations!

> >>> +
> >>> +             if (!linelen || linelen == 1)
> >>> +                     /* End of header */
> >>> +                     return;
> >>
> >>And I think linelen will never be 0 or negative,
> >>even if linelen could be 0, I think we want "linelen != 0"
> >>for integer comparision.

maybe `if (linelen <=1)` is just a simpler check.
diff mbox series

Patch

diff --git a/revision.c b/revision.c
index 211352795c..83e68c1f97 100644
--- a/revision.c
+++ b/revision.c
@@ -3755,19 +3755,17 @@  int rewrite_parents(struct rev_info *revs, struct commit *commit,
 	return 0;
 }
 
-static int commit_rewrite_person(struct strbuf *buf, const char *what, struct string_list *mailmap)
+/*
+ * Returns the difference between the new and old length of the ident line.
+ */
+static ssize_t rewrite_ident_line(const char* person, struct strbuf *buf, struct string_list *mailmap)
 {
-	char *person, *endp;
+	char *endp;
 	size_t len, namelen, maillen;
 	const char *name;
 	const char *mail;
 	struct ident_split ident;
 
-	person = strstr(buf->buf, what);
-	if (!person)
-		return 0;
-
-	person += strlen(what);
 	endp = strchr(person, '\n');
 	if (!endp)
 		return 0;
@@ -3784,6 +3782,7 @@  static int commit_rewrite_person(struct strbuf *buf, const char *what, struct st
 
 	if (map_user(mailmap, &mail, &maillen, &name, &namelen)) {
 		struct strbuf namemail = STRBUF_INIT;
+		size_t newlen;
 
 		strbuf_addf(&namemail, "%.*s <%.*s>",
 			    (int)namelen, name, (int)maillen, mail);
@@ -3792,14 +3791,39 @@  static int commit_rewrite_person(struct strbuf *buf, const char *what, struct st
 			      ident.mail_end - ident.name_begin + 1,
 			      namemail.buf, namemail.len);
 
+		newlen = namemail.len;
+
 		strbuf_release(&namemail);
 
-		return 1;
+		return newlen - (ident.mail_end - ident.name_begin + 1);
 	}
 
 	return 0;
 }
 
+static void commit_rewrite_person(struct strbuf *buf, const char **headers, struct string_list *mailmap)
+{
+	size_t buf_offset = 0;
+
+	if (!mailmap)
+		return;
+
+	for (;;) {
+		const char *person, *line = buf->buf + buf_offset;
+		int i, linelen = strchrnul(line, '\n') - line + 1;
+
+		if (!linelen || linelen == 1)
+			/* End of header */
+			return;
+
+		buf_offset += linelen;
+
+		for (i = 0; headers[i]; i++)
+			if (skip_prefix(line, headers[i], &person))
+				buf_offset += rewrite_ident_line(person, buf, mailmap);
+	}
+}
+
 static int commit_match(struct commit *commit, struct rev_info *opt)
 {
 	int retval;
@@ -3835,8 +3859,8 @@  static int commit_match(struct commit *commit, struct rev_info *opt)
 		if (!buf.len)
 			strbuf_addstr(&buf, message);
 
-		commit_rewrite_person(&buf, "\nauthor ", opt->mailmap);
-		commit_rewrite_person(&buf, "\ncommitter ", opt->mailmap);
+		const char *commit_headers[] = { "author ", "committer ", NULL };
+		commit_rewrite_person(&buf, commit_headers, opt->mailmap);
 	}
 
 	/* Append "fake" message parts as needed */