diff mbox series

[v4,04/23] sequencer: reuse strbuf_trim() in read_oneliner()

Message ID c7a3cfa20005aeeedc27d2eb4af1e2c4470ad73d.1585962672.git.liu.denton@gmail.com (mailing list archive)
State New, archived
Headers show
Series merge: learn --autostash | expand

Commit Message

Denton Liu April 4, 2020, 1:11 a.m. UTC
In the original read_oneliner() logic, we duplicated the logic for
strbuf_trim_trailing_newline() with one exception: instead of preventing
buffer accesses below index 0, it would prevent buffer accesses below
index `orig_len`. Although this is correct, it isn't worth having the
duplicated logic around.

Reset `buf` before using it then replace the trimming logic with
strbuf_trim().

As a cleanup, remove all reset_strbuf()s that happen before
read_oneliner() is called.

Signed-off-by: Denton Liu <liu.denton@gmail.com>
---
 sequencer.c | 18 +++++-------------
 1 file changed, 5 insertions(+), 13 deletions(-)

Comments

Junio C Hamano April 5, 2020, 9:46 p.m. UTC | #1
Denton Liu <liu.denton@gmail.com> writes:

> @@ -2471,7 +2467,6 @@ void parse_strategy_opts(struct replay_opts *opts, char *raw_opts)
>  
>  static void read_strategy_opts(struct replay_opts *opts, struct strbuf *buf)
>  {
> -	strbuf_reset(buf);
>  	if (!read_oneliner(buf, rebase_path_strategy(), 0))
>  		return;
>  	opts->strategy = strbuf_detach(buf, NULL);
> @@ -2494,7 +2489,6 @@ static int read_populate_opts(struct replay_opts *opts)
>  				free(opts->gpg_sign);
>  				opts->gpg_sign = xstrdup(buf.buf + 2);
>  			}
> -			strbuf_reset(&buf);
>  		}
>  
>  		if (read_oneliner(&buf, rebase_path_allow_rerere_autoupdate(), 1)) {
> @@ -2526,7 +2520,6 @@ static int read_populate_opts(struct replay_opts *opts)
>  			opts->keep_redundant_commits = 1;
>  
>  		read_strategy_opts(opts, &buf);
> -		strbuf_reset(&buf);
>  

>  		if (read_oneliner(&opts->current_fixups,
>  				  rebase_path_current_fixups(), 1)) {

Is this conversion correct around here?  read_oneliner() used to
"append" what was read from the file to what is already in the
strbuf, and many strbuf_reset() in this function was because these
callers of read_oneliner() in this function that has strbuf_reset()
immediately before did *not* want the "append" semantics.  But this
one looks different.  Where in the original does the current_fixups
strbuf get emptied for this read_oneliner() to ignore the previous
contents?  Or are we relying on the caller not to have done anything
to current_fixups before it calls this function?

In other words, the original behaviour of read_oneliner() having the
"append" semantics suggests me that there were callers that wanted
to keep the current contents and append---this current_fixups may
not be one of them, but nevertheless, changing the semantics of the
function from "append" to "discard and read from scratch" without
vetting all the existing callers smells iffy to me.
Denton Liu April 6, 2020, 1:39 a.m. UTC | #2
Hi Junio,

On Sun, Apr 05, 2020 at 02:46:47PM -0700, Junio C Hamano wrote:
> Denton Liu <liu.denton@gmail.com> writes:
> 
> > @@ -2471,7 +2467,6 @@ void parse_strategy_opts(struct replay_opts *opts, char *raw_opts)
> >  
> >  static void read_strategy_opts(struct replay_opts *opts, struct strbuf *buf)
> >  {
> > -	strbuf_reset(buf);
> >  	if (!read_oneliner(buf, rebase_path_strategy(), 0))
> >  		return;
> >  	opts->strategy = strbuf_detach(buf, NULL);
> > @@ -2494,7 +2489,6 @@ static int read_populate_opts(struct replay_opts *opts)
> >  				free(opts->gpg_sign);
> >  				opts->gpg_sign = xstrdup(buf.buf + 2);
> >  			}
> > -			strbuf_reset(&buf);
> >  		}
> >  
> >  		if (read_oneliner(&buf, rebase_path_allow_rerere_autoupdate(), 1)) {
> > @@ -2526,7 +2520,6 @@ static int read_populate_opts(struct replay_opts *opts)
> >  			opts->keep_redundant_commits = 1;
> >  
> >  		read_strategy_opts(opts, &buf);
> > -		strbuf_reset(&buf);
> >  
> 
> >  		if (read_oneliner(&opts->current_fixups,
> >  				  rebase_path_current_fixups(), 1)) {
> 
> Is this conversion correct around here?  read_oneliner() used to
> "append" what was read from the file to what is already in the
> strbuf, and many strbuf_reset() in this function was because these
> callers of read_oneliner() in this function that has strbuf_reset()
> immediately before did *not* want the "append" semantics.  But this
> one looks different.  Where in the original does the current_fixups
> strbuf get emptied for this read_oneliner() to ignore the previous
> contents?  Or are we relying on the caller not to have done anything
> to current_fixups before it calls this function?

As far as I can tell, opts->current_fixups is always empty when
read_oneliner() is called here.

> In other words, the original behaviour of read_oneliner() having the
> "append" semantics suggests me that there were callers that wanted
> to keep the current contents and append---this current_fixups may
> not be one of them, but nevertheless, changing the semantics of the
> function from "append" to "discard and read from scratch" without
> vetting all the existing callers smells iffy to me.

Before making this change, I manually checked all invocations of
read_oneliner() and ensured that they all passed in an empty strbuf.
Same thing with "rebase: use read_oneliner()", I manually checked all of
those invocations as well. It's quite possible that I made a mistake,
though.

To be doubly sure that I caught everything, I ran the test suite on
'master' with this patch:

	diff --git a/builtin/rebase.c b/builtin/rebase.c
	index 27a07d4e78..a0c03dd1d6 100644
	--- a/builtin/rebase.c
	+++ b/builtin/rebase.c
	@@ -589,6 +589,9 @@ static const char *state_dir_path(const char *filename, struct rebase_options *o
	 /* Read one file, then strip line endings */
	 static int read_one(const char *path, struct strbuf *buf)
	 {
	+	if (buf->len)
	+		BUG("rebase change bad: %s", buf->buf);
	+
		if (strbuf_read_file(buf, path, 0) < 0)
			return error_errno(_("could not read '%s'"), path);
		strbuf_trim_trailing_newline(buf);
	diff --git a/sequencer.c b/sequencer.c
	index 6fd2674632..d7bc5c9c95 100644
	--- a/sequencer.c
	+++ b/sequencer.c
	@@ -433,6 +433,9 @@ static int read_oneliner(struct strbuf *buf,
	 {
		int orig_len = buf->len;
	 
	+	if (buf->len)
	+		BUG("sequencer change bad: %s", buf->buf);
	+
		if (!file_exists(path))
			return 0;

Thanks,

Denton
Phillip Wood April 6, 2020, 2:03 p.m. UTC | #3
Hi Denton

On 04/04/2020 02:11, Denton Liu wrote:
> In the original read_oneliner() logic, we duplicated the logic for
> strbuf_trim_trailing_newline() with one exception: instead of preventing
> buffer accesses below index 0, it would prevent buffer accesses below
> index `orig_len`. Although this is correct, it isn't worth having the
> duplicated logic around.
> 
> Reset `buf` before using it then replace the trimming logic with
> strbuf_trim().

I should have picked up on this before but this changes the semantics of 
the function as it strips all whitespace from the start and end of the 
strbuf. Above you talked about using strbuf_trim_trailing_newline() 
instead which would not change the semantics of this function. You could 
test to see if we've read anything and only call 
strbuf_trim_trailing_newline() in that case without messing with 
strbuf_reset(). (there is a corner case where if the buffer ends with 
'\r' when the function is called and it reads a single '\n' then the 
'\r' would be stripped as well but I think that is unlikely to happen in 
the wild)

Best Wishes

Phillip

> As a cleanup, remove all reset_strbuf()s that happen before
> read_oneliner() is called.
> 
> Signed-off-by: Denton Liu <liu.denton@gmail.com>
> ---
>   sequencer.c | 18 +++++-------------
>   1 file changed, 5 insertions(+), 13 deletions(-)
> 
> diff --git a/sequencer.c b/sequencer.c
> index faab0b13e8..09ca68f540 100644
> --- a/sequencer.c
> +++ b/sequencer.c
> @@ -420,8 +420,8 @@ static int write_message(const void *buf, size_t len, const char *filename,
>   }
>   
>   /*
> - * Reads a file that was presumably written by a shell script, i.e. with an
> - * end-of-line marker that needs to be stripped.
> + * Resets a strbuf then reads a file that was presumably written by a shell
> + * script, i.e. with an end-of-line marker that needs to be stripped.
>    *
>    * Note that only the last end-of-line marker is stripped, consistent with the
>    * behavior of "$(cat path)" in a shell script.
> @@ -431,23 +431,19 @@ static int write_message(const void *buf, size_t len, const char *filename,
>   static int read_oneliner(struct strbuf *buf,
>   	const char *path, int skip_if_empty)
>   {
> -	int orig_len = buf->len;
>   
>   	if (!file_exists(path))
>   		return 0;
>   
> +	strbuf_reset(buf);
>   	if (strbuf_read_file(buf, path, 0) < 0) {
>   		warning_errno(_("could not read '%s'"), path);
>   		return 0;
>   	}
>   
> -	if (buf->len > orig_len && buf->buf[buf->len - 1] == '\n') {
> -		if (--buf->len > orig_len && buf->buf[buf->len - 1] == '\r')
> -			--buf->len;
> -		buf->buf[buf->len] = '\0';
> -	}
> +	strbuf_trim(buf);
>   
> -	if (skip_if_empty && buf->len == orig_len)
> +	if (skip_if_empty && !buf->len)
>   		return 0;
>   
>   	return 1;
> @@ -2471,7 +2467,6 @@ void parse_strategy_opts(struct replay_opts *opts, char *raw_opts)
>   
>   static void read_strategy_opts(struct replay_opts *opts, struct strbuf *buf)
>   {
> -	strbuf_reset(buf);
>   	if (!read_oneliner(buf, rebase_path_strategy(), 0))
>   		return;
>   	opts->strategy = strbuf_detach(buf, NULL);
> @@ -2494,7 +2489,6 @@ static int read_populate_opts(struct replay_opts *opts)
>   				free(opts->gpg_sign);
>   				opts->gpg_sign = xstrdup(buf.buf + 2);
>   			}
> -			strbuf_reset(&buf);
>   		}
>   
>   		if (read_oneliner(&buf, rebase_path_allow_rerere_autoupdate(), 1)) {
> @@ -2526,7 +2520,6 @@ static int read_populate_opts(struct replay_opts *opts)
>   			opts->keep_redundant_commits = 1;
>   
>   		read_strategy_opts(opts, &buf);
> -		strbuf_reset(&buf);
>   
>   		if (read_oneliner(&opts->current_fixups,
>   				  rebase_path_current_fixups(), 1)) {
> @@ -4006,7 +3999,6 @@ static int pick_commits(struct repository *r,
>   				res = error(_("could not read orig-head"));
>   				goto cleanup_head_ref;
>   			}
> -			strbuf_reset(&buf);
>   			if (!read_oneliner(&buf, rebase_path_onto(), 0)) {
>   				res = error(_("could not read 'onto'"));
>   				goto cleanup_head_ref;
>
Phillip Wood April 6, 2020, 2:42 p.m. UTC | #4
Hi Denton

On 06/04/2020 15:03, Phillip Wood wrote:
> Hi Denton
> 
> On 04/04/2020 02:11, Denton Liu wrote:
>> In the original read_oneliner() logic, we duplicated the logic for
>> strbuf_trim_trailing_newline() with one exception: instead of preventing
>> buffer accesses below index 0, it would prevent buffer accesses below
>> index `orig_len`. Although this is correct, it isn't worth having the
>> duplicated logic around.
>>
>> Reset `buf` before using it then replace the trimming logic with
>> strbuf_trim().
> 
> I should have picked up on this before but this changes the semantics of 
> the function as it strips all whitespace from the start and end of the 
> strbuf. Above you talked about using strbuf_trim_trailing_newline() 
> instead which would not change the semantics of this function. You could 
> test to see if we've read anything and only call 
> strbuf_trim_trailing_newline() in that case without messing with 
> strbuf_reset(). (there is a corner case where if the buffer ends with 
> '\r' when the function is called and it reads a single '\n' then the 
> '\r' would be stripped as well but I think that is unlikely to happen in 
> the wild)

I'd also be fine with dropping this patch and leaving the trimming code 
as is

Best Wishes

Phillip

> Best Wishes
> 
> Phillip
> 
>> As a cleanup, remove all reset_strbuf()s that happen before
>> read_oneliner() is called.
>>
>> Signed-off-by: Denton Liu <liu.denton@gmail.com>
>> ---
>>   sequencer.c | 18 +++++-------------
>>   1 file changed, 5 insertions(+), 13 deletions(-)
>>
>> diff --git a/sequencer.c b/sequencer.c
>> index faab0b13e8..09ca68f540 100644
>> --- a/sequencer.c
>> +++ b/sequencer.c
>> @@ -420,8 +420,8 @@ static int write_message(const void *buf, size_t 
>> len, const char *filename,
>>   }
>>   /*
>> - * Reads a file that was presumably written by a shell script, i.e. 
>> with an
>> - * end-of-line marker that needs to be stripped.
>> + * Resets a strbuf then reads a file that was presumably written by a 
>> shell
>> + * script, i.e. with an end-of-line marker that needs to be stripped.
>>    *
>>    * Note that only the last end-of-line marker is stripped, 
>> consistent with the
>>    * behavior of "$(cat path)" in a shell script.
>> @@ -431,23 +431,19 @@ static int write_message(const void *buf, size_t 
>> len, const char *filename,
>>   static int read_oneliner(struct strbuf *buf,
>>       const char *path, int skip_if_empty)
>>   {
>> -    int orig_len = buf->len;
>>       if (!file_exists(path))
>>           return 0;
>> +    strbuf_reset(buf);
>>       if (strbuf_read_file(buf, path, 0) < 0) {
>>           warning_errno(_("could not read '%s'"), path);
>>           return 0;
>>       }
>> -    if (buf->len > orig_len && buf->buf[buf->len - 1] == '\n') {
>> -        if (--buf->len > orig_len && buf->buf[buf->len - 1] == '\r')
>> -            --buf->len;
>> -        buf->buf[buf->len] = '\0';
>> -    }
>> +    strbuf_trim(buf);
>> -    if (skip_if_empty && buf->len == orig_len)
>> +    if (skip_if_empty && !buf->len)
>>           return 0;
>>       return 1;
>> @@ -2471,7 +2467,6 @@ void parse_strategy_opts(struct replay_opts 
>> *opts, char *raw_opts)
>>   static void read_strategy_opts(struct replay_opts *opts, struct 
>> strbuf *buf)
>>   {
>> -    strbuf_reset(buf);
>>       if (!read_oneliner(buf, rebase_path_strategy(), 0))
>>           return;
>>       opts->strategy = strbuf_detach(buf, NULL);
>> @@ -2494,7 +2489,6 @@ static int read_populate_opts(struct replay_opts 
>> *opts)
>>                   free(opts->gpg_sign);
>>                   opts->gpg_sign = xstrdup(buf.buf + 2);
>>               }
>> -            strbuf_reset(&buf);
>>           }
>>           if (read_oneliner(&buf, 
>> rebase_path_allow_rerere_autoupdate(), 1)) {
>> @@ -2526,7 +2520,6 @@ static int read_populate_opts(struct replay_opts 
>> *opts)
>>               opts->keep_redundant_commits = 1;
>>           read_strategy_opts(opts, &buf);
>> -        strbuf_reset(&buf);
>>           if (read_oneliner(&opts->current_fixups,
>>                     rebase_path_current_fixups(), 1)) {
>> @@ -4006,7 +3999,6 @@ static int pick_commits(struct repository *r,
>>                   res = error(_("could not read orig-head"));
>>                   goto cleanup_head_ref;
>>               }
>> -            strbuf_reset(&buf);
>>               if (!read_oneliner(&buf, rebase_path_onto(), 0)) {
>>                   res = error(_("could not read 'onto'"));
>>                   goto cleanup_head_ref;
>>
Denton Liu April 7, 2020, 1:56 p.m. UTC | #5
Hi Phillip,

On Mon, Apr 06, 2020 at 03:42:59PM +0100, Phillip Wood wrote:
> Hi Denton
> 
> On 06/04/2020 15:03, Phillip Wood wrote:
> > Hi Denton
> > 
> > On 04/04/2020 02:11, Denton Liu wrote:
> > > In the original read_oneliner() logic, we duplicated the logic for
> > > strbuf_trim_trailing_newline() with one exception: instead of preventing
> > > buffer accesses below index 0, it would prevent buffer accesses below
> > > index `orig_len`. Although this is correct, it isn't worth having the
> > > duplicated logic around.
> > > 
> > > Reset `buf` before using it then replace the trimming logic with
> > > strbuf_trim().
> > 
> > I should have picked up on this before but this changes the semantics of
> > the function as it strips all whitespace from the start and end of the
> > strbuf. Above you talked about using strbuf_trim_trailing_newline()
> > instead which would not change the semantics of this function. You could
> > test to see if we've read anything and only call
> > strbuf_trim_trailing_newline() in that case without messing with
> > strbuf_reset(). (there is a corner case where if the buffer ends with
> > '\r' when the function is called and it reads a single '\n' then the
> > '\r' would be stripped as well but I think that is unlikely to happen in
> > the wild)
> 
> I'd also be fine with dropping this patch and leaving the trimming code as
> is

Yeah, I'll just drop this patch. I don't think it's worth holding up the
rest of the series on this small detail. I'll probably try to resurrect
this at a later time.

Thanks,

Denton

> Best Wishes
> 
> Phillip
diff mbox series

Patch

diff --git a/sequencer.c b/sequencer.c
index faab0b13e8..09ca68f540 100644
--- a/sequencer.c
+++ b/sequencer.c
@@ -420,8 +420,8 @@  static int write_message(const void *buf, size_t len, const char *filename,
 }
 
 /*
- * Reads a file that was presumably written by a shell script, i.e. with an
- * end-of-line marker that needs to be stripped.
+ * Resets a strbuf then reads a file that was presumably written by a shell
+ * script, i.e. with an end-of-line marker that needs to be stripped.
  *
  * Note that only the last end-of-line marker is stripped, consistent with the
  * behavior of "$(cat path)" in a shell script.
@@ -431,23 +431,19 @@  static int write_message(const void *buf, size_t len, const char *filename,
 static int read_oneliner(struct strbuf *buf,
 	const char *path, int skip_if_empty)
 {
-	int orig_len = buf->len;
 
 	if (!file_exists(path))
 		return 0;
 
+	strbuf_reset(buf);
 	if (strbuf_read_file(buf, path, 0) < 0) {
 		warning_errno(_("could not read '%s'"), path);
 		return 0;
 	}
 
-	if (buf->len > orig_len && buf->buf[buf->len - 1] == '\n') {
-		if (--buf->len > orig_len && buf->buf[buf->len - 1] == '\r')
-			--buf->len;
-		buf->buf[buf->len] = '\0';
-	}
+	strbuf_trim(buf);
 
-	if (skip_if_empty && buf->len == orig_len)
+	if (skip_if_empty && !buf->len)
 		return 0;
 
 	return 1;
@@ -2471,7 +2467,6 @@  void parse_strategy_opts(struct replay_opts *opts, char *raw_opts)
 
 static void read_strategy_opts(struct replay_opts *opts, struct strbuf *buf)
 {
-	strbuf_reset(buf);
 	if (!read_oneliner(buf, rebase_path_strategy(), 0))
 		return;
 	opts->strategy = strbuf_detach(buf, NULL);
@@ -2494,7 +2489,6 @@  static int read_populate_opts(struct replay_opts *opts)
 				free(opts->gpg_sign);
 				opts->gpg_sign = xstrdup(buf.buf + 2);
 			}
-			strbuf_reset(&buf);
 		}
 
 		if (read_oneliner(&buf, rebase_path_allow_rerere_autoupdate(), 1)) {
@@ -2526,7 +2520,6 @@  static int read_populate_opts(struct replay_opts *opts)
 			opts->keep_redundant_commits = 1;
 
 		read_strategy_opts(opts, &buf);
-		strbuf_reset(&buf);
 
 		if (read_oneliner(&opts->current_fixups,
 				  rebase_path_current_fixups(), 1)) {
@@ -4006,7 +3999,6 @@  static int pick_commits(struct repository *r,
 				res = error(_("could not read orig-head"));
 				goto cleanup_head_ref;
 			}
-			strbuf_reset(&buf);
 			if (!read_oneliner(&buf, rebase_path_onto(), 0)) {
 				res = error(_("could not read 'onto'"));
 				goto cleanup_head_ref;