diff mbox series

[v2,08/11] rebase: remove redundant strbuf

Message ID ad3c4efc0272be8eee052a08731656a406f8f90b.1631546362.git.gitgitgadget@gmail.com (mailing list archive)
State Superseded
Headers show
Series rebase: dereference tags | expand

Commit Message

Phillip Wood Sept. 13, 2021, 3:19 p.m. UTC
From: Phillip Wood <phillip.wood@dunelm.org.uk>

There is already an strbuf that can be reused for creating messages.
msg is not freed if there is an error and there is a logic error where
we call strbuf_release(&msg) followed by strbuf_reset(&msg) and
strbuf_addf(&msg).

Signed-off-by: Phillip Wood <phillip.wood@dunelm.org.uk>
---
 builtin/rebase.c | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

Comments

René Scharfe Sept. 13, 2021, 6:34 p.m. UTC | #1
Am 13.09.21 um 17:19 schrieb Phillip Wood via GitGitGadget:
> From: Phillip Wood <phillip.wood@dunelm.org.uk>
>
> There is already an strbuf that can be reused for creating messages.

Reminds me of a terrible joke from elementary school: In Peter's class
everybody is called Klaus, except Franz -- his name is Michael.

Why would we want to use the same variable for multiple purposes?  This
makes the code harder to read.  And the allocation overhead for these
few cases should be negligible.

The most important question: Is this patch really needed to support
tags (the purpose of this series)?

> msg is not freed if there is an error and there is a logic error where
> we call strbuf_release(&msg) followed by strbuf_reset(&msg) and
> strbuf_addf(&msg).

strbuf_reset() after strbuf_release() is redundant but legal.

>
> Signed-off-by: Phillip Wood <phillip.wood@dunelm.org.uk>
> ---
>  builtin/rebase.c | 14 ++++++--------
>  1 file changed, 6 insertions(+), 8 deletions(-)
>
> diff --git a/builtin/rebase.c b/builtin/rebase.c
> index 6138009d6e4..69a67ab1252 100644
> --- a/builtin/rebase.c
> +++ b/builtin/rebase.c
> @@ -1299,7 +1299,6 @@ int cmd_rebase(int argc, const char **argv, const char *prefix)
>  	int ret, flags, total_argc, in_progress = 0;
>  	int keep_base = 0;
>  	int ok_to_skip_pre_rebase = 0;
> -	struct strbuf msg = STRBUF_INIT;
>  	struct strbuf revisions = STRBUF_INIT;
>  	struct strbuf buf = STRBUF_INIT;
>  	struct object_id merge_base;
> @@ -2063,30 +2062,29 @@ int cmd_rebase(int argc, const char **argv, const char *prefix)
>  		printf(_("First, rewinding head to replay your work on top of "
>  			 "it...\n"));
>
> -	strbuf_addf(&msg, "%s: checkout %s",
> +	strbuf_reset(&buf);
> +	strbuf_addf(&buf, "%s: checkout %s",
>  		    getenv(GIT_REFLOG_ACTION_ENVIRONMENT), options.onto_name);
>  	if (reset_head(the_repository, &options.onto->object.oid, "checkout", NULL,
>  		       RESET_HEAD_DETACH | RESET_ORIG_HEAD |
>  		       RESET_HEAD_RUN_POST_CHECKOUT_HOOK,
> -		       NULL, msg.buf, DEFAULT_REFLOG_ACTION))
> +		       NULL, buf.buf, DEFAULT_REFLOG_ACTION))
>  		die(_("Could not detach HEAD"));
> -	strbuf_release(&msg);
>
>  	/*
>  	 * If the onto is a proper descendant of the tip of the branch, then
>  	 * we just fast-forwarded.
>  	 */
> -	strbuf_reset(&msg);
> +	strbuf_reset(&buf);
>  	if (oideq(&merge_base, &options.orig_head)) {
>  		printf(_("Fast-forwarded %s to %s.\n"),
>  			branch_name, options.onto_name);
> -		strbuf_addf(&msg, "rebase finished: %s onto %s",
> +		strbuf_addf(&buf, "rebase finished: %s onto %s",
>  			options.head_name ? options.head_name : "detached HEAD",
>  			oid_to_hex(&options.onto->object.oid));
>  		reset_head(the_repository, NULL, "Fast-forwarded", options.head_name,
> -			   RESET_HEAD_REFS_ONLY, "HEAD", msg.buf,
> +			   RESET_HEAD_REFS_ONLY, "HEAD", buf.buf,
>  			   DEFAULT_REFLOG_ACTION);
> -		strbuf_release(&msg);
>  		ret = !!finish_rebase(&options);
>  		goto cleanup;
>  	}
>

msg is not released if die() is called, but that's OK; in all other
cases it _is_ released in the old code.

I'd rather see the use of that multi-purpose "buf" reduced, e.g. we
could simplify path-building like this in a few cases:

-       strbuf_reset(&buf);
-       strbuf_addf(&buf, "%s/applying", apply_dir());
-       if(file_exists(buf.buf))
+       if (file_exists(mkpath("%s/applying", apply_dir())))

Sure, this looks a bit lispy, but still better than the old code
because there is no state to carry around and reset when "buf" is
repurposed.

René
Junio C Hamano Sept. 13, 2021, 10:40 p.m. UTC | #2
René Scharfe <l.s.r@web.de> writes:

> Am 13.09.21 um 17:19 schrieb Phillip Wood via GitGitGadget:
>> From: Phillip Wood <phillip.wood@dunelm.org.uk>
>>
>> There is already an strbuf that can be reused for creating messages.
>
> Reminds me of a terrible joke from elementary school: In Peter's class
> everybody is called Klaus, except Franz -- his name is Michael.
>
> Why would we want to use the same variable for multiple purposes?  This
> makes the code harder to read.  And the allocation overhead for these
> few cases should be negligible.
>
> The most important question: Is this patch really needed to support
> tags (the purpose of this series)?
>
>> msg is not freed if there is an error and there is a logic error where
>> we call strbuf_release(&msg) followed by strbuf_reset(&msg) and
>> strbuf_addf(&msg).
>
> strbuf_reset() after strbuf_release() is redundant but legal.

All good points.

I do not care too deeply either way, in the sense that it probably
is better for this function to have two variables (with at least one
of them having a meaningful name "msg" that tells readers what it is
about), if the original submission to rewrite "rebase" in C used a
single strbuf for both of these and given it a name (like "tmp")
that makes it clear that the buffer is merely a temporary area
without any longer term significance, I probably wouldn't have told
the submitter to rewrite it to use separate strbuf variables.

But if existing code already uses two variables, with at least one
of them having a meaningful name that tells what it is used for, I
see no reason why we want to rewrite it to use a single one.

Thanks.
Phillip Wood Sept. 14, 2021, 10:31 a.m. UTC | #3
On 13/09/2021 23:40, Junio C Hamano wrote:
> René Scharfe <l.s.r@web.de> writes:
> 
>> Am 13.09.21 um 17:19 schrieb Phillip Wood via GitGitGadget:
>>> From: Phillip Wood <phillip.wood@dunelm.org.uk>
>>>
>>> There is already an strbuf that can be reused for creating messages.
>>
>> Reminds me of a terrible joke from elementary school: In Peter's class
>> everybody is called Klaus, except Franz -- his name is Michael.
>>
>> Why would we want to use the same variable for multiple purposes?  This
>> makes the code harder to read.  And the allocation overhead for these
>> few cases should be negligible.

For better or worse reusing the same strbuf is a common pattern and when 
I saw the code switch to using a different variable I wondered if it was 
because the value of buf was being used later. It is also confusing to 
free msg in a different place to all the other variables. rebase_cmd() 
being so long does not help (msg is defined 763 lines before its first 
use) as it makes it harder to see what is going on.

>> The most important question: Is this patch really needed to support
>> tags (the purpose of this series)?
>>
>>> msg is not freed if there is an error and there is a logic error where
>>> we call strbuf_release(&msg) followed by strbuf_reset(&msg) and
>>> strbuf_addf(&msg).
>>
>> strbuf_reset() after strbuf_release() is redundant but legal.

It is confusing to the reader though, I spent time checking why the 
strbuf_release() call was there before concluding it was a mistake.

> All good points.
> 
> I do not care too deeply either way, in the sense that it probably
> is better for this function to have two variables (with at least one
> of them having a meaningful name "msg" that tells readers what it is
> about), if the original submission to rewrite "rebase" in C used a
> single strbuf for both of these and given it a name (like "tmp")
> that makes it clear that the buffer is merely a temporary area
> without any longer term significance, I probably wouldn't have told
> the submitter to rewrite it to use separate strbuf variables.
> 
> But if existing code already uses two variables, with at least one
> of them having a meaningful name that tells what it is used for, I
> see no reason why we want to rewrite it to use a single one.

In a short function where it is easy to see what happens between the 
variable declaration and its use I'd agree but here everything is so 
spread out I actually found the switch to a second variable confusing.

Best Wishes

Phillip

> Thanks.
>
Phillip Wood Sept. 14, 2021, 10:33 a.m. UTC | #4
Hi René

Thanks for looking at this series

On 13/09/2021 19:34, René Scharfe wrote:
> Am 13.09.21 um 17:19 schrieb Phillip Wood via GitGitGadget:
>> From: Phillip Wood <phillip.wood@dunelm.org.uk>
> I'd rather see the use of that multi-purpose "buf" reduced, e.g. we
> could simplify path-building like this in a few cases:
> 
> -       strbuf_reset(&buf);
> -       strbuf_addf(&buf, "%s/applying", apply_dir());
> -       if(file_exists(buf.buf))
> +       if (file_exists(mkpath("%s/applying", apply_dir())))
> 
> Sure, this looks a bit lispy, but still better than the old code
> because there is no state to carry around and reset when "buf" is
> repurposed.

That's a nice suggestion, I think it's much clearer which file we're 
checking for.

Best Wishes

Phillip
diff mbox series

Patch

diff --git a/builtin/rebase.c b/builtin/rebase.c
index 6138009d6e4..69a67ab1252 100644
--- a/builtin/rebase.c
+++ b/builtin/rebase.c
@@ -1299,7 +1299,6 @@  int cmd_rebase(int argc, const char **argv, const char *prefix)
 	int ret, flags, total_argc, in_progress = 0;
 	int keep_base = 0;
 	int ok_to_skip_pre_rebase = 0;
-	struct strbuf msg = STRBUF_INIT;
 	struct strbuf revisions = STRBUF_INIT;
 	struct strbuf buf = STRBUF_INIT;
 	struct object_id merge_base;
@@ -2063,30 +2062,29 @@  int cmd_rebase(int argc, const char **argv, const char *prefix)
 		printf(_("First, rewinding head to replay your work on top of "
 			 "it...\n"));
 
-	strbuf_addf(&msg, "%s: checkout %s",
+	strbuf_reset(&buf);
+	strbuf_addf(&buf, "%s: checkout %s",
 		    getenv(GIT_REFLOG_ACTION_ENVIRONMENT), options.onto_name);
 	if (reset_head(the_repository, &options.onto->object.oid, "checkout", NULL,
 		       RESET_HEAD_DETACH | RESET_ORIG_HEAD |
 		       RESET_HEAD_RUN_POST_CHECKOUT_HOOK,
-		       NULL, msg.buf, DEFAULT_REFLOG_ACTION))
+		       NULL, buf.buf, DEFAULT_REFLOG_ACTION))
 		die(_("Could not detach HEAD"));
-	strbuf_release(&msg);
 
 	/*
 	 * If the onto is a proper descendant of the tip of the branch, then
 	 * we just fast-forwarded.
 	 */
-	strbuf_reset(&msg);
+	strbuf_reset(&buf);
 	if (oideq(&merge_base, &options.orig_head)) {
 		printf(_("Fast-forwarded %s to %s.\n"),
 			branch_name, options.onto_name);
-		strbuf_addf(&msg, "rebase finished: %s onto %s",
+		strbuf_addf(&buf, "rebase finished: %s onto %s",
 			options.head_name ? options.head_name : "detached HEAD",
 			oid_to_hex(&options.onto->object.oid));
 		reset_head(the_repository, NULL, "Fast-forwarded", options.head_name,
-			   RESET_HEAD_REFS_ONLY, "HEAD", msg.buf,
+			   RESET_HEAD_REFS_ONLY, "HEAD", buf.buf,
 			   DEFAULT_REFLOG_ACTION);
-		strbuf_release(&msg);
 		ret = !!finish_rebase(&options);
 		goto cleanup;
 	}