diff mbox series

[01/22] sequencer: use repository parameter in short_commit_name()

Message ID 20230828214629.GA3831137@coredump.intra.peff.net (mailing list archive)
State New, archived
Headers show
Series YAUPS: Yet Another Unused Parameter Series | expand

Commit Message

Jeff King Aug. 28, 2023, 9:46 p.m. UTC
Instead of just using the_repository, we can take a repository parameter
from the caller. Most of them already have one, and doing so clears up a
few -Wunused-parameter warnings. There are still a few callers which use
the_repository, but this pushes us one small step forward to eventually
getting rid of those.

Signed-off-by: Jeff King <peff@peff.net>
---
 sequencer.c | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

Comments

Junio C Hamano Aug. 28, 2023, 10:21 p.m. UTC | #1
Jeff King <peff@peff.net> writes:

> -static const char *short_commit_name(struct commit *commit)
> +static const char *short_commit_name(struct repository *r, struct commit *commit)
>  {
> -	return repo_find_unique_abbrev(the_repository, &commit->object.oid,
> -				       DEFAULT_ABBREV);
> +	return repo_find_unique_abbrev(r, &commit->object.oid, DEFAULT_ABBREV);
>  }

Theoretically this is the right thing to do, and ...

>  static int get_message(struct commit *commit, struct commit_message *out)
> @@ -446,7 +445,7 @@ static int get_message(struct commit *commit, struct commit_message *out)
>  
>  	out->message = repo_logmsg_reencode(the_repository, commit, NULL,
>  					    get_commit_output_encoding());
> -	abbrev = short_commit_name(commit);
> +	abbrev = short_commit_name(the_repository, commit);

... this is a no-op.

> @@ -2383,7 +2382,7 @@ static int do_pick_commit(struct repository *r,
>  		error(command == TODO_REVERT
>  		      ? _("could not revert %s... %s")
>  		      : _("could not apply %s... %s"),
> -		      short_commit_name(commit), msg.subject);
> +		      short_commit_name(r, commit), msg.subject);

And this is a logical consequence for a function that is told by the
caller in which repository to work in via its parameter.

> @@ -3172,7 +3171,8 @@ static int walk_revs_populate_todo(struct todo_list *todo_list,
>  		item->offset_in_buf = todo_list->buf.len;
>  		subject_len = find_commit_subject(commit_buffer, &subject);
>  		strbuf_addf(&todo_list->buf, "%s %s %.*s\n", command_string,
> -			short_commit_name(commit), subject_len, subject);
> +			short_commit_name(opts->revs->repo, commit),
> +			subject_len, subject);
>  		repo_unuse_commit_buffer(the_repository, commit,
>  					 commit_buffer);

But how do we ascertain that opts->revs->repo is always safe to use
(iow initialized to a sensible value)?  repo_init_revisions() takes
a repository as its parameter and the first thing it does is to set
it to the revs->repo, so it is safe for any "struct rev_info" that
came from there, but REV_INFO_INIT omits initializer for the .repo
member.

The other two calls in this loop refer to the_repository so the
current code would be safe even if opts->revs->repo is set or NULL,
but that will no longer be true with the updated code.  I'd feel
safer if at the beginning of the function we create a local variable
"struct rev_info *repo" that is initialized to opts->revs->repo and
use it throughout the function instead of the_repository.


> @@ -5564,7 +5564,7 @@ static int make_script_with_merges(struct pretty_print_context *pp,
>  		if (!is_empty && (commit->object.flags & PATCHSAME)) {
>  			if (flags & TODO_LIST_WARN_SKIPPED_CHERRY_PICKS)
>  				warning(_("skipped previously applied commit %s"),
> -					short_commit_name(commit));
> +					short_commit_name(revs->repo, commit));
>  			skipped_commit = 1;
>  			continue;
>  		}

This one I am fairly certain is a safe and correct conversion; the
function would be segfaulting in its call to get_revision() if
revs->repo were set to a bogus value.

Thanks.
Taylor Blau Aug. 28, 2023, 11:06 p.m. UTC | #2
On Mon, Aug 28, 2023 at 03:21:11PM -0700, Junio C Hamano wrote:
> > @@ -3172,7 +3171,8 @@ static int walk_revs_populate_todo(struct todo_list *todo_list,
> >  		item->offset_in_buf = todo_list->buf.len;
> >  		subject_len = find_commit_subject(commit_buffer, &subject);
> >  		strbuf_addf(&todo_list->buf, "%s %s %.*s\n", command_string,
> > -			short_commit_name(commit), subject_len, subject);
> > +			short_commit_name(opts->revs->repo, commit),
> > +			subject_len, subject);
> >  		repo_unuse_commit_buffer(the_repository, commit,
> >  					 commit_buffer);
>
> But how do we ascertain that opts->revs->repo is always safe to use
> (iow initialized to a sensible value)?  repo_init_revisions() takes
> a repository as its parameter and the first thing it does is to set
> it to the revs->repo, so it is safe for any "struct rev_info" that
> came from there, but REV_INFO_INIT omits initializer for the .repo
> member.
>
> The other two calls in this loop refer to the_repository so the
> current code would be safe even if opts->revs->repo is set or NULL,
> but that will no longer be true with the updated code.  I'd feel
> safer if at the beginning of the function we create a local variable
> "struct rev_info *repo" that is initialized to opts->revs->repo and
> use it throughout the function instead of the_repository.

This call comes from sequencer_pick_revisions() ->
walk_revs_populate_todo(), where opts is passed in from the caller of
the former function.

The sole caller of that function is run_sequencer() in builtin/revert.c.
The relevant portion of that function is:

    if (cmd) {
        opts->revs = NULL;
    } else {
        struct setup_revision_opt s_r_opt;
        opts->revs = xmalloc(sizeof(*opts->revs));
        repo_init_revisions(the_repository, opts->revs, NULL);
        opts->revs->no_walk = 1;
        opts->revs->unsorted_input = 1;
        /* ... */
    }

So as long as we end up in the else arm of this conditional, we'll have
called repo_init_revisions(), causing opts->revs->repo to be equal to
the_repository.

Thankfully, we have an assert(opts->revs) at the beginning of
sequencer_pick_revisions(), so we know that we always take the else arm
on this particular call path.

...but, sequencer_pick_revisions() itself takes a repository pointer, so
we could equally use that, or drop it, like so:

--- 8< ---
diff --git a/builtin/revert.c b/builtin/revert.c
index e6f9a1ad26..29e215c72a 100644
--- a/builtin/revert.c
+++ b/builtin/revert.c
@@ -224,7 +224,7 @@ static int run_sequencer(int argc, const char **argv, const char *prefix,
 		return sequencer_rollback(the_repository, opts);
 	if (cmd == 's')
 		return sequencer_skip(the_repository, opts);
-	return sequencer_pick_revisions(the_repository, opts);
+	return sequencer_pick_revisions(opts);
 }

 int cmd_revert(int argc, const char **argv, const char *prefix)
diff --git a/sequencer.c b/sequencer.c
index 48475d1cc6..bc7e687623 100644
--- a/sequencer.c
+++ b/sequencer.c
@@ -5201,14 +5201,15 @@ static int single_pick(struct repository *r,
 	return do_pick_commit(r, &item, opts, 0, &check_todo);
 }

-int sequencer_pick_revisions(struct repository *r,
-			     struct replay_opts *opts)
+int sequencer_pick_revisions(struct replay_opts *opts)
 {
 	struct todo_list todo_list = TODO_LIST_INIT;
 	struct object_id oid;
+	struct repository *r;
 	int i, res;

 	assert(opts->revs);
+	r = opts->revs->repo;
 	if (read_and_refresh_cache(r, opts))
 		return -1;

diff --git a/sequencer.h b/sequencer.h
index 913a0f652d..10caa3dc93 100644
--- a/sequencer.h
+++ b/sequencer.h
@@ -158,8 +158,7 @@ void todo_list_filter_update_refs(struct repository *r,

 /* Call this to setup defaults before parsing command line options */
 void sequencer_init_config(struct replay_opts *opts);
-int sequencer_pick_revisions(struct repository *repo,
-			     struct replay_opts *opts);
+int sequencer_pick_revisions(struct replay_opts *opts);
 int sequencer_continue(struct repository *repo, struct replay_opts *opts);
 int sequencer_rollback(struct repository *repo, struct replay_opts *opts);
 int sequencer_skip(struct repository *repo, struct replay_opts *opts);
--- >8 ---

though it makes for an awkward API to have all of the other
sequencer-related functions take as their first argument a pointer to a
repository struct, leaving sequencer_pick_revisions() as the odd one
out.

So I'd probably just as soon drop that and do what Junio suggests:

--- 8< ---
diff --git a/sequencer.c b/sequencer.c
index 82dc3e160e..6c06b8e1bb 100644
--- a/sequencer.c
+++ b/sequencer.c
@@ -3143,22 +3143,24 @@ int write_basic_state(struct replay_opts *opts, const char *head_name,
 }

 static int walk_revs_populate_todo(struct todo_list *todo_list,
-				struct replay_opts *opts)
+				   struct replay_opts *opts)
 {
 	enum todo_command command = opts->action == REPLAY_PICK ?
 		TODO_PICK : TODO_REVERT;
 	const char *command_string = todo_command_info[command].str;
 	const char *encoding;
 	struct commit *commit;
+	struct repository *r;

 	if (prepare_revs(opts))
 		return -1;
+	r = opts->revs->repo;

 	encoding = get_log_output_encoding();

 	while ((commit = get_revision(opts->revs))) {
 		struct todo_item *item = append_new_todo(todo_list);
-		const char *commit_buffer = repo_logmsg_reencode(the_repository,
+		const char *commit_buffer = repo_logmsg_reencode(r,
 								 commit, NULL,
 								 encoding);
 		const char *subject;
@@ -3173,8 +3175,7 @@ static int walk_revs_populate_todo(struct todo_list *todo_list,
 		strbuf_addf(&todo_list->buf, "%s %s %.*s\n", command_string,
 			short_commit_name(opts->revs->repo, commit),
 			subject_len, subject);
-		repo_unuse_commit_buffer(the_repository, commit,
-					 commit_buffer);
+		repo_unuse_commit_buffer(r, commit, commit_buffer);
 	}

 	if (!todo_list->nr)
--- >8 ---

> > @@ -5564,7 +5564,7 @@ static int make_script_with_merges(struct pretty_print_context *pp,
> >  		if (!is_empty && (commit->object.flags & PATCHSAME)) {
> >  			if (flags & TODO_LIST_WARN_SKIPPED_CHERRY_PICKS)
> >  				warning(_("skipped previously applied commit %s"),
> > -					short_commit_name(commit));
> > +					short_commit_name(revs->repo, commit));
> >  			skipped_commit = 1;
> >  			continue;
> >  		}
>
> This one I am fairly certain is a safe and correct conversion; the
> function would be segfaulting in its call to get_revision() if
> revs->repo were set to a bogus value.

Agreed.

Thanks,
Taylor
Jeff King Aug. 29, 2023, 12:48 a.m. UTC | #3
On Mon, Aug 28, 2023 at 03:21:11PM -0700, Junio C Hamano wrote:

> > @@ -3172,7 +3171,8 @@ static int walk_revs_populate_todo(struct todo_list *todo_list,
> >  		item->offset_in_buf = todo_list->buf.len;
> >  		subject_len = find_commit_subject(commit_buffer, &subject);
> >  		strbuf_addf(&todo_list->buf, "%s %s %.*s\n", command_string,
> > -			short_commit_name(commit), subject_len, subject);
> > +			short_commit_name(opts->revs->repo, commit),
> > +			subject_len, subject);
> >  		repo_unuse_commit_buffer(the_repository, commit,
> >  					 commit_buffer);
> 
> But how do we ascertain that opts->revs->repo is always safe to use
> (iow initialized to a sensible value)?  repo_init_revisions() takes
> a repository as its parameter and the first thing it does is to set
> it to the revs->repo, so it is safe for any "struct rev_info" that
> came from there, but REV_INFO_INIT omits initializer for the .repo
> member.

I mostly just assumed it would have been initialized, because passing in
anything else and expecting it to work is a bit crazy. REV_INFO_INIT
specifically says:

   * This will not fully initialize a "struct rev_info", the
   * repo_init_revisions() function needs to be called before
   * setup_revisions() and any revision walking takes place.

and then goes on to explain that the point is just so you can "goto
cleanup" and free it if you never made it to the init_revisions() call.
So the code would already be pretty buggy in this case.

That said, all of the code paths here do call repo_init_revisions(), so
I think it is OK. But if we want to make the patch simple and more
obviously correct, I would prefer to just blindly use the_repository in
callers that don't have a "struct repository" themselves. Then we know
it's a strict step forward (if slightly smaller), and we can leave the
refactoring of the rest of the sequencer code for another day. I was
trying hard to draw a reasonable line and not get this already-large
series derailed by only semi-related refactoring.

> The other two calls in this loop refer to the_repository so the
> current code would be safe even if opts->revs->repo is set or NULL,
> but that will no longer be true with the updated code.  I'd feel
> safer if at the beginning of the function we create a local variable
> "struct rev_info *repo" that is initialized to opts->revs->repo and
> use it throughout the function instead of the_repository.

I'm not sure how that makes it any safer, as it would mean using the
suspect repo pointer in more places. Unless you are proposing to do:

  struct repository *r = opts->revs->repo;
  if (!r)
	r = the_repository; /* or BUG() ? */

-Peff
Junio C Hamano Aug. 29, 2023, 1:16 a.m. UTC | #4
Jeff King <peff@peff.net> writes:

>> but that will no longer be true with the updated code.  I'd feel
>> safer if at the beginning of the function we create a local variable
>> "struct rev_info *repo" that is initialized to opts->revs->repo and
>> use it throughout the function instead of the_repository.
>
> I'm not sure how that makes it any safer, as it would mean using the
> suspect repo pointer in more places. Unless you are proposing to do:
>
>   struct repository *r = opts->revs->repo;
>   if (!r)
> 	r = the_repository; /* or BUG() ? */

Yup, the BUG() variant was exactly what I had in mind, but without
the assert, by using opts->revs->repo more consistently, we would
make sure nobody will call us with uninitialized opts->revs->repo
now or in the future.
diff mbox series

Patch

diff --git a/sequencer.c b/sequencer.c
index 48475d1cc6..82dc3e160e 100644
--- a/sequencer.c
+++ b/sequencer.c
@@ -433,10 +433,9 @@  struct commit_message {
 	const char *message;
 };
 
-static const char *short_commit_name(struct commit *commit)
+static const char *short_commit_name(struct repository *r, struct commit *commit)
 {
-	return repo_find_unique_abbrev(the_repository, &commit->object.oid,
-				       DEFAULT_ABBREV);
+	return repo_find_unique_abbrev(r, &commit->object.oid, DEFAULT_ABBREV);
 }
 
 static int get_message(struct commit *commit, struct commit_message *out)
@@ -446,7 +445,7 @@  static int get_message(struct commit *commit, struct commit_message *out)
 
 	out->message = repo_logmsg_reencode(the_repository, commit, NULL,
 					    get_commit_output_encoding());
-	abbrev = short_commit_name(commit);
+	abbrev = short_commit_name(the_repository, commit);
 
 	subject_len = find_commit_subject(out->message, &subject);
 
@@ -2383,7 +2382,7 @@  static int do_pick_commit(struct repository *r,
 		error(command == TODO_REVERT
 		      ? _("could not revert %s... %s")
 		      : _("could not apply %s... %s"),
-		      short_commit_name(commit), msg.subject);
+		      short_commit_name(r, commit), msg.subject);
 		print_advice(r, res == 1, opts);
 		repo_rerere(r, opts->allow_rerere_auto);
 		goto leave;
@@ -3172,7 +3171,8 @@  static int walk_revs_populate_todo(struct todo_list *todo_list,
 		item->offset_in_buf = todo_list->buf.len;
 		subject_len = find_commit_subject(commit_buffer, &subject);
 		strbuf_addf(&todo_list->buf, "%s %s %.*s\n", command_string,
-			short_commit_name(commit), subject_len, subject);
+			short_commit_name(opts->revs->repo, commit),
+			subject_len, subject);
 		repo_unuse_commit_buffer(the_repository, commit,
 					 commit_buffer);
 	}
@@ -3593,7 +3593,7 @@  static int error_with_patch(struct repository *r,
 	} else if (exit_code) {
 		if (commit)
 			fprintf_ln(stderr, _("Could not apply %s... %.*s"),
-				   short_commit_name(commit), subject_len, subject);
+				   short_commit_name(r, commit), subject_len, subject);
 		else
 			/*
 			 * We don't have the hash of the parent so
@@ -4728,7 +4728,7 @@  static int pick_commits(struct repository *r,
 						term_clear_line();
 					fprintf(stderr,
 						_("Stopped at %s...  %.*s\n"),
-						short_commit_name(commit),
+						short_commit_name(r, commit),
 						item->arg_len, arg);
 				}
 				return error_with_patch(r, commit,
@@ -5564,7 +5564,7 @@  static int make_script_with_merges(struct pretty_print_context *pp,
 		if (!is_empty && (commit->object.flags & PATCHSAME)) {
 			if (flags & TODO_LIST_WARN_SKIPPED_CHERRY_PICKS)
 				warning(_("skipped previously applied commit %s"),
-					short_commit_name(commit));
+					short_commit_name(revs->repo, commit));
 			skipped_commit = 1;
 			continue;
 		}
@@ -5800,7 +5800,7 @@  int sequencer_make_script(struct repository *r, struct strbuf *out, int argc,
 		if (!is_empty && (commit->object.flags & PATCHSAME)) {
 			if (flags & TODO_LIST_WARN_SKIPPED_CHERRY_PICKS)
 				warning(_("skipped previously applied commit %s"),
-					short_commit_name(commit));
+					short_commit_name(r, commit));
 			skipped_commit = 1;
 			continue;
 		}
@@ -5892,7 +5892,8 @@  static void todo_list_add_exec_commands(struct todo_list *todo_list,
 	todo_list->alloc = alloc;
 }
 
-static void todo_list_to_strbuf(struct repository *r, struct todo_list *todo_list,
+static void todo_list_to_strbuf(struct repository *r,
+				struct todo_list *todo_list,
 				struct strbuf *buf, int num, unsigned flags)
 {
 	struct todo_item *item;
@@ -5921,7 +5922,7 @@  static void todo_list_to_strbuf(struct repository *r, struct todo_list *todo_lis
 		/* add commit id */
 		if (item->commit) {
 			const char *oid = flags & TODO_LIST_SHORTEN_IDS ?
-					  short_commit_name(item->commit) :
+					  short_commit_name(r, item->commit) :
 					  oid_to_hex(&item->commit->object.oid);
 
 			if (item->command == TODO_FIXUP) {