diff mbox series

[v7] submodule merge: update conflict error message

Message ID 20220728211221.2913928-1-calvinwan@google.com (mailing list archive)
State Superseded
Headers show
Series [v7] submodule merge: update conflict error message | expand

Commit Message

Calvin Wan July 28, 2022, 9:12 p.m. UTC
When attempting to merge in a superproject with conflicting submodule
pointers that cannot be fast-forwarded or trivially resolved, the merge
fails and Git prints an error message that accurately describes the
failure, but does not provide steps for the user to resolve the error.

Git is left in a conflicted state, which requires the user to:
 1. merge submodules or update submodules to an already existing
	commit that reflects the merge
 2. add submodules changes to the superproject
 3. finish merging superproject
These steps are non-obvious for newer submodule users to figure out
based on the error message and neither `git submodule status` nor `git
status` provide any useful pointers.

Update error message to provide steps to resolve submodule merge
conflict. Future work could involve adding an advice flag to the
message. Although the message is long, it also has the id of the 
submodule commit that needs to be merged, which could be useful
information for the user.

Additionally, 4 merge failures that resulted in an early return have
been updated to reflect the status of the merge.
1. Null merge base (null o): CONFLICT_SUBMODULE_NULL_MERGE_BASE added
   as a new conflict type and will print updated error message.
2. Null merge side a (null a): BUG(). See [1] for discussion
3. Null merge side b (null b): BUG(). See [1] for discussion
4. Submodule not checked out: Still returns early, but added a
   NEEDSWORK bit since current error message does not reflect the
   correct resolution

[1] https://lore.kernel.org/git/CABPp-BE0qGwUy80dmVszkJQ+tcpfLRW0OZyErymzhZ9+HWY1mw@mail.gmail.com/

Signed-off-by: Calvin Wan <calvinwan@google.com>
---

For Elijah: Cleaned up the small nits and updated resolutions for
those 4 cases we discussed.

For Ævar: Apologies for misunderstanding your suggestions to make
my messages easier for translators to work with. I have reformatted
all of the messages to separate text vs formatting translations. Let
me know if this is what you were expecting.

 merge-ort.c                 | 112 ++++++++++++++++++++++++++++++++++--
 t/t6437-submodule-merge.sh  |  78 +++++++++++++++++++++++--
 t/t7402-submodule-rebase.sh |   9 ++-
 3 files changed, 185 insertions(+), 14 deletions(-)


base-commit: 9dd64cb4d310986dd7b8ca7fff92f9b61e0bd21a

Comments

Ævar Arnfjörð Bjarmason July 28, 2022, 11:22 p.m. UTC | #1
On Thu, Jul 28 2022, Calvin Wan wrote:

> For Elijah: Cleaned up the small nits and updated resolutions for
> those 4 cases we discussed.
>
> For Ævar: Apologies for misunderstanding your suggestions to make
> my messages easier for translators to work with. I have reformatted
> all of the messages to separate text vs formatting translations. Let
> me know if this is what you were expecting.

Let's take a look, and thanks for sticking with this...

>
>  merge-ort.c                 | 112 ++++++++++++++++++++++++++++++++++--
>  t/t6437-submodule-merge.sh  |  78 +++++++++++++++++++++++--
>  t/t7402-submodule-rebase.sh |   9 ++-
>  3 files changed, 185 insertions(+), 14 deletions(-)
>
> diff --git a/merge-ort.c b/merge-ort.c
> index 01f150ef3b..4302e900ee 100644
> --- a/merge-ort.c
> +++ b/merge-ort.c
> @@ -387,6 +387,9 @@ struct merge_options_internal {
>  
>  	/* call_depth: recursion level counter for merging merge bases */
>  	int call_depth;
> +
> +	/* field that holds submodule conflict information */
> +	struct string_list conflicted_submodules;
>  };
>  
>  struct version_info {
> @@ -517,6 +520,7 @@ enum conflict_and_info_types {
>  	CONFLICT_SUBMODULE_NOT_INITIALIZED,
>  	CONFLICT_SUBMODULE_HISTORY_NOT_AVAILABLE,
>  	CONFLICT_SUBMODULE_MAY_HAVE_REWINDS,
> +	CONFLICT_SUBMODULE_NULL_MERGE_BASE,
>  
>  	/* Keep this entry _last_ in the list */
>  	NB_CONFLICT_TYPES,
> @@ -570,6 +574,8 @@ static const char *type_short_descriptions[] = {
>  		"CONFLICT (submodule history not available)",
>  	[CONFLICT_SUBMODULE_MAY_HAVE_REWINDS] =
>  		"CONFLICT (submodule may have rewinds)",
> +	[CONFLICT_SUBMODULE_NULL_MERGE_BASE] =
> +		"CONFLICT (submodule no merge base)"
>  };
>  
>  struct logical_conflict_info {
> @@ -686,6 +692,8 @@ static void clear_or_reinit_internal_opts(struct merge_options_internal *opti,
>  
>  	mem_pool_discard(&opti->pool, 0);
>  
> +	string_list_clear(&opti->conflicted_submodules, 1);
> +
>  	/* Clean out callback_data as well. */
>  	FREE_AND_NULL(renames->callback_data);
>  	renames->callback_data_nr = renames->callback_data_alloc = 0;
> @@ -1744,26 +1752,40 @@ static int merge_submodule(struct merge_options *opt,
>  
>  	int i;
>  	int search = !opt->priv->call_depth;
> +	int sub_initialized = 1;
>  
>  	/* store fallback answer in result in case we fail */
>  	oidcpy(result, opt->priv->call_depth ? o : a);
>  
>  	/* we can not handle deletion conflicts */
> -	if (is_null_oid(o))
> -		return 0;
>  	if (is_null_oid(a))
> -		return 0;
> +		BUG("submodule deleted on one side; this should be handled outside of merge_submodule()"); 
>  	if (is_null_oid(b))
> -		return 0;
> +		BUG("submodule deleted on one side; this should be handled outside of merge_submodule()");
>  
> -	if (repo_submodule_init(&subrepo, opt->repo, path, null_oid())) {
> +	if ((sub_initialized = repo_submodule_init(&subrepo,
> +									opt->repo, path, null_oid()))) {
>  		path_msg(opt, CONFLICT_SUBMODULE_NOT_INITIALIZED, 0,
>  			 path, NULL, NULL, NULL,
>  			 _("Failed to merge submodule %s (not checked out)"),
>  			 path);
> +		/*
> +		 * NEEDSWORK: Since the steps to resolve this error are
> +		 * more involved than what is currently in 
> +		 * print_submodule_conflict_suggestion(), we return
> +		 * immediately rather than generating an error message
> +		 */
>  		return 0;
>  	}
>  
> +	if (is_null_oid(o)) {
> +		path_msg(opt, CONFLICT_SUBMODULE_NULL_MERGE_BASE, 0,
> +			 path, NULL, NULL, NULL,
> +			 _("Failed to merge submodule %s (no merge base)"),
> +			 path);
> +		goto cleanup;
> +	}
> +
>  	if (!(commit_o = lookup_commit_reference(&subrepo, o)) ||
>  	    !(commit_a = lookup_commit_reference(&subrepo, a)) ||
>  	    !(commit_b = lookup_commit_reference(&subrepo, b))) {
> @@ -1849,7 +1871,15 @@ static int merge_submodule(struct merge_options *opt,
>  
>  	object_array_clear(&merges);
>  cleanup:
> -	repo_clear(&subrepo);
> +	if (!opt->priv->call_depth && !ret) {
> +		struct string_list *csub = &opt->priv->conflicted_submodules;
> +
> +		string_list_append(csub, path)->util =
> +				xstrdup(repo_find_unique_abbrev(&subrepo, b, DEFAULT_ABBREV));

FWIW (since you may have changed this due to my comment) I meant (but
maybe didn't make clear enough) in
https://lore.kernel.org/git/220727.86ilnilxfv.gmgdl@evledraar.gmail.com/
that just getting rid of the "util" variable could trivially be done, so:

	struct string_list *csub = &opt->priv->conflicted_submodules;
	const char *abbrev;

	abbrev = repo_find_unique_abbrev(&subrepo, b, DEFAULT_ABBREV);

	string_list_append(csub, path)->util = xstrdup(abbrev);

What you have here is also just fine, but in case you traded the line
variable for line wrapping I think it's just fine (and actually
preferable) to just make an intermediate variable to avoid this sort of
line wrapping.

Anyway, just clarifying...

> +static void print_submodule_conflict_suggestion(struct string_list *csub) {
> +	if (csub->nr > 0) {

Since the entire body of the function is guarded by this maybe avoid the
indentation and:

	if (!csub->nr)
		return;


> +		struct string_list_item *item;
> +		struct strbuf msg = STRBUF_INIT;
> +		struct strbuf tmp = STRBUF_INIT;
> +
> +		strbuf_addf(&tmp, _("Recursive merging with submodules currently only supports trivial cases."));
> +		strbuf_addf(&msg, "%s\n", tmp.buf);
> +		strbuf_release(&tmp);
> +
> +		strbuf_addf(&tmp, _("Please manually handle the merging of each conflicted submodule."));
> +		strbuf_addf(&msg, "%s\n", tmp.buf);
> +		strbuf_release(&tmp);
> +
> +		strbuf_addf(&tmp, _("This can be accomplished with the following steps:"));
> +		strbuf_addf(&msg, "%s\n", tmp.buf);
> +		strbuf_release(&tmp);

This was *almost* correct in your v6, i.e.:

	printf(_("Recursive merging with submodules currently only supports trivial cases.\n"
		 "Please manually handle the merging of each conflicted submodule.\n"
		 "This can be accomplished with the following steps:\n"));

What I meant with "We can add \n\n unconditionally, no need to put it in
the translation." in
https://lore.kernel.org/git/220727.86ilnilxfv.gmgdl@evledraar.gmail.com/
is that you should avoid adding formatting to translations themselves if
possible.

i.e. what we're translating here is a full paragraph, so to a translator
it doesn't matter if it ends with ":\n" or ":", so we can add the last
"\n" unconditionalyl.

But we should *not* add the \n's within a single paragraph
unconditionally, a translator needs to be able to translate that entire
string.

Different languages split sentences differently, and different
conventions in different languages & SVO v.s. OVS or wahtever (see
https://en.wikipedia.org/wiki/Word_order) means that your last sentence
might need to come first in some languages.

So I think this should just be:

	printf(_("Recursive merging with submodules currently only supports trivial cases.\n"
		 "Please manually handle the merging of each conflicted submodule.\n"
		 "This can be accomplished with the following steps:"));
	putchar('\n');

I.e. the "\n" within the pargraph are imporant, and translators need to
be able to amend those, and perhaps some languages will have 2x, some 4x
or whatever.

Whereas the "\n" at the end is something we can always add, because it's
just a matter of how we're interpolating the paragraph into other output
(think *nix terminal v.s. say HTML, just as an example).

> +
> +		for_each_string_list_item(item, csub) {
> +			const char *abbrev= item->util;
> +			/*
> +			 * TRANSLATORS: This is a line of advice to resolve a merge conflict
> +			 * in a submodule. The second argument is the abbreviated id of the
> +			 * commit that needs to be merged.
> +			 * E.g. - go to submodule (sub), and either merge commit abc1234"
> +			 */
> +			strbuf_addf(&tmp, _("go to submodule (%s), and either merge commit %s"),
> +													item->string, abbrev);

...

> +			strbuf_addf(&msg, _(" - %s"), tmp.buf);
> +			strbuf_addf(&msg, "\n");
> +			strbuf_release(&tmp);
> +			strbuf_addf(&tmp, _("or update to an existing commit which has merged those changes"));
> +			strbuf_addf(&msg, _("   %s"), tmp.buf);
> +			strbuf_addf(&msg, "\n");
> +			strbuf_release(&tmp);

Similarly this is worse, because we're now splitting up up a single
sentence or paragraph into multiple translations.

FWIW I meant that you could write some helper function that would do
something like this (in Perl, too lazy to write C now):

	perl -wE '
		my @lines = split /\n/, $ARGV[0];
		for (my $i = 0; $i < @lines; $i++) {
			my $pfx = !$i ? "- " : "  "; say "$pfx$lines[$i]";
	}' "$(printf "some multi\nline paragraph\nhere")"
	- some multi
	  line paragraph
	  here

I.e. the translator would see:

	_("some multi\nline paragraph\nhere")

Which would allow them to insert any amount of newlines, but you'd just
have a utility function that:

 * Splits those lines by \n
 * Formats the first line by _("- %s\n")
 * Formats subsequent lines with _("  %s\n") 
 * The %s in those is a _()'d string.

I.e. what's happening at the bottom of show_ambiguous_object(). It's:

 * A relatively small amount of programming,
 * Lets translators focus only on these formatting questions for the
   translations that do the magic formatting, and those only need to be
   changed for RTL languages.

   I.e. German and English whill both want "- %s\n", but e.g. Hebrew
   will want "%s -\n".

 * Makes the code easier to read, because instead of adding formatting concerns to every string, you'd just:

	strbuf_addstr(&buf, add_fmt_list_item(i, _("blah bla\nblah blah\nblah.)));

> +		strbuf_release(&tmp);

When you use the strbuf API you should strbuf_reset() within a single
scope if you want to "clear" it, not strbuf_release().

The strbuf_release() also works, but then it's a malloc()/free(), each
time, with strbuf_reset() we don't free() it, we just set the len to
zero, and the content to "\0", but remember how long it was ("alloc" in
strbuf.h).

You then only do the strbuf_release() at the very end.
Elijah Newren July 29, 2022, 12:24 a.m. UTC | #2
On Thu, Jul 28, 2022 at 2:12 PM Calvin Wan <calvinwan@google.com> wrote:
>
> When attempting to merge in a superproject with conflicting submodule
> pointers that cannot be fast-forwarded or trivially resolved, the merge
> fails and Git prints an error message that accurately describes the
> failure, but does not provide steps for the user to resolve the error.
>
> Git is left in a conflicted state, which requires the user to:
>  1. merge submodules or update submodules to an already existing
>         commit that reflects the merge
>  2. add submodules changes to the superproject
>  3. finish merging superproject
> These steps are non-obvious for newer submodule users to figure out
> based on the error message and neither `git submodule status` nor `git
> status` provide any useful pointers.
>
> Update error message to provide steps to resolve submodule merge
> conflict. Future work could involve adding an advice flag to the
> message. Although the message is long, it also has the id of the
> submodule commit that needs to be merged, which could be useful
> information for the user.
>
> Additionally, 4 merge failures that resulted in an early return have
> been updated to reflect the status of the merge.
> 1. Null merge base (null o): CONFLICT_SUBMODULE_NULL_MERGE_BASE added
>    as a new conflict type and will print updated error message.
> 2. Null merge side a (null a): BUG(). See [1] for discussion
> 3. Null merge side b (null b): BUG(). See [1] for discussion
> 4. Submodule not checked out: Still returns early, but added a
>    NEEDSWORK bit since current error message does not reflect the
>    correct resolution

s/does not reflect the correct resolution/also deserves a more
detailed explanation of how to resolve/ ?

>
> [1] https://lore.kernel.org/git/CABPp-BE0qGwUy80dmVszkJQ+tcpfLRW0OZyErymzhZ9+HWY1mw@mail.gmail.com/
>
> Signed-off-by: Calvin Wan <calvinwan@google.com>
> ---
>
> For Elijah: Cleaned up the small nits and updated resolutions for
> those 4 cases we discussed.

Thanks.  Including a range-diff in the cover letter would be really
helpful, for future reference.

>
> For Ævar: Apologies for misunderstanding your suggestions to make
> my messages easier for translators to work with. I have reformatted
> all of the messages to separate text vs formatting translations. Let
> me know if this is what you were expecting.
>
>  merge-ort.c                 | 112 ++++++++++++++++++++++++++++++++++--
>  t/t6437-submodule-merge.sh  |  78 +++++++++++++++++++++++--
>  t/t7402-submodule-rebase.sh |   9 ++-
>  3 files changed, 185 insertions(+), 14 deletions(-)
>
> diff --git a/merge-ort.c b/merge-ort.c
> index 01f150ef3b..4302e900ee 100644
> --- a/merge-ort.c
> +++ b/merge-ort.c
> @@ -387,6 +387,9 @@ struct merge_options_internal {
>
>         /* call_depth: recursion level counter for merging merge bases */
>         int call_depth;
> +
> +       /* field that holds submodule conflict information */
> +       struct string_list conflicted_submodules;
>  };
>
>  struct version_info {
> @@ -517,6 +520,7 @@ enum conflict_and_info_types {
>         CONFLICT_SUBMODULE_NOT_INITIALIZED,
>         CONFLICT_SUBMODULE_HISTORY_NOT_AVAILABLE,
>         CONFLICT_SUBMODULE_MAY_HAVE_REWINDS,
> +       CONFLICT_SUBMODULE_NULL_MERGE_BASE,
>
>         /* Keep this entry _last_ in the list */
>         NB_CONFLICT_TYPES,
> @@ -570,6 +574,8 @@ static const char *type_short_descriptions[] = {
>                 "CONFLICT (submodule history not available)",
>         [CONFLICT_SUBMODULE_MAY_HAVE_REWINDS] =
>                 "CONFLICT (submodule may have rewinds)",
> +       [CONFLICT_SUBMODULE_NULL_MERGE_BASE] =
> +               "CONFLICT (submodule no merge base)"
>  };
>
>  struct logical_conflict_info {
> @@ -686,6 +692,8 @@ static void clear_or_reinit_internal_opts(struct merge_options_internal *opti,
>
>         mem_pool_discard(&opti->pool, 0);
>
> +       string_list_clear(&opti->conflicted_submodules, 1);
> +
>         /* Clean out callback_data as well. */
>         FREE_AND_NULL(renames->callback_data);
>         renames->callback_data_nr = renames->callback_data_alloc = 0;
> @@ -1744,26 +1752,40 @@ static int merge_submodule(struct merge_options *opt,
>
>         int i;
>         int search = !opt->priv->call_depth;
> +       int sub_initialized = 1;
>
>         /* store fallback answer in result in case we fail */
>         oidcpy(result, opt->priv->call_depth ? o : a);
>
>         /* we can not handle deletion conflicts */
> -       if (is_null_oid(o))
> -               return 0;
>         if (is_null_oid(a))
> -               return 0;
> +               BUG("submodule deleted on one side; this should be handled outside of merge_submodule()");
>         if (is_null_oid(b))
> -               return 0;
> +               BUG("submodule deleted on one side; this should be handled outside of merge_submodule()");
>
> -       if (repo_submodule_init(&subrepo, opt->repo, path, null_oid())) {
> +       if ((sub_initialized = repo_submodule_init(&subrepo,
> +                                                                       opt->repo, path, null_oid()))) {

Um, repo_submodule_init() returns 0 on success, non-zero upon failure.
So !sub_initialized means "submodule IS initialized", though it
appears to read to mean the opposite.  Can you consider renaming your
variable (maybe just to "sub_not_initialized")?

>                 path_msg(opt, CONFLICT_SUBMODULE_NOT_INITIALIZED, 0,
>                          path, NULL, NULL, NULL,
>                          _("Failed to merge submodule %s (not checked out)"),
>                          path);
> +               /*
> +                * NEEDSWORK: Since the steps to resolve this error are
> +                * more involved than what is currently in
> +                * print_submodule_conflict_suggestion(), we return
> +                * immediately rather than generating an error message
> +                */

Technically, we just did generate an error message ("Failed to merge
submodule %s (not checked out)").

Maybe replace "immediately rather than generating an error message"
with "immediately.  It would be better to "goto cleanup" here, after
setting some flag requesting a more detailed message be saved for
print_submodule_conflict_suggetion()".  Or something like that.

>                 return 0;
>         }
>
> +       if (is_null_oid(o)) {
> +               path_msg(opt, CONFLICT_SUBMODULE_NULL_MERGE_BASE, 0,
> +                        path, NULL, NULL, NULL,
> +                        _("Failed to merge submodule %s (no merge base)"),
> +                        path);
> +               goto cleanup;
> +       }

Does this need to be moved after initializing the submodule?  I
thought that was the point of introducing the sub_initialized
variable, and we're clearly not going to use it, so it would seem to
make more sense to not initialize it for this case.

> +
>         if (!(commit_o = lookup_commit_reference(&subrepo, o)) ||
>             !(commit_a = lookup_commit_reference(&subrepo, a)) ||
>             !(commit_b = lookup_commit_reference(&subrepo, b))) {
> @@ -1849,7 +1871,15 @@ static int merge_submodule(struct merge_options *opt,
>
>         object_array_clear(&merges);
>  cleanup:
> -       repo_clear(&subrepo);
> +       if (!opt->priv->call_depth && !ret) {
> +               struct string_list *csub = &opt->priv->conflicted_submodules;
> +
> +               string_list_append(csub, path)->util =
> +                               xstrdup(repo_find_unique_abbrev(&subrepo, b, DEFAULT_ABBREV));
> +       }
> +
> +       if (!sub_initialized)
> +               repo_clear(&subrepo);
>         return ret;
>  }
>
> @@ -4412,6 +4442,73 @@ static int record_conflicted_index_entries(struct merge_options *opt)
>         return errs;
>  }
>
> +static void print_submodule_conflict_suggestion(struct string_list *csub) {
> +       if (csub->nr > 0) {
> +               struct string_list_item *item;
> +               struct strbuf msg = STRBUF_INIT;
> +               struct strbuf tmp = STRBUF_INIT;
> +
> +               strbuf_addf(&tmp, _("Recursive merging with submodules currently only supports trivial cases."));
> +               strbuf_addf(&msg, "%s\n", tmp.buf);
> +               strbuf_release(&tmp);

Instead of these three lines, why not just
    strbuf_addstr(&msg, "%s\n", _("Recursive merging with submodules
currently only supports trivial cases."));
?

Same applies for some of the messages below too.

> +               strbuf_addf(&tmp, _("Please manually handle the merging of each conflicted submodule."));
> +               strbuf_addf(&msg, "%s\n", tmp.buf);
> +               strbuf_release(&tmp);
> +
> +               strbuf_addf(&tmp, _("This can be accomplished with the following steps:"));
> +               strbuf_addf(&msg, "%s\n", tmp.buf);
> +               strbuf_release(&tmp);
> +
> +               for_each_string_list_item(item, csub) {
> +                       const char *abbrev= item->util;
> +                       /*
> +                        * TRANSLATORS: This is a line of advice to resolve a merge conflict
> +                        * in a submodule. The second argument is the abbreviated id of the
> +                        * commit that needs to be merged.
> +                        * E.g. - go to submodule (sub), and either merge commit abc1234"
> +                        */
> +                       strbuf_addf(&tmp, _("go to submodule (%s), and either merge commit %s"),
> +                                                                                                       item->string, abbrev);
> +                       strbuf_addf(&msg, _(" - %s"), tmp.buf);
> +                       strbuf_addf(&msg, "\n");
> +                       strbuf_release(&tmp);
> +                       strbuf_addf(&tmp, _("or update to an existing commit which has merged those changes"));
> +                       strbuf_addf(&msg, _("   %s"), tmp.buf);
> +                       strbuf_addf(&msg, "\n");
> +                       strbuf_release(&tmp);
> +               }
> +               strbuf_addf(&tmp, _("come back to superproject and run:"));
> +               strbuf_addf(&msg, _(" - %s"), tmp.buf);
> +               strbuf_addf(&msg, "\n\n");
> +               strbuf_release(&tmp);
> +
> +               strbuf_addf(&tmp, "git add ");
> +               strbuf_add_separated_string_list(&tmp, " ", csub);
> +               strbuf_addf(&msg, _("       %s"), tmp.buf);
> +               strbuf_addf(&msg, "\n\n");
> +               strbuf_release(&tmp);
> +
> +               strbuf_addf(&tmp, _("to record the above merge or update"));
> +               strbuf_addf(&msg, _("   %s"), tmp.buf);
> +               strbuf_addf(&msg, "\n");
> +               strbuf_release(&tmp);
> +
> +               strbuf_addf(&tmp, _("resolve any other conflicts in the superproject"));
> +               strbuf_addf(&msg, _(" - %s"), tmp.buf);
> +               strbuf_addf(&msg, "\n");
> +               strbuf_release(&tmp);
> +
> +               strbuf_addf(&tmp, _("commit the resulting index in the superproject"));
> +               strbuf_addf(&msg, _(" - %s"), tmp.buf);
> +               strbuf_addf(&msg, "\n");
> +               strbuf_release(&tmp);
> +
> +               printf("%s", msg.buf);
> +               strbuf_release(&msg);

Yuck.  I'm not a translator, so maybe what you are doing is preferred.
But wouldn't translators find it annoying to have to translate " - %s"
and "  %s" in all these places (and wouldn't there need to be a
TRANSLATORS comment before each and every one)?  Also, are we running
the risk of these short strings needing to be translated differently
here than in other places?  For example, what if someone is trying to
align output nicely:

    Name:   John Johnson
    Date:   2022-07-28
    Gender: Male

and uses _("  %s") when printing the name to get the indent right, and
expects other languages to add or remove spaces accordingly?  Or has
other built up messages without hyphens but uses _("  %s") for
whatever reason that may not match what you are doing here?

If there are multiple _("  %s") in the code, that string can only be
translated once.  Not once per codepath, but once total.  I'm afraid
this lego building of messages seems to risk needing
translation-per-codepath when that just isn't possible.

I also find the big block of code somewhat painful to read.  Could we
instead do something like (note, I have both a tmp and tmp2):

    strbuf_add_separated_string_list(&tmp2, " ", csub);

    for_each_string_list_item(item, csub) {
        const char *abbrev= item->util;
        /*
         * TRANSLATORS: This is a line of advice to resolve a merge conflict
         * in a submodule. The second argument is the abbreviated id of the
         * commit that needs to be merged.
         * E.g. - go to submodule (sub), and either merge commit abc1234
         */
        strbuf_addf(&tmp, _(" - go to submodule %s, and either merge
commit %s\n"
                            "   or update to an existing commit which
has merged those changes\n"),
                            item->string, abbrev);
    }

    strbuf_addf(&msg,
                _("Recursive merging with submodules currently only
supports trivial cases.\n"
                  "Please manually handle the merging of each
conflicted submodule.\n"
                  "This can be accomplished with the following steps:\n"
                  "%s"
                  " - come back to superproject and run:\n\n"
                  "      git add %s\n\n"
                  "   to record the above merge or update\n"
                  " - resolve any other conflicts in the superproject\n"
                  " - commit the resulting index in the superproject\n"),
                  tmp.buf, tmp2.buf);

This will give translators precisely two messages to translate (and we
can't drop it to one since one of the two is repeated a variable
number of times), and provide more built-in context about how to
translate since the whole message is involved.  If one of the messages
translates into something especially long, they can even add line
breaks and reflow the paragraph in ways that make sense for them,
which your current version just doesn't permit.


> +       }
> +}
> +
>  void merge_display_update_messages(struct merge_options *opt,
>                                    int detailed,
>                                    struct merge_result *result)
> @@ -4461,6 +4558,8 @@ void merge_display_update_messages(struct merge_options *opt,
>         }
>         string_list_clear(&olist, 0);
>
> +       print_submodule_conflict_suggestion(&opti->conflicted_submodules);
> +
>         /* Also include needed rename limit adjustment now */
>         diff_warn_rename_limit("merge.renamelimit",
>                                opti->renames.needed_limit, 0);
> @@ -4657,6 +4756,7 @@ static void merge_start(struct merge_options *opt, struct merge_result *result)
>         trace2_region_enter("merge", "allocate/init", opt->repo);
>         if (opt->priv) {
>                 clear_or_reinit_internal_opts(opt->priv, 1);
> +               string_list_init_nodup(&opt->priv->conflicted_submodules);
>                 trace2_region_leave("merge", "allocate/init", opt->repo);
>                 return;
>         }
> diff --git a/t/t6437-submodule-merge.sh b/t/t6437-submodule-merge.sh
> index c253bf759a..414597a420 100755
> --- a/t/t6437-submodule-merge.sh
> +++ b/t/t6437-submodule-merge.sh
> @@ -103,8 +103,25 @@ test_expect_success 'setup for merge search' '
>          echo "file-c" > file-c &&
>          git add file-c &&
>          git commit -m "sub-c") &&
> -       git commit -a -m "c" &&
> +       git commit -a -m "c")
> +'
>
> +test_expect_success 'merging should conflict for non fast-forward' '
> +       test_when_finished "git -C merge-search reset --hard" &&
> +       (cd merge-search &&
> +        git checkout -b test-nonforward-a b &&
> +         if test "$GIT_TEST_MERGE_ALGORITHM" = ort
> +         then
> +               test_must_fail git merge c >actual &&
> +               sub_expect="go to submodule (sub), and either merge commit $(git -C sub rev-parse --short sub-c)" &&
> +               grep "$sub_expect" actual
> +         else
> +               test_must_fail git merge c 2> actual
> +         fi)
> +'
> +
> +test_expect_success 'finish setup for merge-search' '
> +       (cd merge-search &&
>         git checkout -b d a &&
>         (cd sub &&
>          git checkout -b sub-d sub-b &&
> @@ -129,14 +146,16 @@ test_expect_success 'merge with one side as a fast-forward of the other' '
>          test_cmp expect actual)
>  '
>
> -test_expect_success 'merging should conflict for non fast-forward' '
> +test_expect_success 'merging should conflict for non fast-forward (resolution exists)' '
>         (cd merge-search &&
> -        git checkout -b test-nonforward b &&
> +        git checkout -b test-nonforward-b b &&
>          (cd sub &&
>           git rev-parse --short sub-d > ../expect) &&
>           if test "$GIT_TEST_MERGE_ALGORITHM" = ort
>           then
> -               test_must_fail git merge c >actual
> +               test_must_fail git merge c >actual &&
> +               sub_expect="go to submodule (sub), and either merge commit $(git -C sub rev-parse --short sub-c)" &&
> +               grep "$sub_expect" actual
>           else
>                 test_must_fail git merge c 2> actual
>           fi &&
> @@ -161,7 +180,9 @@ test_expect_success 'merging should fail for ambiguous common parent' '
>          ) &&
>          if test "$GIT_TEST_MERGE_ALGORITHM" = ort
>          then
> -               test_must_fail git merge c >actual
> +               test_must_fail git merge c >actual &&
> +               sub_expect="go to submodule (sub), and either merge commit $(git -C sub rev-parse --short sub-c)" &&
> +               grep "$sub_expect" actual
>          else
>                 test_must_fail git merge c 2> actual
>          fi &&
> @@ -205,7 +226,12 @@ test_expect_success 'merging should fail for changes that are backwards' '
>         git commit -a -m "f" &&
>
>         git checkout -b test-backward e &&
> -       test_must_fail git merge f)
> +       test_must_fail git merge f >actual &&
> +       if test "$GIT_TEST_MERGE_ALGORITHM" = ort
> +    then
> +               sub_expect="go to submodule (sub), and either merge commit $(git -C sub rev-parse --short sub-d)" &&
> +               grep "$sub_expect" actual
> +       fi)
>  '
>
>
> @@ -476,4 +502,44 @@ test_expect_failure 'directory/submodule conflict; merge --abort works afterward
>         )
>  '
>
> +# Setup:
> +#   - Submodule has 2 commits: a and b
> +#   - Superproject branch 'a' adds and commits submodule pointing to 'commit a'
> +#   - Superproject branch 'b' adds and commits submodule pointing to 'commit b'
> +# If these two branches are now merged, there is no merge base
> +test_expect_success 'setup for null merge base' '
> +       mkdir no-merge-base &&
> +       (cd no-merge-base &&
> +       git init &&
> +       mkdir sub &&
> +       (cd sub &&
> +        git init &&
> +        echo "file-a" > file-a &&
> +        git add file-a &&
> +        git commit -m "commit a") &&
> +       git commit --allow-empty -m init &&
> +       git branch init &&
> +       git checkout -b a init &&
> +       git add sub &&
> +       git commit -m "a" &&
> +       git switch main &&
> +       (cd sub &&
> +        echo "file-b" > file-b &&
> +        git add file-b &&
> +        git commit -m "commit b"))
> +'
> +
> +test_expect_success 'merging should fail with no merge base' '
> +       (cd no-merge-base &&
> +       git checkout -b b init &&
> +       git add sub &&
> +       git commit -m "b" &&
> +       test_must_fail git merge a >actual &&
> +       if test "$GIT_TEST_MERGE_ALGORITHM" = ort
> +    then
> +               sub_expect="go to submodule (sub), and either merge commit $(git -C sub rev-parse --short HEAD^1)" &&
> +               grep "$sub_expect" actual
> +       fi)
> +'
> +
>  test_done
> diff --git a/t/t7402-submodule-rebase.sh b/t/t7402-submodule-rebase.sh
> index 8e32f19007..ebeca12a71 100755
> --- a/t/t7402-submodule-rebase.sh
> +++ b/t/t7402-submodule-rebase.sh
> @@ -104,7 +104,7 @@ test_expect_success 'rebasing submodule that should conflict' '
>         test_tick &&
>         git commit -m fourth &&
>
> -       test_must_fail git rebase --onto HEAD^^ HEAD^ HEAD^0 &&
> +       test_must_fail git rebase --onto HEAD^^ HEAD^ HEAD^0 >actual_output &&
>         git ls-files -s submodule >actual &&
>         (
>                 cd submodule &&
> @@ -112,7 +112,12 @@ test_expect_success 'rebasing submodule that should conflict' '
>                 echo "160000 $(git rev-parse HEAD^^) 2  submodule" &&
>                 echo "160000 $(git rev-parse HEAD) 3    submodule"
>         ) >expect &&
> -       test_cmp expect actual
> +       test_cmp expect actual &&
> +       if test "$GIT_TEST_MERGE_ALGORITHM" = ort
> +    then
> +               sub_expect="go to submodule (submodule), and either merge commit $(git -C submodule rev-parse --short HEAD^0)" &&
> +               grep "$sub_expect" actual_output
> +       fi
>  '
>
>  test_done
>
> base-commit: 9dd64cb4d310986dd7b8ca7fff92f9b61e0bd21a
> --
> 2.37.1.455.g008518b4e5-goog
>
Ævar Arnfjörð Bjarmason Aug. 1, 2022, 12:06 p.m. UTC | #3
On Thu, Jul 28 2022, Calvin Wan wrote:

> +		strbuf_addf(&tmp, _("Recursive merging with submodules currently only supports trivial cases."));
> +		strbuf_addf(&msg, "%s\n", tmp.buf);
> +		strbuf_release(&tmp);

Aside from anything else mentioned, please run the "static-analysis" job
in CI (or coccicheck locally) to sanity check patches. The "seen" branch
is currently failing on this & similar API use:
https://github.com/git/git/runs/7587382291?check_suite_focus=true

Per my previous feedback we should change this anyway, but that would
have shown that doing this particular thing is better done with
strbuf_addbuf() (and actually buggy with strbuf_addf(), if we had a
vector where format specifiers could be inserted).
Calvin Wan Aug. 1, 2022, 10:24 p.m. UTC | #4
> > 4. Submodule not checked out: Still returns early, but added a
> >    NEEDSWORK bit since current error message does not reflect the
> >    correct resolution
>
> s/does not reflect the correct resolution/also deserves a more
> detailed explanation of how to resolve/ ?

ack

> Thanks.  Including a range-diff in the cover letter would be really
> helpful, for future reference.

will do

> Um, repo_submodule_init() returns 0 on success, non-zero upon failure.
> So !sub_initialized means "submodule IS initialized", though it
> appears to read to mean the opposite.  Can you consider renaming your
> variable (maybe just to "sub_not_initialized")?

ack


> Technically, we just did generate an error message ("Failed to merge
> submodule %s (not checked out)").
>
> Maybe replace "immediately rather than generating an error message"
> with "immediately.  It would be better to "goto cleanup" here, after
> setting some flag requesting a more detailed message be saved for
> print_submodule_conflict_suggetion()".  Or something like that.

Sounds like a better place to put the NEEDSWORK bit.

>
> >                 return 0;
> >         }
> >
> > +       if (is_null_oid(o)) {
> > +               path_msg(opt, CONFLICT_SUBMODULE_NULL_MERGE_BASE, 0,
> > +                        path, NULL, NULL, NULL,
> > +                        _("Failed to merge submodule %s (no merge base)"),
> > +                        path);
> > +               goto cleanup;
> > +       }
>
> Does this need to be moved after initializing the submodule?  I
> thought that was the point of introducing the sub_initialized
> variable, and we're clearly not going to use it, so it would seem to
> make more sense to not initialize it for this case.

The submodule needs to be initialized to generate part of the error
message. See the following in cleanup:

repo_find_unique_abbrev(&subrepo, b, DEFAULT_ABBREV)

> Yuck.  I'm not a translator, so maybe what you are doing is preferred.
> But wouldn't translators find it annoying to have to translate " - %s"
> and "  %s" in all these places (and wouldn't there need to be a
> TRANSLATORS comment before each and every one)?

As per Avar's suggestion, I made a helper function so translators
only have to translate " - %s" and "  %s" once. This change does
end up lining up the `git add ...` command, but I think that's fine

- come back to superproject and run:

   git add sub3 sub2 sub

   to record the above merge or update

> I also find the big block of code somewhat painful to read.  Could we
> instead do something like (note, I have both a tmp and tmp2):
>
>     strbuf_add_separated_string_list(&tmp2, " ", csub);
>
>     for_each_string_list_item(item, csub) {
>         const char *abbrev= item->util;
>         /*
>          * TRANSLATORS: This is a line of advice to resolve a merge conflict
>          * in a submodule. The second argument is the abbreviated id of the
>          * commit that needs to be merged.
>          * E.g. - go to submodule (sub), and either merge commit abc1234
>          */
>         strbuf_addf(&tmp, _(" - go to submodule %s, and either merge
> commit %s\n"
>                             "   or update to an existing commit which
> has merged those changes\n"),
>                             item->string, abbrev);
>     }
>
>     strbuf_addf(&msg,
>                 _("Recursive merging with submodules currently only
> supports trivial cases.\n"
>                   "Please manually handle the merging of each
> conflicted submodule.\n"
>                   "This can be accomplished with the following steps:\n"
>                   "%s"
>                   " - come back to superproject and run:\n\n"
>                   "      git add %s\n\n"
>                   "   to record the above merge or update\n"
>                   " - resolve any other conflicts in the superproject\n"
>                   " - commit the resulting index in the superproject\n"),
>                   tmp.buf, tmp2.buf);
>
> This will give translators precisely two messages to translate (and we
> can't drop it to one since one of the two is repeated a variable
> number of times), and provide more built-in context about how to
> translate since the whole message is involved.  If one of the messages
> translates into something especially long, they can even add line
> breaks and reflow the paragraph in ways that make sense for them,
> which your current version just doesn't permit.

I think Avar's suggestion makes translating the formatting easier than
your suggestion, at the cost of having a big block of code to setup it
all up.
Junio C Hamano Aug. 2, 2022, 12:50 a.m. UTC | #5
Calvin Wan <calvinwan@google.com> writes:

>  merge-ort.c                 | 112 ++++++++++++++++++++++++++++++++++--
>  t/t6437-submodule-merge.sh  |  78 +++++++++++++++++++++++--
>  t/t7402-submodule-rebase.sh |   9 ++-
>  3 files changed, 185 insertions(+), 14 deletions(-)
>
> diff --git a/merge-ort.c b/merge-ort.c
> index 01f150ef3b..4302e900ee 100644
> --- a/merge-ort.c
> +++ b/merge-ort.c
> @@ -387,6 +387,9 @@ struct merge_options_internal {
>  
>  	/* call_depth: recursion level counter for merging merge bases */
>  	int call_depth;
> +
> +	/* field that holds submodule conflict information */
> +	struct string_list conflicted_submodules;
>  };
>  
>  struct version_info {
> @@ -517,6 +520,7 @@ enum conflict_and_info_types {
>  	CONFLICT_SUBMODULE_NOT_INITIALIZED,
>  	CONFLICT_SUBMODULE_HISTORY_NOT_AVAILABLE,
>  	CONFLICT_SUBMODULE_MAY_HAVE_REWINDS,
> +	CONFLICT_SUBMODULE_NULL_MERGE_BASE,
>  
>  	/* Keep this entry _last_ in the list */
>  	NB_CONFLICT_TYPES,
> @@ -570,6 +574,8 @@ static const char *type_short_descriptions[] = {
>  		"CONFLICT (submodule history not available)",
>  	[CONFLICT_SUBMODULE_MAY_HAVE_REWINDS] =
>  		"CONFLICT (submodule may have rewinds)",

The other ones are semi sentences ...

> +	[CONFLICT_SUBMODULE_NULL_MERGE_BASE] =
> +		"CONFLICT (submodule no merge base)"

... and this wants to become one, too, e.g. "submodule lacks merge
base", perhaps?

>  };

> @@ -686,6 +692,8 @@ static void clear_or_reinit_internal_opts(struct merge_options_internal *opti,
>  
>  	mem_pool_discard(&opti->pool, 0);
>  
> +	string_list_clear(&opti->conflicted_submodules, 1);
> +
>  	/* Clean out callback_data as well. */
>  	FREE_AND_NULL(renames->callback_data);
>  	renames->callback_data_nr = renames->callback_data_alloc = 0;
> @@ -1744,26 +1752,40 @@ static int merge_submodule(struct merge_options *opt,
>  
>  	int i;
>  	int search = !opt->priv->call_depth;
> +	int sub_initialized = 1;
>  
>  	/* store fallback answer in result in case we fail */
>  	oidcpy(result, opt->priv->call_depth ? o : a);
>  
>  	/* we can not handle deletion conflicts */
> -	if (is_null_oid(o))
> -		return 0;
>  	if (is_null_oid(a))
> -		return 0;
> +		BUG("submodule deleted on one side; this should be handled outside of merge_submodule()"); 
>  	if (is_null_oid(b))
> -		return 0;
> +		BUG("submodule deleted on one side; this should be handled outside of merge_submodule()");

OK.  It is interesting that the first condition (i.e. 'o' is
missing) we are removing is not about "we cannot handle deletion",
so this change corrects the code to match the comment.  As the BUG()
messages are the same on these sides,

	if (is_null_oid(a) || is_null_oid(b))
		BUG(...);

may be easier to read, perhaps?

> -	if (repo_submodule_init(&subrepo, opt->repo, path, null_oid())) {
> +	if ((sub_initialized = repo_submodule_init(&subrepo,
> +									opt->repo, path, null_oid()))) {

I wonder where this overly deep indentation come from?  Can the
.editorconfig file we ship with the project help?

>  		path_msg(opt, CONFLICT_SUBMODULE_NOT_INITIALIZED, 0,
>  			 path, NULL, NULL, NULL,
>  			 _("Failed to merge submodule %s (not checked out)"),
>  			 path);
> +		/*
> +		 * NEEDSWORK: Since the steps to resolve this error are
> +		 * more involved than what is currently in 
> +		 * print_submodule_conflict_suggestion(), we return
> +		 * immediately rather than generating an error message
> +		 */

OK.

>  		return 0;
>  	}
>  
> +	if (is_null_oid(o)) {
> +		path_msg(opt, CONFLICT_SUBMODULE_NULL_MERGE_BASE, 0,
> +			 path, NULL, NULL, NULL,
> +			 _("Failed to merge submodule %s (no merge base)"),
> +			 path);

OK.

> +		goto cleanup;
> +	}

> @@ -1849,7 +1871,15 @@ static int merge_submodule(struct merge_options *opt,
>  
>  	object_array_clear(&merges);
>  cleanup:
> -	repo_clear(&subrepo);
> +	if (!opt->priv->call_depth && !ret) {
> +		struct string_list *csub = &opt->priv->conflicted_submodules;
> +
> +		string_list_append(csub, path)->util =
> +				xstrdup(repo_find_unique_abbrev(&subrepo, b, DEFAULT_ABBREV));

This line could also lose one level of indent.

We record 'b' in its abbreviated form here, because the assumption
is that when merging the superproject, 'a' (which is the assumed
fallback answer wet up at the beginning of the furnction) is the
commit recorded in our side of the superproject, and it is the
commit checked out in the submodule, so the task of making the
necessary merge in the submodule is to go to the submodule and then
merge 'b' into HEAD.  Makes sense.

> @@ -4412,6 +4442,73 @@ static int record_conflicted_index_entries(struct merge_options *opt)
>  	return errs;
>  }
>  
> +static void print_submodule_conflict_suggestion(struct string_list *csub) {
> +	if (csub->nr > 0) {
> +		struct string_list_item *item;
> +		struct strbuf msg = STRBUF_INIT;
> +		struct strbuf tmp = STRBUF_INIT;
> +
> +		strbuf_addf(&tmp, _("Recursive merging with submodules currently only supports trivial cases."));
> +		strbuf_addf(&msg, "%s\n", tmp.buf);
> +		strbuf_release(&tmp);
> +
> +		strbuf_addf(&tmp, _("Please manually handle the merging of each conflicted submodule."));
> +		strbuf_addf(&msg, "%s\n", tmp.buf);
> +		strbuf_release(&tmp);
> +
> +		strbuf_addf(&tmp, _("This can be accomplished with the following steps:"));
> +		strbuf_addf(&msg, "%s\n", tmp.buf);
> +		strbuf_release(&tmp);
> +
> +		for_each_string_list_item(item, csub) {
> +			const char *abbrev= item->util;
> +			/*
> +			 * TRANSLATORS: This is a line of advice to resolve a merge conflict
> +			 * in a submodule. The second argument is the abbreviated id of the
> +			 * commit that needs to be merged.
> +			 * E.g. - go to submodule (sub), and either merge commit abc1234"
> +			 */
> +			strbuf_addf(&tmp, _("go to submodule (%s), and either merge commit %s"),
> +													item->string, abbrev);

Is this also an instance of overly deep indentation (it is so deep
that I cannot easily tell)?

When we say "either merge commit %s" (where %s is 'b'---what they
have as we saw earlier), do we need to mention what the value of 'a'
is to the user, or is it redundant because we are absolutely sure
that 'a' is what is checked out in the submodule?

> +			strbuf_addf(&msg, _(" - %s"), tmp.buf);
> +			strbuf_addf(&msg, "\n");
> +			strbuf_release(&tmp);
> +			strbuf_addf(&tmp, _("or update to an existing commit which has merged those changes"));

"those changes" means "a..b"?  Again, I just want to make sure that
the user in this situation knows "the other end" of the range when
they are told only about 'b'.

> +			strbuf_addf(&msg, _("   %s"), tmp.buf);
> +			strbuf_addf(&msg, "\n");
> +			strbuf_release(&tmp);
> +		}
> +		strbuf_addf(&tmp, _("come back to superproject and run:"));
> +		strbuf_addf(&msg, _(" - %s"), tmp.buf);
> +		strbuf_addf(&msg, "\n\n");
> +		strbuf_release(&tmp);
> +
> +		strbuf_addf(&tmp, "git add ");
> +		strbuf_add_separated_string_list(&tmp, " ", csub);
> +		strbuf_addf(&msg, _("       %s"), tmp.buf);
> +		strbuf_addf(&msg, "\n\n");
> +		strbuf_release(&tmp);
> +
> +		strbuf_addf(&tmp, _("to record the above merge or update"));
> +		strbuf_addf(&msg, _("   %s"), tmp.buf);
> +		strbuf_addf(&msg, "\n");
> +		strbuf_release(&tmp);
> +
> +		strbuf_addf(&tmp, _("resolve any other conflicts in the superproject"));
> +		strbuf_addf(&msg, _(" - %s"), tmp.buf);
> +		strbuf_addf(&msg, "\n");
> +		strbuf_release(&tmp);
> +
> +		strbuf_addf(&tmp, _("commit the resulting index in the superproject"));
> +		strbuf_addf(&msg, _(" - %s"), tmp.buf);
> +		strbuf_addf(&msg, "\n");
> +		strbuf_release(&tmp);
> +
> +		printf("%s", msg.buf);
> +		strbuf_release(&msg);
> +	}
> +}

OK.  Makes sense.

Thanks.
Calvin Wan Aug. 2, 2022, 9:03 p.m. UTC | #6
> > @@ -570,6 +574,8 @@ static const char *type_short_descriptions[] = {
> >               "CONFLICT (submodule history not available)",
> >       [CONFLICT_SUBMODULE_MAY_HAVE_REWINDS] =
> >               "CONFLICT (submodule may have rewinds)",
>
> The other ones are semi sentences ...
>
> > +     [CONFLICT_SUBMODULE_NULL_MERGE_BASE] =
> > +             "CONFLICT (submodule no merge base)"
>
> ... and this wants to become one, too, e.g. "submodule lacks merge
> base", perhaps?

lacks/missing both SGTM


>         if (is_null_oid(a) || is_null_oid(b))
>                 BUG(...);
>
> may be easier to read, perhaps?

ack

> I wonder where this overly deep indentation come from?  Can the
> .editorconfig file we ship with the project help?

It comes from me not properly indenting the line. I'll take a look at
the .editorconfig file.

> When we say "either merge commit %s" (where %s is 'b'---what they
> have as we saw earlier), do we need to mention what the value of 'a'
> is to the user, or is it redundant because we are absolutely sure
> that 'a' is what is checked out in the submodule?

We are absolutely sure that 'a' is the GITLINK to the submodule, but
if the user changes their local submodule without updating the reference,
it can result in errors like "not checked out" and "commits not present"
during the merge.

> > +                     strbuf_addf(&tmp, _("or update to an existing commit which has merged those changes"));
>
> "those changes" means "a..b"?  Again, I just want to make sure that
> the user in this situation knows "the other end" of the range when
> they are told only about 'b'.

Correct. Based on what I said above, it might be worthwhile to mention
'a' as well since that information could also help the user in those cases.
I'm thinking something like this:

"go to submodule ('sub' : 'a'), and either merge commit 'b'\n"
"go to submodule ('sub', 'a'), and either merge commit 'b'\n"
"go to submodule 'sub', commit 'a', and either merge commit 'b'\n"
Junio C Hamano Aug. 2, 2022, 9:11 p.m. UTC | #7
Calvin Wan <calvinwan@google.com> writes:

> I'm thinking something like this:
>
> "go to submodule ('sub' : 'a'), and either merge commit 'b'\n"
> "go to submodule ('sub', 'a'), and either merge commit 'b'\n"
> "go to submodule 'sub', commit 'a', and either merge commit 'b'\n"

In the first two, I suspect that it may not be quite clear what 'a'
means to the user.  In the third one, the first "commit" might be
mistaken as a verb.  I am tempted to say

    cd to <sub>, run "checkout --detach <a>" then "merge <b>"

but that may be a bit too prescriptive.  I dunno.
Calvin Wan Aug. 2, 2022, 9:55 p.m. UTC | #8
I'm starting to think this is getting out of scope for my patch.
For the errors, "not checked out" and "commits not present",
I will have a NEEDSWORK bit attached to them in
print_submodule_conflict(), and if any of the submodules has
those errors, then my message won't print out. That way,
we are guaranteed to have 'a' checked out when my message
prints, rendering it redundant.
Junio C Hamano Aug. 2, 2022, 10:22 p.m. UTC | #9
Calvin Wan <calvinwan@google.com> writes:

> I'm starting to think this is getting out of scope for my patch.
> For the errors, "not checked out" and "commits not present",
> I will have a NEEDSWORK bit attached to them in
> print_submodule_conflict(), and if any of the submodules has
> those errors, then my message won't print out. That way,
> we are guaranteed to have 'a' checked out when my message
> prints, rendering it redundant.

That's fine, then.

Thanks for thinking it through.
diff mbox series

Patch

diff --git a/merge-ort.c b/merge-ort.c
index 01f150ef3b..4302e900ee 100644
--- a/merge-ort.c
+++ b/merge-ort.c
@@ -387,6 +387,9 @@  struct merge_options_internal {
 
 	/* call_depth: recursion level counter for merging merge bases */
 	int call_depth;
+
+	/* field that holds submodule conflict information */
+	struct string_list conflicted_submodules;
 };
 
 struct version_info {
@@ -517,6 +520,7 @@  enum conflict_and_info_types {
 	CONFLICT_SUBMODULE_NOT_INITIALIZED,
 	CONFLICT_SUBMODULE_HISTORY_NOT_AVAILABLE,
 	CONFLICT_SUBMODULE_MAY_HAVE_REWINDS,
+	CONFLICT_SUBMODULE_NULL_MERGE_BASE,
 
 	/* Keep this entry _last_ in the list */
 	NB_CONFLICT_TYPES,
@@ -570,6 +574,8 @@  static const char *type_short_descriptions[] = {
 		"CONFLICT (submodule history not available)",
 	[CONFLICT_SUBMODULE_MAY_HAVE_REWINDS] =
 		"CONFLICT (submodule may have rewinds)",
+	[CONFLICT_SUBMODULE_NULL_MERGE_BASE] =
+		"CONFLICT (submodule no merge base)"
 };
 
 struct logical_conflict_info {
@@ -686,6 +692,8 @@  static void clear_or_reinit_internal_opts(struct merge_options_internal *opti,
 
 	mem_pool_discard(&opti->pool, 0);
 
+	string_list_clear(&opti->conflicted_submodules, 1);
+
 	/* Clean out callback_data as well. */
 	FREE_AND_NULL(renames->callback_data);
 	renames->callback_data_nr = renames->callback_data_alloc = 0;
@@ -1744,26 +1752,40 @@  static int merge_submodule(struct merge_options *opt,
 
 	int i;
 	int search = !opt->priv->call_depth;
+	int sub_initialized = 1;
 
 	/* store fallback answer in result in case we fail */
 	oidcpy(result, opt->priv->call_depth ? o : a);
 
 	/* we can not handle deletion conflicts */
-	if (is_null_oid(o))
-		return 0;
 	if (is_null_oid(a))
-		return 0;
+		BUG("submodule deleted on one side; this should be handled outside of merge_submodule()"); 
 	if (is_null_oid(b))
-		return 0;
+		BUG("submodule deleted on one side; this should be handled outside of merge_submodule()");
 
-	if (repo_submodule_init(&subrepo, opt->repo, path, null_oid())) {
+	if ((sub_initialized = repo_submodule_init(&subrepo,
+									opt->repo, path, null_oid()))) {
 		path_msg(opt, CONFLICT_SUBMODULE_NOT_INITIALIZED, 0,
 			 path, NULL, NULL, NULL,
 			 _("Failed to merge submodule %s (not checked out)"),
 			 path);
+		/*
+		 * NEEDSWORK: Since the steps to resolve this error are
+		 * more involved than what is currently in 
+		 * print_submodule_conflict_suggestion(), we return
+		 * immediately rather than generating an error message
+		 */
 		return 0;
 	}
 
+	if (is_null_oid(o)) {
+		path_msg(opt, CONFLICT_SUBMODULE_NULL_MERGE_BASE, 0,
+			 path, NULL, NULL, NULL,
+			 _("Failed to merge submodule %s (no merge base)"),
+			 path);
+		goto cleanup;
+	}
+
 	if (!(commit_o = lookup_commit_reference(&subrepo, o)) ||
 	    !(commit_a = lookup_commit_reference(&subrepo, a)) ||
 	    !(commit_b = lookup_commit_reference(&subrepo, b))) {
@@ -1849,7 +1871,15 @@  static int merge_submodule(struct merge_options *opt,
 
 	object_array_clear(&merges);
 cleanup:
-	repo_clear(&subrepo);
+	if (!opt->priv->call_depth && !ret) {
+		struct string_list *csub = &opt->priv->conflicted_submodules;
+
+		string_list_append(csub, path)->util =
+				xstrdup(repo_find_unique_abbrev(&subrepo, b, DEFAULT_ABBREV));
+	}
+
+	if (!sub_initialized)
+		repo_clear(&subrepo);
 	return ret;
 }
 
@@ -4412,6 +4442,73 @@  static int record_conflicted_index_entries(struct merge_options *opt)
 	return errs;
 }
 
+static void print_submodule_conflict_suggestion(struct string_list *csub) {
+	if (csub->nr > 0) {
+		struct string_list_item *item;
+		struct strbuf msg = STRBUF_INIT;
+		struct strbuf tmp = STRBUF_INIT;
+
+		strbuf_addf(&tmp, _("Recursive merging with submodules currently only supports trivial cases."));
+		strbuf_addf(&msg, "%s\n", tmp.buf);
+		strbuf_release(&tmp);
+
+		strbuf_addf(&tmp, _("Please manually handle the merging of each conflicted submodule."));
+		strbuf_addf(&msg, "%s\n", tmp.buf);
+		strbuf_release(&tmp);
+
+		strbuf_addf(&tmp, _("This can be accomplished with the following steps:"));
+		strbuf_addf(&msg, "%s\n", tmp.buf);
+		strbuf_release(&tmp);
+
+		for_each_string_list_item(item, csub) {
+			const char *abbrev= item->util;
+			/*
+			 * TRANSLATORS: This is a line of advice to resolve a merge conflict
+			 * in a submodule. The second argument is the abbreviated id of the
+			 * commit that needs to be merged.
+			 * E.g. - go to submodule (sub), and either merge commit abc1234"
+			 */
+			strbuf_addf(&tmp, _("go to submodule (%s), and either merge commit %s"),
+													item->string, abbrev);
+			strbuf_addf(&msg, _(" - %s"), tmp.buf);
+			strbuf_addf(&msg, "\n");
+			strbuf_release(&tmp);
+			strbuf_addf(&tmp, _("or update to an existing commit which has merged those changes"));
+			strbuf_addf(&msg, _("   %s"), tmp.buf);
+			strbuf_addf(&msg, "\n");
+			strbuf_release(&tmp);
+		}
+		strbuf_addf(&tmp, _("come back to superproject and run:"));
+		strbuf_addf(&msg, _(" - %s"), tmp.buf);
+		strbuf_addf(&msg, "\n\n");
+		strbuf_release(&tmp);
+
+		strbuf_addf(&tmp, "git add ");
+		strbuf_add_separated_string_list(&tmp, " ", csub);
+		strbuf_addf(&msg, _("       %s"), tmp.buf);
+		strbuf_addf(&msg, "\n\n");
+		strbuf_release(&tmp);
+
+		strbuf_addf(&tmp, _("to record the above merge or update"));
+		strbuf_addf(&msg, _("   %s"), tmp.buf);
+		strbuf_addf(&msg, "\n");
+		strbuf_release(&tmp);
+
+		strbuf_addf(&tmp, _("resolve any other conflicts in the superproject"));
+		strbuf_addf(&msg, _(" - %s"), tmp.buf);
+		strbuf_addf(&msg, "\n");
+		strbuf_release(&tmp);
+
+		strbuf_addf(&tmp, _("commit the resulting index in the superproject"));
+		strbuf_addf(&msg, _(" - %s"), tmp.buf);
+		strbuf_addf(&msg, "\n");
+		strbuf_release(&tmp);
+
+		printf("%s", msg.buf);
+		strbuf_release(&msg);
+	}
+}
+
 void merge_display_update_messages(struct merge_options *opt,
 				   int detailed,
 				   struct merge_result *result)
@@ -4461,6 +4558,8 @@  void merge_display_update_messages(struct merge_options *opt,
 	}
 	string_list_clear(&olist, 0);
 
+	print_submodule_conflict_suggestion(&opti->conflicted_submodules);
+
 	/* Also include needed rename limit adjustment now */
 	diff_warn_rename_limit("merge.renamelimit",
 			       opti->renames.needed_limit, 0);
@@ -4657,6 +4756,7 @@  static void merge_start(struct merge_options *opt, struct merge_result *result)
 	trace2_region_enter("merge", "allocate/init", opt->repo);
 	if (opt->priv) {
 		clear_or_reinit_internal_opts(opt->priv, 1);
+		string_list_init_nodup(&opt->priv->conflicted_submodules);
 		trace2_region_leave("merge", "allocate/init", opt->repo);
 		return;
 	}
diff --git a/t/t6437-submodule-merge.sh b/t/t6437-submodule-merge.sh
index c253bf759a..414597a420 100755
--- a/t/t6437-submodule-merge.sh
+++ b/t/t6437-submodule-merge.sh
@@ -103,8 +103,25 @@  test_expect_success 'setup for merge search' '
 	 echo "file-c" > file-c &&
 	 git add file-c &&
 	 git commit -m "sub-c") &&
-	git commit -a -m "c" &&
+	git commit -a -m "c")
+'
 
+test_expect_success 'merging should conflict for non fast-forward' '
+	test_when_finished "git -C merge-search reset --hard" &&
+	(cd merge-search &&
+	 git checkout -b test-nonforward-a b &&
+	  if test "$GIT_TEST_MERGE_ALGORITHM" = ort
+	  then
+		test_must_fail git merge c >actual &&
+		sub_expect="go to submodule (sub), and either merge commit $(git -C sub rev-parse --short sub-c)" &&
+	 	grep "$sub_expect" actual
+	  else
+		test_must_fail git merge c 2> actual
+	  fi)
+'
+
+test_expect_success 'finish setup for merge-search' '
+	(cd merge-search &&
 	git checkout -b d a &&
 	(cd sub &&
 	 git checkout -b sub-d sub-b &&
@@ -129,14 +146,16 @@  test_expect_success 'merge with one side as a fast-forward of the other' '
 	 test_cmp expect actual)
 '
 
-test_expect_success 'merging should conflict for non fast-forward' '
+test_expect_success 'merging should conflict for non fast-forward (resolution exists)' '
 	(cd merge-search &&
-	 git checkout -b test-nonforward b &&
+	 git checkout -b test-nonforward-b b &&
 	 (cd sub &&
 	  git rev-parse --short sub-d > ../expect) &&
 	  if test "$GIT_TEST_MERGE_ALGORITHM" = ort
 	  then
-		test_must_fail git merge c >actual
+		test_must_fail git merge c >actual &&
+		sub_expect="go to submodule (sub), and either merge commit $(git -C sub rev-parse --short sub-c)" &&
+	 	grep "$sub_expect" actual
 	  else
 		test_must_fail git merge c 2> actual
 	  fi &&
@@ -161,7 +180,9 @@  test_expect_success 'merging should fail for ambiguous common parent' '
 	 ) &&
 	 if test "$GIT_TEST_MERGE_ALGORITHM" = ort
 	 then
-		test_must_fail git merge c >actual
+		test_must_fail git merge c >actual &&
+		sub_expect="go to submodule (sub), and either merge commit $(git -C sub rev-parse --short sub-c)" &&
+		grep "$sub_expect" actual
 	 else
 		test_must_fail git merge c 2> actual
 	 fi &&
@@ -205,7 +226,12 @@  test_expect_success 'merging should fail for changes that are backwards' '
 	git commit -a -m "f" &&
 
 	git checkout -b test-backward e &&
-	test_must_fail git merge f)
+	test_must_fail git merge f >actual &&
+	if test "$GIT_TEST_MERGE_ALGORITHM" = ort
+    then
+		sub_expect="go to submodule (sub), and either merge commit $(git -C sub rev-parse --short sub-d)" &&
+		grep "$sub_expect" actual
+	fi)
 '
 
 
@@ -476,4 +502,44 @@  test_expect_failure 'directory/submodule conflict; merge --abort works afterward
 	)
 '
 
+# Setup:
+#   - Submodule has 2 commits: a and b
+#   - Superproject branch 'a' adds and commits submodule pointing to 'commit a'
+#   - Superproject branch 'b' adds and commits submodule pointing to 'commit b'
+# If these two branches are now merged, there is no merge base
+test_expect_success 'setup for null merge base' '
+	mkdir no-merge-base &&
+	(cd no-merge-base &&
+	git init &&
+	mkdir sub &&
+	(cd sub &&
+	 git init &&
+	 echo "file-a" > file-a &&
+	 git add file-a &&
+	 git commit -m "commit a") &&
+	git commit --allow-empty -m init &&
+	git branch init &&
+	git checkout -b a init &&
+	git add sub &&
+	git commit -m "a" &&
+	git switch main &&
+	(cd sub &&
+	 echo "file-b" > file-b &&
+	 git add file-b &&
+	 git commit -m "commit b"))
+'
+
+test_expect_success 'merging should fail with no merge base' '
+	(cd no-merge-base &&
+	git checkout -b b init &&
+	git add sub &&
+	git commit -m "b" &&
+	test_must_fail git merge a >actual &&
+	if test "$GIT_TEST_MERGE_ALGORITHM" = ort
+    then
+		sub_expect="go to submodule (sub), and either merge commit $(git -C sub rev-parse --short HEAD^1)" &&
+		grep "$sub_expect" actual
+	fi)
+'
+
 test_done
diff --git a/t/t7402-submodule-rebase.sh b/t/t7402-submodule-rebase.sh
index 8e32f19007..ebeca12a71 100755
--- a/t/t7402-submodule-rebase.sh
+++ b/t/t7402-submodule-rebase.sh
@@ -104,7 +104,7 @@  test_expect_success 'rebasing submodule that should conflict' '
 	test_tick &&
 	git commit -m fourth &&
 
-	test_must_fail git rebase --onto HEAD^^ HEAD^ HEAD^0 &&
+	test_must_fail git rebase --onto HEAD^^ HEAD^ HEAD^0 >actual_output &&
 	git ls-files -s submodule >actual &&
 	(
 		cd submodule &&
@@ -112,7 +112,12 @@  test_expect_success 'rebasing submodule that should conflict' '
 		echo "160000 $(git rev-parse HEAD^^) 2	submodule" &&
 		echo "160000 $(git rev-parse HEAD) 3	submodule"
 	) >expect &&
-	test_cmp expect actual
+	test_cmp expect actual &&
+	if test "$GIT_TEST_MERGE_ALGORITHM" = ort
+    then
+		sub_expect="go to submodule (submodule), and either merge commit $(git -C submodule rev-parse --short HEAD^0)" &&
+		grep "$sub_expect" actual_output
+	fi
 '
 
 test_done