diff mbox series

[08/12] merge-ort: provide a merge_get_conflicted_files() helper function

Message ID 35e0ed9271a0229fe2acd2385a7e4171d4dfe077.1642888562.git.gitgitgadget@gmail.com (mailing list archive)
State New, archived
Headers show
Series RFC: In-core git merge-tree ("Server side merges") | expand

Commit Message

Elijah Newren Jan. 22, 2022, 9:55 p.m. UTC
From: Elijah Newren <newren@gmail.com>

After a merge, this function allows the user to extract the same
information that would be printed by `ls-files -u` -- conflicted
files with their mode, oid, and stage.

Signed-off-by: Elijah Newren <newren@gmail.com>
---
 merge-ort.c | 31 +++++++++++++++++++++++++++++++
 merge-ort.h | 21 +++++++++++++++++++++
 2 files changed, 52 insertions(+)

Comments

Christian Couder Jan. 26, 2022, 10:55 a.m. UTC | #1
On Sat, Jan 22, 2022 at 10:56 PM Elijah Newren via GitGitGadget
<gitgitgadget@gmail.com> wrote:

> After a merge, this function allows the user to extract the same
> information that would be printed by `ls-files -u` -- conflicted

This made me wonder if "-- conflicted" should be part of the `ls-files
-u` command. Maybe "by `ls-files -u`, which means" would make things a
bit clearer.

> files with their mode, oid, and stage.
Christian Couder Jan. 26, 2022, 11:07 a.m. UTC | #2
On Sat, Jan 22, 2022 at 10:56 PM Elijah Newren via GitGitGadget
<gitgitgadget@gmail.com> wrote:

> +void merge_get_conflicted_files(struct merge_result *result,
> +                               struct string_list *conflicted_files)
> +{
> +       struct hashmap_iter iter;
> +       struct strmap_entry *e;
> +       struct merge_options_internal *opti = result->priv;
> +
> +       strmap_for_each_entry(&opti->conflicted, &iter, e) {
> +               const char *path = e->key;
> +               struct conflict_info *ci = e->value;
> +               int i;
> +
> +               VERIFY_CI(ci);
> +
> +               for (i = MERGE_BASE; i <= MERGE_SIDE2; i++) {
> +                       struct stage_info *si;
> +
> +                       if (!(ci->filemask & (1ul << i)))
> +                               continue;
> +
> +                       si = xmalloc(sizeof(*si));

It's probably a premature optimization, so feel free to ignore, but as
MERGE_BASE and MERGE_SIDE2 are constants, and ci->filemask is constant
inside the 'for' loop, we could compute before the 'for' loop how many
'struct stage_info' we will need and allocate them all at once before
the 'for' loop.

> +                       si->stage = i+1;
> +                       si->mode = ci->stages[i].mode;
> +                       oidcpy(&si->oid, &ci->stages[i].oid);
> +                       string_list_append(conflicted_files, path)->util = si;
> +               }
> +       }
> +       /* string_list_sort() uses a stable sort, so we're good */
> +       string_list_sort(conflicted_files);
> +}
Johannes Schindelin Jan. 28, 2022, 4:55 p.m. UTC | #3
Hi Elijah,

On Sat, 22 Jan 2022, Elijah Newren via GitGitGadget wrote:

> From: Elijah Newren <newren@gmail.com>
>
> After a merge, this function allows the user to extract the same
> information that would be printed by `ls-files -u` -- conflicted
> files with their mode, oid, and stage.

Hmm. Okay.

I am not really a fan of that output where we use a variable
number of lines per file. As in: in the regular case, where a file was
modified in divergent ways, we get three lines. But if it was deleted in
one branch, or if it was created in both branches, we have only two lines.

Frankly, I'd much rather have 3 lines for each and every conflict.

Granted, we currently only have three stages, and we can pretty much
guarantee that at least two of these stages are non-empty (otherwise where
would be the conflict?).

Meaning: Even if stage 3 is missing from the first conflict and stage 1 is
missing from the second conflict, in the output we would see stages 1, 2,
2, 3, i.e. a duplicate stage 2, signifying that we're talking about two
different conflicts.

But still. It makes me uneasy to have that much variability in
machine-consumable output.

And who knows, maybe if we're ever implementing more sophisticated merge
strategies for octopus merges, we might end up with more stages...

In other words...

> Signed-off-by: Elijah Newren <newren@gmail.com>
> ---
>  merge-ort.c | 31 +++++++++++++++++++++++++++++++
>  merge-ort.h | 21 +++++++++++++++++++++
>  2 files changed, 52 insertions(+)
>
> diff --git a/merge-ort.c b/merge-ort.c
> index b78dde55ad9..5e7cea6cc8f 100644
> --- a/merge-ort.c
> +++ b/merge-ort.c
> @@ -4275,6 +4275,37 @@ void merge_display_update_messages(struct merge_options *opt,
>  	trace2_region_leave("merge", "display messages", opt->repo);
>  }
>
> +void merge_get_conflicted_files(struct merge_result *result,
> +				struct string_list *conflicted_files)
> +{
> +	struct hashmap_iter iter;
> +	struct strmap_entry *e;
> +	struct merge_options_internal *opti = result->priv;
> +
> +	strmap_for_each_entry(&opti->conflicted, &iter, e) {
> +		const char *path = e->key;
> +		struct conflict_info *ci = e->value;
> +		int i;
> +
> +		VERIFY_CI(ci);
> +
> +		for (i = MERGE_BASE; i <= MERGE_SIDE2; i++) {
> +			struct stage_info *si;
> +
> +			if (!(ci->filemask & (1ul << i)))
> +				continue;
> +
> +			si = xmalloc(sizeof(*si));
> +			si->stage = i+1;
> +			si->mode = ci->stages[i].mode;
> +			oidcpy(&si->oid, &ci->stages[i].oid);
> +			string_list_append(conflicted_files, path)->util = si;
> +		}
> +	}
> +	/* string_list_sort() uses a stable sort, so we're good */
> +	string_list_sort(conflicted_files);
> +}
> +
>  void merge_switch_to_result(struct merge_options *opt,
>  			    struct tree *head,
>  			    struct merge_result *result,
> diff --git a/merge-ort.h b/merge-ort.h
> index d643b47cb7c..e635a294ea8 100644
> --- a/merge-ort.h
> +++ b/merge-ort.h
> @@ -2,6 +2,7 @@
>  #define MERGE_ORT_H
>
>  #include "merge-recursive.h"
> +#include "hash.h"
>
>  struct commit;
>  struct tree;
> @@ -89,6 +90,26 @@ void merge_display_update_messages(struct merge_options *opt,
>  				   struct merge_result *result,
>  				   FILE *stream);
>
> +struct stage_info {
> +	struct object_id oid;
> +	int mode;
> +	int stage;
> +};

... I'd rather not tack this onto a `string_list` but instead have
something like:

	struct conflict_info {
		struct {
			struct object_id oid;
			int mode;
			const char *path;
		} stages[3]
	};

where `path` can be `NULL` to indicate that this stage is missing.

Apart from this concern about the overall design, the patch looks good, of
course.

Ciao,
Dscho

> +
> +/*
> + * Provide a list of path -> {struct stage_info*} mappings for
> + * all conflicted files.  Note that each path could appear up to three
> + * times in the list, corresponding to 3 different stage entries.  In short,
> + * this basically provides the info that would be printed by `ls-files -u`.
> + *
> + * result should have been populated by a call to
> + * one of the merge_incore_[non]recursive() functions.
> + *
> + * conflicted_files should be empty before calling this function.
> + */
> +void merge_get_conflicted_files(struct merge_result *result,
> +				struct string_list *conflicted_files);
> +
>  /* Do needed cleanup when not calling merge_switch_to_result() */
>  void merge_finalize(struct merge_options *opt,
>  		    struct merge_result *result);
> --
> gitgitgadget
>
>
Elijah Newren Jan. 29, 2022, 4:55 a.m. UTC | #4
On Wed, Jan 26, 2022 at 2:55 AM Christian Couder
<christian.couder@gmail.com> wrote:
>
> On Sat, Jan 22, 2022 at 10:56 PM Elijah Newren via GitGitGadget
> <gitgitgadget@gmail.com> wrote:
>
> > After a merge, this function allows the user to extract the same
> > information that would be printed by `ls-files -u` -- conflicted
>
> This made me wonder if "-- conflicted" should be part of the `ls-files
> -u` command. Maybe "by `ls-files -u`, which means" would make things a
> bit clearer.
>
> > files with their mode, oid, and stage.

Yes, I like that wording better.  Thanks!
Elijah Newren Jan. 29, 2022, 5:06 a.m. UTC | #5
On Wed, Jan 26, 2022 at 3:07 AM Christian Couder
<christian.couder@gmail.com> wrote:
>
> On Sat, Jan 22, 2022 at 10:56 PM Elijah Newren via GitGitGadget
> <gitgitgadget@gmail.com> wrote:
>
> > +void merge_get_conflicted_files(struct merge_result *result,
> > +                               struct string_list *conflicted_files)
> > +{
> > +       struct hashmap_iter iter;
> > +       struct strmap_entry *e;
> > +       struct merge_options_internal *opti = result->priv;
> > +
> > +       strmap_for_each_entry(&opti->conflicted, &iter, e) {
> > +               const char *path = e->key;
> > +               struct conflict_info *ci = e->value;
> > +               int i;
> > +
> > +               VERIFY_CI(ci);
> > +
> > +               for (i = MERGE_BASE; i <= MERGE_SIDE2; i++) {
> > +                       struct stage_info *si;
> > +
> > +                       if (!(ci->filemask & (1ul << i)))
> > +                               continue;
> > +
> > +                       si = xmalloc(sizeof(*si));
>
> It's probably a premature optimization, so feel free to ignore, but as
> MERGE_BASE and MERGE_SIDE2 are constants, and ci->filemask is constant
> inside the 'for' loop, we could compute before the 'for' loop how many
> 'struct stage_info' we will need and allocate them all at once before
> the 'for' loop.

That's an interesting idea, but if we allocate all at once, then we'd
need to be able to deallocate all at once.  I'm not sure how to do
that, but basically the straightforward
    string_list_free(conflicted_files, 1);
that callers can use with the current function would no longer be
valid, and it might be somewhat difficult for callers to figure out
how to free the memory that was allocated.
Elijah Newren Jan. 29, 2022, 6:08 a.m. UTC | #6
On Fri, Jan 28, 2022 at 8:55 AM Johannes Schindelin
<Johannes.Schindelin@gmx.de> wrote:
>
> Hi Elijah,
>
> On Sat, 22 Jan 2022, Elijah Newren via GitGitGadget wrote:
>
> > From: Elijah Newren <newren@gmail.com>
> >
> > After a merge, this function allows the user to extract the same
> > information that would be printed by `ls-files -u` -- conflicted
> > files with their mode, oid, and stage.
>
> Hmm. Okay.
>
> I am not really a fan of that output where we use a variable
> number of lines per file. As in: in the regular case, where a file was
> modified in divergent ways, we get three lines. But if it was deleted in
> one branch, or if it was created in both branches, we have only two lines.
>
> Frankly, I'd much rather have 3 lines for each and every conflict.

Out of curiosity, does this also mean you feel like `git ls-files -u`
is mis-designed (even if we're stuck with it for backward
compatibility reasons)?

Anyway, if we make this change guaranteeing there are 3 lines for each
and every conflict, that probably implies we can remove the stage
field (just letting it be implicit), right?  And it also implies that
the "path" field of the lines is now duplicate information.  Should
the path be printed on each line regardless of the duplication?
Should the path be printed separately on its own line, followed by the
three lines of (mode, oid) pairs?  Or should the format be something
else entirely?

> Granted, we currently only have three stages...

I thought that was true too, but Junio informed me otherwise[1].
Granted, ort does not currently make use of that, but it could.  Not
sure I want to rule it out.

[1] https://lore.kernel.org/git/xmqqr1t89gi8.fsf@gitster.c.googlers.com/

> ...and we can pretty much
> guarantee that at least two of these stages are non-empty (otherwise where
> would be the conflict?).

No, that's not true.  See the paragraph about "conflict types" from
the final patch in this series, where I mentioned three different
types of conflicts that can result in a path having a single higher
order stage.

> Meaning: Even if stage 3 is missing from the first conflict and stage 1 is
> missing from the second conflict, in the output we would see stages 1, 2,
> 2, 3, i.e. a duplicate stage 2, signifying that we're talking about two
> different conflicts.

I don't understand why you're fixating on the stage here.  Why would
you want to group all the stage 2s together, count them up, and then
determine there are N conflicting files because there are N stage 2's?
 In such a design, you'd have to do the extra post-processing to
recognize the all-zero modes and hashes and toss them.

To me, that seems like more work than just handling the `ls-files -u`
style of output, and doesn't have the advantage of showing things in a
format users may already be familiar with (see above about dropping
stages and maybe also pathname).  Since the `ls-files -u` output is
simply several lines of:
   <mode> <hash> <stage> <filename>
You can get the number of conflicted files by counting the number of
*unique* filenames.  If you want to see which lines are about the same
file, get all the lines with the same filename -- they are sorted by
(<filename>, <stage>) for convenience, much like `ls-files -u` is.


On a different angle, I'm also slightly worried there's an embedded
assumption here, one that I tried to address in the "Mistake to avoid"
section I added in my last patch of this series.  What if you have a
conflicting merge and 0 lines of output in the conflicting info
section?  Is there something about the reason you're asking for three
lines of output per conflicted file which is going to make you
overlook that particular possibility and not handle it?

> But still. It makes me uneasy to have that much variability in
> machine-consumable output.
>
> And who knows, maybe if we're ever implementing more sophisticated merge
> strategies for octopus merges, we might end up with more stages...

Um, if we need to handle more stages, wouldn't that undercut your
request for exactly 3 stages?  Wouldn't it suggest that we would
indeed want to have a flexible number of lines?

> In other words...
>
> > Signed-off-by: Elijah Newren <newren@gmail.com>
> > ---
> >  merge-ort.c | 31 +++++++++++++++++++++++++++++++
> >  merge-ort.h | 21 +++++++++++++++++++++
> >  2 files changed, 52 insertions(+)
> >
> > diff --git a/merge-ort.c b/merge-ort.c
> > index b78dde55ad9..5e7cea6cc8f 100644
> > --- a/merge-ort.c
> > +++ b/merge-ort.c
> > @@ -4275,6 +4275,37 @@ void merge_display_update_messages(struct merge_options *opt,
> >       trace2_region_leave("merge", "display messages", opt->repo);
> >  }
> >
> > +void merge_get_conflicted_files(struct merge_result *result,
> > +                             struct string_list *conflicted_files)
> > +{
> > +     struct hashmap_iter iter;
> > +     struct strmap_entry *e;
> > +     struct merge_options_internal *opti = result->priv;
> > +
> > +     strmap_for_each_entry(&opti->conflicted, &iter, e) {
> > +             const char *path = e->key;
> > +             struct conflict_info *ci = e->value;
> > +             int i;
> > +
> > +             VERIFY_CI(ci);
> > +
> > +             for (i = MERGE_BASE; i <= MERGE_SIDE2; i++) {
> > +                     struct stage_info *si;
> > +
> > +                     if (!(ci->filemask & (1ul << i)))
> > +                             continue;
> > +
> > +                     si = xmalloc(sizeof(*si));
> > +                     si->stage = i+1;
> > +                     si->mode = ci->stages[i].mode;
> > +                     oidcpy(&si->oid, &ci->stages[i].oid);
> > +                     string_list_append(conflicted_files, path)->util = si;
> > +             }
> > +     }
> > +     /* string_list_sort() uses a stable sort, so we're good */
> > +     string_list_sort(conflicted_files);
> > +}
> > +
> >  void merge_switch_to_result(struct merge_options *opt,
> >                           struct tree *head,
> >                           struct merge_result *result,
> > diff --git a/merge-ort.h b/merge-ort.h
> > index d643b47cb7c..e635a294ea8 100644
> > --- a/merge-ort.h
> > +++ b/merge-ort.h
> > @@ -2,6 +2,7 @@
> >  #define MERGE_ORT_H
> >
> >  #include "merge-recursive.h"
> > +#include "hash.h"
> >
> >  struct commit;
> >  struct tree;
> > @@ -89,6 +90,26 @@ void merge_display_update_messages(struct merge_options *opt,
> >                                  struct merge_result *result,
> >                                  FILE *stream);
> >
> > +struct stage_info {
> > +     struct object_id oid;
> > +     int mode;
> > +     int stage;
> > +};
>
> ... I'd rather not tack this onto a `string_list` but instead have
> something like:
>
>         struct conflict_info {

Nit: "conflict_info" is already taken for another structure; we'd need
a different name.  But it does illustrate the idea just fine.

>                 struct {
>                         struct object_id oid;
>                         int mode;
>                         const char *path;
>                 } stages[3]
>         };
>
> where `path` can be `NULL` to indicate that this stage is missing.

I'll get back to the data structure in a second, but the note about
path being `NULL` is interesting.  I'm presuming that oid and mode are
all-zeros as well, yes?  Now, your original request was that you
wanted a line printed for all three stages.  If we're printing oid,
mode, and path for each stage, what do we print for the path one these
lines?  Is it (1) "(null)", (2) "", or (3) the real pathname (via
carefully comparing to surrounding stages to find out what it is)?  If
we're changing the format to only print the name of the path once,
does the code need to loop over the list of conflicts in order to find
out the name (since the first or second stage might have a NULL path
field)?

In regards to the overall data structure and your comment about
string_list: I'm slightly confused.  You say you want to avoid a
string_list, but you've only accounted for there being one conflict in
this data structure.  I don't see how this suggestion avoids the need
for an array or a string_list or some kind of container to hold
multiple of these.

The inclusion of path within the stages array is also interesting --
should there be (up to) three copies of the pathname allocated per
conflict?  Seems a bit wasteful.  Wouldn't it make more sense to have
something like

    struct conflicts {
        struct {
            struct object_id oid;
            unsigned short mode;
        } stages[3]
    };

and then have a string_list storing the pathname -> conflicts mappings
to avoid the pathname duplication?  Or is there something you really
find problematic about the string_list and you really want an array or
other data structure?
Johannes Sixt Jan. 29, 2022, 8:23 a.m. UTC | #7
Just a heckling from the peanut gallery...

Am 29.01.22 um 07:08 schrieb Elijah Newren:
> On Fri, Jan 28, 2022 at 8:55 AM Johannes Schindelin
> <Johannes.Schindelin@gmx.de> wrote:
>> Meaning: Even if stage 3 is missing from the first conflict and stage 1 is
>> missing from the second conflict, in the output we would see stages 1, 2,
>> 2, 3, i.e. a duplicate stage 2, signifying that we're talking about two
>> different conflicts.
> 
> I don't understand why you're fixating on the stage here.  Why would
> you want to group all the stage 2s together, count them up, and then
> determine there are N conflicting files because there are N stage 2's?

Looks like you are misunderstanding Dscho's point: When you have two
conflicts, the first with stages 1 and 2, the second with stages 2 and
3, then the 2s occur lumped together when the 4 lines are printed in a
row, and that is the cue to the parser where the new conflict begins.
Dscho did not mean that all N 2s of should be listed together.

-- Hannes
Elijah Newren Jan. 29, 2022, 4:47 p.m. UTC | #8
On Sat, Jan 29, 2022 at 12:23 AM Johannes Sixt <j6t@kdbg.org> wrote:
>
> Just a heckling from the peanut gallery...
>
> Am 29.01.22 um 07:08 schrieb Elijah Newren:
> > On Fri, Jan 28, 2022 at 8:55 AM Johannes Schindelin
> > <Johannes.Schindelin@gmx.de> wrote:
> >> Meaning: Even if stage 3 is missing from the first conflict and stage 1 is
> >> missing from the second conflict, in the output we would see stages 1, 2,
> >> 2, 3, i.e. a duplicate stage 2, signifying that we're talking about two
> >> different conflicts.
> >
> > I don't understand why you're fixating on the stage here.  Why would
> > you want to group all the stage 2s together, count them up, and then
> > determine there are N conflicting files because there are N stage 2's?
>
> Looks like you are misunderstanding Dscho's point: When you have two
> conflicts, the first with stages 1 and 2, the second with stages 2 and
> 3, then the 2s occur lumped together when the 4 lines are printed in a
> row, and that is the cue to the parser where the new conflict begins.
> Dscho did not mean that all N 2s of should be listed together.

Ah, so...I didn't understand his misunderstanding?  Using stages as a
cue to the parser where the new conflict begins is broken; you should
instead check for when the filename listed on a line does not match
the filename on the previous line.  In particular, if one conflict has
stages 1 and 2, and the next conflict has only stage 3, then looking
at stages only might cause you to accidentally lump unrelated
conflicts together.
Johannes Schindelin Feb. 4, 2022, 11:10 p.m. UTC | #9
Hi Elijah,

On Sat, 29 Jan 2022, Elijah Newren wrote:

> On Sat, Jan 29, 2022 at 12:23 AM Johannes Sixt <j6t@kdbg.org> wrote:
> >
> > Just a heckling from the peanut gallery...
> >
> > Am 29.01.22 um 07:08 schrieb Elijah Newren:
> > > On Fri, Jan 28, 2022 at 8:55 AM Johannes Schindelin
> > > <Johannes.Schindelin@gmx.de> wrote:
> > >> Meaning: Even if stage 3 is missing from the first conflict and stage 1 is
> > >> missing from the second conflict, in the output we would see stages 1, 2,
> > >> 2, 3, i.e. a duplicate stage 2, signifying that we're talking about two
> > >> different conflicts.
> > >
> > > I don't understand why you're fixating on the stage here.  Why would
> > > you want to group all the stage 2s together, count them up, and then
> > > determine there are N conflicting files because there are N stage 2's?
> >
> > Looks like you are misunderstanding Dscho's point: When you have two
> > conflicts, the first with stages 1 and 2, the second with stages 2 and
> > 3, then the 2s occur lumped together when the 4 lines are printed in a
> > row, and that is the cue to the parser where the new conflict begins.
> > Dscho did not mean that all N 2s of should be listed together.
>
> Ah, so...I didn't understand his misunderstanding?  Using stages as a
> cue to the parser where the new conflict begins is broken; you should
> instead check for when the filename listed on a line does not match
> the filename on the previous line.

But that would break down in case of rename/rename conflicts, right?

> In particular, if one conflict has stages 1 and 2, and the next conflict
> has only stage 3, then looking at stages only might cause you to
> accidentally lump unrelated conflicts together.

Precisely. That's why I would love to have a way to deviate from the
output of `ls-files -u`'s format, and have a reliable way to indicate
stages that belong to the same merge conflict.

Thanks,
Dscho
Elijah Newren Feb. 5, 2022, 12:54 a.m. UTC | #10
On Fri, Feb 4, 2022 at 3:10 PM Johannes Schindelin
<Johannes.Schindelin@gmx.de> wrote:
>
> Hi Elijah,
>
> On Sat, 29 Jan 2022, Elijah Newren wrote:
>
> > On Sat, Jan 29, 2022 at 12:23 AM Johannes Sixt <j6t@kdbg.org> wrote:
> > >
> > > Just a heckling from the peanut gallery...
> > >
> > > Am 29.01.22 um 07:08 schrieb Elijah Newren:
> > > > On Fri, Jan 28, 2022 at 8:55 AM Johannes Schindelin
> > > > <Johannes.Schindelin@gmx.de> wrote:
> > > >> Meaning: Even if stage 3 is missing from the first conflict and stage 1 is
> > > >> missing from the second conflict, in the output we would see stages 1, 2,
> > > >> 2, 3, i.e. a duplicate stage 2, signifying that we're talking about two
> > > >> different conflicts.
> > > >
> > > > I don't understand why you're fixating on the stage here.  Why would
> > > > you want to group all the stage 2s together, count them up, and then
> > > > determine there are N conflicting files because there are N stage 2's?
> > >
> > > Looks like you are misunderstanding Dscho's point: When you have two
> > > conflicts, the first with stages 1 and 2, the second with stages 2 and
> > > 3, then the 2s occur lumped together when the 4 lines are printed in a
> > > row, and that is the cue to the parser where the new conflict begins.
> > > Dscho did not mean that all N 2s of should be listed together.
> >
> > Ah, so...I didn't understand his misunderstanding?  Using stages as a
> > cue to the parser where the new conflict begins is broken; you should
> > instead check for when the filename listed on a line does not match
> > the filename on the previous line.
>
> But that would break down in case of rename/rename conflicts, right?
>
> > In particular, if one conflict has stages 1 and 2, and the next conflict
> > has only stage 3, then looking at stages only might cause you to
> > accidentally lump unrelated conflicts together.
>
> Precisely. That's why I would love to have a way to deviate from the
> output of `ls-files -u`'s format, and have a reliable way to indicate
> stages that belong to the same merge conflict.

Ah, attempting to somehow identify and present logical separate
conflicts?  That could be awesome, but I'm not sure it's technically
possible.  It certainly isn't with today's merge-ort.

Let me ask some questions first...

If I understand you correctly then in the event of a rename/rename,
i.e. foo->bar & foo->baz, then you want foo's, bar's, & baz's stages
all listed together.  Right?  And in some way that you can identify
them as related?

If we do so, how do we mark the beginning and the end of what you call
"the same merge conflict"?  If you say it's always 3 stages (with the
possibility of all-zero modes/oids), then what about the rename/rename
case above modified so that the side that did foo->baz also added a
different 'bar'?  That'd be 4 non-zero modes/oids, all of them
relevant.  Or what if the side that did foo->bar also renamed
something else to 'baz', giving us even more non-zero stages for these
three paths?  Perhaps you consider these different conflicts and want
them listed separately -- if so, where does one conflict begin and
another start and which stages are parts of which conflict?

If you are attempting to somehow present the stuff that "belongs to
the same merge conflict" are you also trying to identify what kind of
merge conflict it is?  If so, do you want each type of merge conflict
listed?  For example, let's switch from the example above of logically
disjoint paths coming together to result in more than 3 stages, and
instead pick an example with a single logical path with less than
three stages.  And now let's say that path has multiple conflicts
associated with it; let's use an example with 3: rename/delete +
modify/delete + directory/file (one side renames foo->bar while
modifying the contents, the other side deletes foo and adds the
directory 'bar/').  In this case, there is a target file 'bar' that
has two non-zero modes/oids in the ls-files-u output.  If all three
types of conflicts need to be listed, does each need to be listed with
the two non-zero modes/oids (and perhaps one zero mode/oid), resulting
in six listings for 'bar'?  Or would the duplication be confusing
enough that we instead decide to list some merge conflicts with no
stages associated with them?

Thinking about both sets of questions in the last two paragraphs from
a higher level -- should we focus on and group the higher order stages
by the individual conflicts that happen, or should we group them by
the paths that they happen to (which is what `ls-files -u` happens to
do), or should we not bother grouping them and instead duplicate the
higher order stages for each logical conflict it is part of?

As an alternative to duplicating higher order stages, do we sometimes
decide to "lump" separate conflicts together and treat them as one
conflict?  If so, what are the rules on how we decide to lump
conflicts and when not to?  Is there a bright line boundary?  And can
it be done without sucking in arbitrarily more stages for a single
conflict?


Some testcases that might be useful while considering the above
questions: take a look at the "rad", "rrdd", and "mod6" tests of
t6422.  How many "same merge conflicts" are there for each of those,
and what's the boundary between them?  And can you give the answer in
the form of rules that generically handle all cases, rather than just
answering these three specific cases?


I've thought about this problem long and hard before (in part because
of some conversations I had with Edward Thompson about libgit2 and
merging at Git Merge 2020).  It wasn't at all clear to me that libgit2
had considered anything beyond simple rename cases.  The only rules I
ever figured out that made sense to me was "group the stages by target
filename rather than by logical conflict" (so we get `ls -files -u`
populated) and print a meant-for-human message for each logical
conflict (found in the <Informational Messages> section for
merge-tree), and make NO attempt to connect stages by conflict type.

I'm sure that's not what you wanted to hear, and maybe doesn't even
play nicely with your design.  But short of ignoring the edge and
corner cases, I don't see how to solve that problem.  If you do just
want to ignore edge and corner cases, then just ignore the
rename/rename case you brought up in the first place and just use
`ls-files -u`-type output as-is within your design.  If you don't want
to ignore edge cases and want something that works with a specific
design that somehow groups conflicted file stages by conflict type,
then we're going to have to dig into all these questions above and do
some big replumbing within merge-ort.
Johannes Schindelin Feb. 21, 2022, 10:46 a.m. UTC | #11
Hi Elijah,

On Fri, 4 Feb 2022, Elijah Newren wrote:

> On Fri, Feb 4, 2022 at 3:10 PM Johannes Schindelin
> <Johannes.Schindelin@gmx.de> wrote:
> >
> > On Sat, 29 Jan 2022, Elijah Newren wrote:
> >
> > > On Sat, Jan 29, 2022 at 12:23 AM Johannes Sixt <j6t@kdbg.org> wrote:
> > > >
> > > > Just a heckling from the peanut gallery...
> > > >
> > > > Am 29.01.22 um 07:08 schrieb Elijah Newren:
> > > > > On Fri, Jan 28, 2022 at 8:55 AM Johannes Schindelin
> > > > > <Johannes.Schindelin@gmx.de> wrote:
> > > > >> Meaning: Even if stage 3 is missing from the first conflict and stage 1 is
> > > > >> missing from the second conflict, in the output we would see stages 1, 2,
> > > > >> 2, 3, i.e. a duplicate stage 2, signifying that we're talking about two
> > > > >> different conflicts.
> > > > >
> > > > > I don't understand why you're fixating on the stage here.  Why would
> > > > > you want to group all the stage 2s together, count them up, and then
> > > > > determine there are N conflicting files because there are N stage 2's?
> > > >
> > > > Looks like you are misunderstanding Dscho's point: When you have two
> > > > conflicts, the first with stages 1 and 2, the second with stages 2 and
> > > > 3, then the 2s occur lumped together when the 4 lines are printed in a
> > > > row, and that is the cue to the parser where the new conflict begins.
> > > > Dscho did not mean that all N 2s of should be listed together.
> > >
> > > Ah, so...I didn't understand his misunderstanding?  Using stages as a
> > > cue to the parser where the new conflict begins is broken; you should
> > > instead check for when the filename listed on a line does not match
> > > the filename on the previous line.
> >
> > But that would break down in case of rename/rename conflicts, right?
> >
> > > In particular, if one conflict has stages 1 and 2, and the next conflict
> > > has only stage 3, then looking at stages only might cause you to
> > > accidentally lump unrelated conflicts together.
> >
> > Precisely. That's why I would love to have a way to deviate from the
> > output of `ls-files -u`'s format, and have a reliable way to indicate
> > stages that belong to the same merge conflict.
>
> Ah, attempting to somehow identify and present logical separate
> conflicts?  That could be awesome, but I'm not sure it's technically
> possible.  It certainly isn't with today's merge-ort.
>
> Let me ask some questions first...
>
> If I understand you correctly then in the event of a rename/rename,
> i.e. foo->bar & foo->baz, then you want foo's, bar's, & baz's stages
> all listed together.  Right?  And in some way that you can identify
> them as related?

Yes, that was kind of my idea ;-)

> If we do so, how do we mark the beginning and the end of what you call
> "the same merge conflict"?  If you say it's always 3 stages (with the
> possibility of all-zero modes/oids), then what about the rename/rename
> case above modified so that the side that did foo->baz also added a
> different 'bar'?  That'd be 4 non-zero modes/oids, all of them
> relevant.  Or what if the side that did foo->bar also renamed
> something else to 'baz', giving us even more non-zero stages for these
> three paths?  Perhaps you consider these different conflicts and want
> them listed separately -- if so, where does one conflict begin and
> another start and which stages are parts of which conflict?

Thank you for calling me out on an only half-finished thought.

To be quite honest, I previously did not have much time to think of the
non-trivial nature of representing merge conflicts in a web UI when
performing merges with rename detection, in particular when performing
_recursive_ merges (where, as you point out so correctly, an arbitrary
number of rename conflicts can happen _for the same file_).

Of course, this gets even more complicated when we're also thinking about
type changes (file -> symlink, file -> directory, and vice versa).

> If you are attempting to somehow present the stuff that "belongs to
> the same merge conflict" are you also trying to identify what kind of
> merge conflict it is?  If so, do you want each type of merge conflict
> listed?  For example, let's switch from the example above of logically
> disjoint paths coming together to result in more than 3 stages, and
> instead pick an example with a single logical path with less than
> three stages.  And now let's say that path has multiple conflicts
> associated with it; let's use an example with 3: rename/delete +
> modify/delete + directory/file (one side renames foo->bar while
> modifying the contents, the other side deletes foo and adds the
> directory 'bar/').  In this case, there is a target file 'bar' that
> has two non-zero modes/oids in the ls-files-u output.  If all three
> types of conflicts need to be listed, does each need to be listed with
> the two non-zero modes/oids (and perhaps one zero mode/oid), resulting
> in six listings for 'bar'?  Or would the duplication be confusing
> enough that we instead decide to list some merge conflicts with no
> stages associated with them?
>
> Thinking about both sets of questions in the last two paragraphs from
> a higher level -- should we focus on and group the higher order stages
> by the individual conflicts that happen, or should we group them by
> the paths that they happen to (which is what `ls-files -u` happens to
> do), or should we not bother grouping them and instead duplicate the
> higher order stages for each logical conflict it is part of?
>
> As an alternative to duplicating higher order stages, do we sometimes
> decide to "lump" separate conflicts together and treat them as one
> conflict?  If so, what are the rules on how we decide to lump
> conflicts and when not to?  Is there a bright line boundary?  And can
> it be done without sucking in arbitrarily more stages for a single
> conflict?
>
>
> Some testcases that might be useful while considering the above
> questions: take a look at the "rad", "rrdd", and "mod6" tests of
> t6422.  How many "same merge conflicts" are there for each of those,
> and what's the boundary between them?  And can you give the answer in
> the form of rules that generically handle all cases, rather than just
> answering these three specific cases?
>
>
> I've thought about this problem long and hard before (in part because
> of some conversations I had with Edward Thompson about libgit2 and

Not a big deal for _me_, but I seem to remember that Ed cared a lot about
having no p in their surname ;-)

> merging at Git Merge 2020).  It wasn't at all clear to me that libgit2
> had considered anything beyond simple rename cases.  The only rules I
> ever figured out that made sense to me was "group the stages by target
> filename rather than by logical conflict" (so we get `ls -files -u`
> populated) and print a meant-for-human message for each logical
> conflict (found in the <Informational Messages> section for
> merge-tree), and make NO attempt to connect stages by conflict type.
>
> I'm sure that's not what you wanted to hear, and maybe doesn't even
> play nicely with your design.  But short of ignoring the edge and
> corner cases, I don't see how to solve that problem.  If you do just
> want to ignore edge and corner cases, then just ignore the
> rename/rename case you brought up in the first place and just use
> `ls-files -u`-type output as-is within your design.  If you don't want
> to ignore edge cases and want something that works with a specific
> design that somehow groups conflicted file stages by conflict type,
> then we're going to have to dig into all these questions above and do
> some big replumbing within merge-ort.

There is sometimes a big difference between what I want to hear and what I
need to hear. Thank you for providing so many details that I needed to
hear.

So let's take a step back and look at my goal here, as in: the
over-arching goal: to use merge-ort on the server side.

From what you said above, it becomes very clear to me that there is very
little chance to resolve such conflicts on the server side.

For example, if a topic branch renames a file differently than the main
branch, there is a really good chance that the user tasked with merging
the topic branch will have to do a whole lot more than just click a few
buttons to perform that task. There might very well be the need to edit
files that do not contain merge conflict markers (I like to call those
cases "non-semantic merge conflicts"), and almost certainly local testing
will be necessary.

So I guess the best we can do in those complicated cases is to give a
comprehensive overview of the problems in the web UI, with the note that
this merge conflict has to be resolved on the local side.

Which brings me to the next concern: since `merge-tree` is a low-level
tool meant to be called by programs rather than humans, we need to make
sure that those messages remain machine-parseable, even if they contain
file names.

Concretely: while I am not currently aware of any web UI that allows to
resolve simple rename/rename conflicts, it is easily conceivable how to
implement such a thing. When that happens, we will need to be able to
teach the server-side code to discern between the cases that can be
handled in the web UI (trivial merge conflicts, trivial rename/rename
conflicts) as compared to scenarios where the conflicts are just too
complex.

Here's an excerpt from t4301:

-- snip --
Auto-merging greeting
CONFLICT (content): Merge conflict in greeting
Auto-merging numbers
CONFLICT (file/directory): directory in the way of whatever from side1; moving it to whatever~side1 instead.
CONFLICT (modify/delete): whatever~side1 deleted in side2 and modified in side1.  Version side1 of whatever~side1 left in tree.
-- snap --

This is the complete set of messages provided in the `test conflict
notices and such` test case.

I immediately notice that every line contains at least one file name.
Looking at https://github.com/git/git/blob/v2.35.1/merge-ort.c#L1899, it
does not seem as if the file names are quoted:

		path_msg(opt, path, 1, _("Auto-merging %s"), path);

(where `path` is used verbatim in a call to `merge_3way()` before that,
i.e. it must not have been quoted)

I would like to register a wish to ensure that file names with special
characters (such as most notably line-feed characters) are quoted in these
messages, so that a simple server-side parser can handle messages starting
with `Auto-merging` and with `CONFLICT (content): Merge conflict in `, and
"throw the hands up in the air" if any other message prefix is seen.

Do you think we can switch to `sq_quote_buf_pretty()` for these messages?
For the `Auto-merging` one, it would be trivial, but I fear that we will
have to work a bit on the `path_msg()` function
(https://github.com/git/git/blob/v2.35.1/merge-ort.c#L630-L649) because it
accepts a variable list of arguments without any clue whether the
arguments refer to paths or not. (And I would be loathe to switch _all_
callers to do the quoting themselves.)

I see 28 calls to that function, and at least a couple that pass not only
a path but also an OID (e.g.
https://github.com/git/git/blob/v2.35.1/merge-ort.c#L1611-L1613).

We could of course be sloppy and pass even OIDs through
`sq_quote_buf_pretty()` in `path_msg()`, knowing that there won't be any
special characters in them, but it gets more complicated e.g. in
https://github.com/git/git/blob/v2.35.1/merge-ort.c#L1648-L1651, where we
pass an `strbuf` that contains a somewhat free-form commit message.

I guess we could still pass those through `sq_quote_buf_pretty()`, even if
they are not paths, to ensure that there are no special characters in the
machine-parseable lines.

What do you think?

Ciao,
Dscho
Ævar Arnfjörð Bjarmason Feb. 21, 2022, 2:27 p.m. UTC | #12
On Mon, Feb 21 2022, Johannes Schindelin wrote:

> Hi Elijah,
>
> On Fri, 4 Feb 2022, Elijah Newren wrote:
>
>> On Fri, Feb 4, 2022 at 3:10 PM Johannes Schindelin
>> <Johannes.Schindelin@gmx.de> wrote:
>> >
>> > On Sat, 29 Jan 2022, Elijah Newren wrote:
>> >
>> > > On Sat, Jan 29, 2022 at 12:23 AM Johannes Sixt <j6t@kdbg.org> wrote:
>> > > >
>> > > > Just a heckling from the peanut gallery...
>> > > >
>> > > > Am 29.01.22 um 07:08 schrieb Elijah Newren:
>> > > > > On Fri, Jan 28, 2022 at 8:55 AM Johannes Schindelin
>> > > > > <Johannes.Schindelin@gmx.de> wrote:
>> > > > >> Meaning: Even if stage 3 is missing from the first conflict and stage 1 is
>> > > > >> missing from the second conflict, in the output we would see stages 1, 2,
>> > > > >> 2, 3, i.e. a duplicate stage 2, signifying that we're talking about two
>> > > > >> different conflicts.
>> > > > >
>> > > > > I don't understand why you're fixating on the stage here.  Why would
>> > > > > you want to group all the stage 2s together, count them up, and then
>> > > > > determine there are N conflicting files because there are N stage 2's?
>> > > >
>> > > > Looks like you are misunderstanding Dscho's point: When you have two
>> > > > conflicts, the first with stages 1 and 2, the second with stages 2 and
>> > > > 3, then the 2s occur lumped together when the 4 lines are printed in a
>> > > > row, and that is the cue to the parser where the new conflict begins.
>> > > > Dscho did not mean that all N 2s of should be listed together.
>> > >
>> > > Ah, so...I didn't understand his misunderstanding?  Using stages as a
>> > > cue to the parser where the new conflict begins is broken; you should
>> > > instead check for when the filename listed on a line does not match
>> > > the filename on the previous line.
>> >
>> > But that would break down in case of rename/rename conflicts, right?
>> >
>> > > In particular, if one conflict has stages 1 and 2, and the next conflict
>> > > has only stage 3, then looking at stages only might cause you to
>> > > accidentally lump unrelated conflicts together.
>> >
>> > Precisely. That's why I would love to have a way to deviate from the
>> > output of `ls-files -u`'s format, and have a reliable way to indicate
>> > stages that belong to the same merge conflict.
>>
>> Ah, attempting to somehow identify and present logical separate
>> conflicts?  That could be awesome, but I'm not sure it's technically
>> possible.  It certainly isn't with today's merge-ort.
>>
>> Let me ask some questions first...
>>
>> If I understand you correctly then in the event of a rename/rename,
>> i.e. foo->bar & foo->baz, then you want foo's, bar's, & baz's stages
>> all listed together.  Right?  And in some way that you can identify
>> them as related?
>
> Yes, that was kind of my idea ;-)
>
>> If we do so, how do we mark the beginning and the end of what you call
>> "the same merge conflict"?  If you say it's always 3 stages (with the
>> possibility of all-zero modes/oids), then what about the rename/rename
>> case above modified so that the side that did foo->baz also added a
>> different 'bar'?  That'd be 4 non-zero modes/oids, all of them
>> relevant.  Or what if the side that did foo->bar also renamed
>> something else to 'baz', giving us even more non-zero stages for these
>> three paths?  Perhaps you consider these different conflicts and want
>> them listed separately -- if so, where does one conflict begin and
>> another start and which stages are parts of which conflict?
>
> Thank you for calling me out on an only half-finished thought.
>
> To be quite honest, I previously did not have much time to think of the
> non-trivial nature of representing merge conflicts in a web UI when
> performing merges with rename detection, in particular when performing
> _recursive_ merges (where, as you point out so correctly, an arbitrary
> number of rename conflicts can happen _for the same file_).
>
> Of course, this gets even more complicated when we're also thinking about
> type changes (file -> symlink, file -> directory, and vice versa).
>
>> If you are attempting to somehow present the stuff that "belongs to
>> the same merge conflict" are you also trying to identify what kind of
>> merge conflict it is?  If so, do you want each type of merge conflict
>> listed?  For example, let's switch from the example above of logically
>> disjoint paths coming together to result in more than 3 stages, and
>> instead pick an example with a single logical path with less than
>> three stages.  And now let's say that path has multiple conflicts
>> associated with it; let's use an example with 3: rename/delete +
>> modify/delete + directory/file (one side renames foo->bar while
>> modifying the contents, the other side deletes foo and adds the
>> directory 'bar/').  In this case, there is a target file 'bar' that
>> has two non-zero modes/oids in the ls-files-u output.  If all three
>> types of conflicts need to be listed, does each need to be listed with
>> the two non-zero modes/oids (and perhaps one zero mode/oid), resulting
>> in six listings for 'bar'?  Or would the duplication be confusing
>> enough that we instead decide to list some merge conflicts with no
>> stages associated with them?
>>
>> Thinking about both sets of questions in the last two paragraphs from
>> a higher level -- should we focus on and group the higher order stages
>> by the individual conflicts that happen, or should we group them by
>> the paths that they happen to (which is what `ls-files -u` happens to
>> do), or should we not bother grouping them and instead duplicate the
>> higher order stages for each logical conflict it is part of?
>>
>> As an alternative to duplicating higher order stages, do we sometimes
>> decide to "lump" separate conflicts together and treat them as one
>> conflict?  If so, what are the rules on how we decide to lump
>> conflicts and when not to?  Is there a bright line boundary?  And can
>> it be done without sucking in arbitrarily more stages for a single
>> conflict?
>>
>>
>> Some testcases that might be useful while considering the above
>> questions: take a look at the "rad", "rrdd", and "mod6" tests of
>> t6422.  How many "same merge conflicts" are there for each of those,
>> and what's the boundary between them?  And can you give the answer in
>> the form of rules that generically handle all cases, rather than just
>> answering these three specific cases?
>>
>>
>> I've thought about this problem long and hard before (in part because
>> of some conversations I had with Edward Thompson about libgit2 and
>
> Not a big deal for _me_, but I seem to remember that Ed cared a lot about
> having no p in their surname ;-)
>
>> merging at Git Merge 2020).  It wasn't at all clear to me that libgit2
>> had considered anything beyond simple rename cases.  The only rules I
>> ever figured out that made sense to me was "group the stages by target
>> filename rather than by logical conflict" (so we get `ls -files -u`
>> populated) and print a meant-for-human message for each logical
>> conflict (found in the <Informational Messages> section for
>> merge-tree), and make NO attempt to connect stages by conflict type.
>>
>> I'm sure that's not what you wanted to hear, and maybe doesn't even
>> play nicely with your design.  But short of ignoring the edge and
>> corner cases, I don't see how to solve that problem.  If you do just
>> want to ignore edge and corner cases, then just ignore the
>> rename/rename case you brought up in the first place and just use
>> `ls-files -u`-type output as-is within your design.  If you don't want
>> to ignore edge cases and want something that works with a specific
>> design that somehow groups conflicted file stages by conflict type,
>> then we're going to have to dig into all these questions above and do
>> some big replumbing within merge-ort.
>
> There is sometimes a big difference between what I want to hear and what I
> need to hear. Thank you for providing so many details that I needed to
> hear.
>
> So let's take a step back and look at my goal here, as in: the
> over-arching goal: to use merge-ort on the server side.
>
> From what you said above, it becomes very clear to me that there is very
> little chance to resolve such conflicts on the server side.
>
> For example, if a topic branch renames a file differently than the main
> branch, there is a really good chance that the user tasked with merging
> the topic branch will have to do a whole lot more than just click a few
> buttons to perform that task. There might very well be the need to edit
> files that do not contain merge conflict markers (I like to call those
> cases "non-semantic merge conflicts"), and almost certainly local testing
> will be necessary.
>
> So I guess the best we can do in those complicated cases is to give a
> comprehensive overview of the problems in the web UI, with the note that
> this merge conflict has to be resolved on the local side.
>
> Which brings me to the next concern: since `merge-tree` is a low-level
> tool meant to be called by programs rather than humans, we need to make
> sure that those messages remain machine-parseable, even if they contain
> file names.
>
> Concretely: while I am not currently aware of any web UI that allows to
> resolve simple rename/rename conflicts, it is easily conceivable how to
> implement such a thing. When that happens, we will need to be able to
> teach the server-side code to discern between the cases that can be
> handled in the web UI (trivial merge conflicts, trivial rename/rename
> conflicts) as compared to scenarios where the conflicts are just too
> complex.
>
> Here's an excerpt from t4301:
>
> -- snip --
> Auto-merging greeting
> CONFLICT (content): Merge conflict in greeting
> Auto-merging numbers
> CONFLICT (file/directory): directory in the way of whatever from side1; moving it to whatever~side1 instead.
> CONFLICT (modify/delete): whatever~side1 deleted in side2 and modified in side1.  Version side1 of whatever~side1 left in tree.
> -- snap --
>
> This is the complete set of messages provided in the `test conflict
> notices and such` test case.
>
> I immediately notice that every line contains at least one file name.
> Looking at https://github.com/git/git/blob/v2.35.1/merge-ort.c#L1899, it
> does not seem as if the file names are quoted:
>
> 		path_msg(opt, path, 1, _("Auto-merging %s"), path);
>
> (where `path` is used verbatim in a call to `merge_3way()` before that,
> i.e. it must not have been quoted)
>
> I would like to register a wish to ensure that file names with special
> characters (such as most notably line-feed characters) are quoted in these
> messages, so that a simple server-side parser can handle messages starting
> with `Auto-merging` and with `CONFLICT (content): Merge conflict in `, and
> "throw the hands up in the air" if any other message prefix is seen.
>
> Do you think we can switch to `sq_quote_buf_pretty()` for these messages?
> For the `Auto-merging` one, it would be trivial, but I fear that we will
> have to work a bit on the `path_msg()` function
> (https://github.com/git/git/blob/v2.35.1/merge-ort.c#L630-L649) because it
> accepts a variable list of arguments without any clue whether the
> arguments refer to paths or not. (And I would be loathe to switch _all_
> callers to do the quoting themselves.)
>
> I see 28 calls to that function, and at least a couple that pass not only
> a path but also an OID (e.g.
> https://github.com/git/git/blob/v2.35.1/merge-ort.c#L1611-L1613).
>
> We could of course be sloppy and pass even OIDs through
> `sq_quote_buf_pretty()` in `path_msg()`, knowing that there won't be any
> special characters in them, but it gets more complicated e.g. in
> https://github.com/git/git/blob/v2.35.1/merge-ort.c#L1648-L1651, where we
> pass an `strbuf` that contains a somewhat free-form commit message.
>
> I guess we could still pass those through `sq_quote_buf_pretty()`, even if
> they are not paths, to ensure that there are no special characters in the
> machine-parseable lines.
>
> What do you think?
>
> Ciao,
> Dscho
Johannes Schindelin Feb. 22, 2022, 4:54 p.m. UTC | #13
Hi Elijah,

On Mon, 21 Feb 2022, Johannes Schindelin wrote:

> [...] since `merge-tree` is a low-level tool meant to be called by
> programs rather than humans, we need to make sure that those messages
> remain machine-parseable, even if they contain file names.
>
> [...]
>
> Do you think we can switch to `sq_quote_buf_pretty()` for these messages?

Or maybe much better: use NUL to separate those messages if `-z` is passed
to `merge-tree`? That would address the issue in one elegant diff.

What do you think?

Ciao,
Dscho
Elijah Newren Feb. 23, 2022, 2:15 a.m. UTC | #14
On Mon, Feb 21, 2022 at 2:46 AM Johannes Schindelin
<Johannes.Schindelin@gmx.de> wrote:
>
> Hi Elijah,
>
> On Fri, 4 Feb 2022, Elijah Newren wrote:
>
> > On Fri, Feb 4, 2022 at 3:10 PM Johannes Schindelin
> > <Johannes.Schindelin@gmx.de> wrote:
> > >
> > > On Sat, 29 Jan 2022, Elijah Newren wrote:
[...]
> > I've thought about this problem long and hard before (in part because
> > of some conversations I had with Edward Thompson about libgit2 and
>
> Not a big deal for _me_, but I seem to remember that Ed cared a lot about
> having no p in their surname ;-)

Eek!  My apologies to Ed; I'll try to remember and do better.

> > merging at Git Merge 2020).  It wasn't at all clear to me that libgit2
> > had considered anything beyond simple rename cases.  The only rules I
> > ever figured out that made sense to me was "group the stages by target
> > filename rather than by logical conflict" (so we get `ls -files -u`
> > populated) and print a meant-for-human message for each logical
> > conflict (found in the <Informational Messages> section for
> > merge-tree), and make NO attempt to connect stages by conflict type.
> >
> > I'm sure that's not what you wanted to hear, and maybe doesn't even
> > play nicely with your design.  But short of ignoring the edge and
> > corner cases, I don't see how to solve that problem.  If you do just
> > want to ignore edge and corner cases, then just ignore the
> > rename/rename case you brought up in the first place and just use
> > `ls-files -u`-type output as-is within your design.  If you don't want
> > to ignore edge cases and want something that works with a specific
> > design that somehow groups conflicted file stages by conflict type,
> > then we're going to have to dig into all these questions above and do
> > some big replumbing within merge-ort.
>
> There is sometimes a big difference between what I want to hear and what I
> need to hear. Thank you for providing so many details that I needed to
> hear.
>
> So let's take a step back and look at my goal here, as in: the
> over-arching goal: to use merge-ort on the server side.
>
> From what you said above, it becomes very clear to me that there is very
> little chance to resolve such conflicts on the server side.

No, it's still resolvable on the server side.  It's just that attempts
to break up the information into individual logical conflicts is
problematic; if you provide _all_ the informational conflict messages
to the user, and iterate through the paths with conflicts, then things
are fine.  It's only when you attempt to find "the relevant subset"
that things become hard.  Of course, providing it all at once might be
a UI that you hate (perhaps because it's too much like how the command
line behaves...)

> For example, if a topic branch renames a file differently than the main
> branch, there is a really good chance that the user tasked with merging
> the topic branch will have to do a whole lot more than just click a few
> buttons to perform that task. There might very well be the need to edit
> files that do not contain merge conflict markers (I like to call those
> cases "non-semantic merge conflicts"), and almost certainly local testing
> will be necessary.

Sidenote: Do you lump in binary merge conflicts with "non-semantic
merge conflicts"?  You would by your definition, but I'm not sure it
matches.

I tend to call things either content-based conflicts or path-based
conflicts, where content-based usually means textual-based but also
includes merges of binaries.

> So I guess the best we can do in those complicated cases is to give a
> comprehensive overview of the problems in the web UI, with the note that
> this merge conflict has to be resolved on the local side.
>
> Which brings me to the next concern: since `merge-tree` is a low-level
> tool meant to be called by programs rather than humans, we need to make
> sure that those messages remain machine-parseable, even if they contain
> file names.
>
> Concretely: while I am not currently aware of any web UI that allows to
> resolve simple rename/rename conflicts, it is easily conceivable how to
> implement such a thing. When that happens, we will need to be able to
> teach the server-side code to discern between the cases that can be
> handled in the web UI (trivial merge conflicts, trivial rename/rename
> conflicts) as compared to scenarios where the conflicts are just too
> complex.

Um, I'm really worried about attempting to make the conflict notices
machine parseable.  I don't like that idea at all, and I even tried to
rule that out already with my wording:
"""
In all cases, the
<Informational messages> section has the necessary info, though it is
not designed to be machine parseable.
"""
though maybe I should have been even more explicit.  The restrictions
that those messages be stable is too rigid, I think.  I also think
they're a poor way to communicate information to a higher level tool.
I would much rather us add some kind of additional return data
structures from merge ort and use them if we want extra info.

> Here's an excerpt from t4301:
>
> -- snip --
> Auto-merging greeting
> CONFLICT (content): Merge conflict in greeting
> Auto-merging numbers
> CONFLICT (file/directory): directory in the way of whatever from side1; moving it to whatever~side1 instead.
> CONFLICT (modify/delete): whatever~side1 deleted in side2 and modified in side1.  Version side1 of whatever~side1 left in tree.
> -- snap --
>
> This is the complete set of messages provided in the `test conflict
> notices and such` test case.
>
> I immediately notice that every line contains at least one file name.
> Looking at https://github.com/git/git/blob/v2.35.1/merge-ort.c#L1899, it
> does not seem as if the file names are quoted:
>
>                 path_msg(opt, path, 1, _("Auto-merging %s"), path);
>
> (where `path` is used verbatim in a call to `merge_3way()` before that,
> i.e. it must not have been quoted)
>
> I would like to register a wish to ensure that file names with special
> characters (such as most notably line-feed characters) are quoted in these
> messages, so that a simple server-side parser can handle messages starting
> with `Auto-merging` and with `CONFLICT (content): Merge conflict in `, and
> "throw the hands up in the air" if any other message prefix is seen.
>
> Do you think we can switch to `sq_quote_buf_pretty()` for these messages?
> For the `Auto-merging` one, it would be trivial, but I fear that we will
> have to work a bit on the `path_msg()` function
> (https://github.com/git/git/blob/v2.35.1/merge-ort.c#L630-L649) because it
> accepts a variable list of arguments without any clue whether the
> arguments refer to paths or not. (And I would be loathe to switch _all_
> callers to do the quoting themselves.)
>
> I see 28 calls to that function, and at least a couple that pass not only
> a path but also an OID (e.g.
> https://github.com/git/git/blob/v2.35.1/merge-ort.c#L1611-L1613).
>
> We could of course be sloppy and pass even OIDs through
> `sq_quote_buf_pretty()` in `path_msg()`, knowing that there won't be any
> special characters in them, but it gets more complicated e.g. in
> https://github.com/git/git/blob/v2.35.1/merge-ort.c#L1648-L1651, where we
> pass an `strbuf` that contains a somewhat free-form commit message.
>
> I guess we could still pass those through `sq_quote_buf_pretty()`, even if
> they are not paths, to ensure that there are no special characters in the
> machine-parseable lines.
>
> What do you think?

Switching to single quoting paths as a matter of style might make
sense, but only if we go through and change every caller to do so so
that we can make sure it applies to all paths.  And only paths and not
OIDs.

But I'm going to reserve the right in merge-ort to modify, add, or
delete any of those messages passed to path_msg(), which might wreak
havoc on your attempts to parse those strings.  I think they're a bad
form for communicating information to a script or program, and trying
to transform them into such risks making them suboptimal at
communicating info to humans.  These messages should optimize the
latter, and if we want something for the former, it should probably be
a new independent bit of info.
Elijah Newren Feb. 23, 2022, 3:13 a.m. UTC | #15
On Tue, Feb 22, 2022 at 8:54 AM Johannes Schindelin
<Johannes.Schindelin@gmx.de> wrote:
>
> Hi Elijah,
>
> On Mon, 21 Feb 2022, Johannes Schindelin wrote:
>
> > [...] since `merge-tree` is a low-level tool meant to be called by
> > programs rather than humans, we need to make sure that those messages
> > remain machine-parseable, even if they contain file names.
> >
> > [...]
> >
> > Do you think we can switch to `sq_quote_buf_pretty()` for these messages?
>
> Or maybe much better: use NUL to separate those messages if `-z` is passed
> to `merge-tree`? That would address the issue in one elegant diff.
>
> What do you think?

Separating the combination of messages for a single target path from
the combination of messages for a different target path by a NUL
character may make sense.  Would we also want the messages for a path
to be prepended by the pathname and a NUL character, in this case, to
make it easier to determine which path the group of messages are for?

I'm not sure if that does exactly what you are asking, though.

The thing that is stored (in opt->priv->output) is a strbuf per path,
not an array of strbufs per path.  So, if we have a rename/delete and
a rename/add and a mode conflict for the same "path" (A->B on one
side, other side deletes A and adds a symlink B, resulting in three
messages for path "B" that are all appended into a single strbuf),
then we'll have a single "message" which has three newlines.  We can
add a NUL character at the end of that, but not between the messages
without restructuring things a bit.

There's also at least one example, with submodules, where there are
two path_msg() calls for the same individual conflict in order to
split conflict info from resolution advice, and those really shouldn't
be thought of as messages for different conflicts.  (I'm starting to
wonder if the resolution advice should just be tossed; I kept it
because merge-recursive had it, but it might not make sense with
merge-ort being used by server side merges.  But even if we toss that
one, I'm not sure I want to commit to one path_msg() call per "logical
conflict".)

But...maybe this would be good enough for some kind of use you have?
Because if you only want to care about "simple" cases, you could
potentially define those as ones with only one newline  in them.
Johannes Schindelin Feb. 25, 2022, 4:26 p.m. UTC | #16
Hi Elijah,

On Tue, 22 Feb 2022, Elijah Newren wrote:

> On Tue, Feb 22, 2022 at 8:54 AM Johannes Schindelin
> <Johannes.Schindelin@gmx.de> wrote:
> >
> > On Mon, 21 Feb 2022, Johannes Schindelin wrote:
> >
> > > [...] since `merge-tree` is a low-level tool meant to be called by
> > > programs rather than humans, we need to make sure that those messages
> > > remain machine-parseable, even if they contain file names.
> > >
> > > [...]
> > >
> > > Do you think we can switch to `sq_quote_buf_pretty()` for these messages?
> >
> > Or maybe much better: use NUL to separate those messages if `-z` is passed
> > to `merge-tree`? That would address the issue in one elegant diff.
> >
> > What do you think?
>
> Separating the combination of messages for a single target path from
> the combination of messages for a different target path by a NUL
> character may make sense.  Would we also want the messages for a path
> to be prepended by the pathname and a NUL character, in this case, to
> make it easier to determine which path the group of messages are for?
>
> I'm not sure if that does exactly what you are asking, though.

The most important thing I am asking for is a way for a program calling
`merge-tree` to figure out whether it knows how to handle all the types of
conflicts encountered in this merge.

So that a web UI could present e.g. simple content conflicts, and even
rename/rename conflicts, but would know that it cannot resolve the
conflicts e.g. when a submodule is affected.

So... I am fairly certain that we're not as close to addressing this as I
had hoped for.

> The thing that is stored (in opt->priv->output) is a strbuf per path,
> not an array of strbufs per path.  So, if we have a rename/delete and
> a rename/add and a mode conflict for the same "path" (A->B on one
> side, other side deletes A and adds a symlink B, resulting in three
> messages for path "B" that are all appended into a single strbuf),
> then we'll have a single "message" which has three newlines.  We can
> add a NUL character at the end of that, but not between the messages
> without restructuring things a bit.
>
> There's also at least one example, with submodules, where there are
> two path_msg() calls for the same individual conflict in order to
> split conflict info from resolution advice, and those really shouldn't
> be thought of as messages for different conflicts.  (I'm starting to
> wonder if the resolution advice should just be tossed; I kept it
> because merge-recursive had it, but it might not make sense with
> merge-ort being used by server side merges.  But even if we toss that
> one, I'm not sure I want to commit to one path_msg() call per "logical
> conflict".)
>
> But...maybe this would be good enough for some kind of use you have?
> Because if you only want to care about "simple" cases, you could
> potentially define those as ones with only one newline  in them.

We cannot rely on newline character parsing because that is a valid
filename character on Unix:

	$ echo a >'with
	> a newline'

	$ ls -la with*
	-rw-r--r-- 1 me me 2 Feb 25 17:10 'with'$'\n''a newline'

I guess we have to work harder on this and add more than just an `strbuf`
so that we can output `<path>NUL<conflict-type>NUL` pairs (where we
promise to keep the `<conflict-type>` strings constant) or something
similar.

Ciao,
Dscho
Johannes Schindelin Feb. 25, 2022, 4:31 p.m. UTC | #17
Hi Elijah,

On Tue, 22 Feb 2022, Elijah Newren wrote:

> Sidenote: Do you lump in binary merge conflicts with "non-semantic
> merge conflicts"?  You would by your definition, but I'm not sure it
> matches.
>
> I tend to call things either content-based conflicts or path-based
> conflicts, where content-based usually means textual-based but also
> includes merges of binaries.

I like "content-based conflicts".

And no, I had not even thought about binary merge conflicts yet...

> On Mon, Feb 21, 2022 at 2:46 AM Johannes Schindelin
> <Johannes.Schindelin@gmx.de> wrote:
> >
> > Concretely: while I am not currently aware of any web UI that allows
> > to resolve simple rename/rename conflicts, it is easily conceivable
> > how to implement such a thing. When that happens, we will need to be
> > able to teach the server-side code to discern between the cases that
> > can be handled in the web UI (trivial merge conflicts, trivial
> > rename/rename conflicts) as compared to scenarios where the conflicts
> > are just too complex.
>
> Um, I'm really worried about attempting to make the conflict notices
> machine parseable.  I don't like that idea at all, and I even tried to
> rule that out already with my wording:
> """
> In all cases, the
> <Informational messages> section has the necessary info, though it is
> not designed to be machine parseable.
> """
> though maybe I should have been even more explicit.  The restrictions
> that those messages be stable is too rigid, I think.  I also think
> they're a poor way to communicate information to a higher level tool.
> I would much rather us add some kind of additional return data
> structures from merge ort and use them if we want extra info.

Okay.

I thought that we could keep the `CONFLICT (<type>)` constant enough to
serve as such a machine-parseable thing. And then presenting
`<path>NUL<message>NUL` could have served my use case well...

> > Here's an excerpt from t4301:
> >
> > -- snip --
> > Auto-merging greeting
> > CONFLICT (content): Merge conflict in greeting
> > Auto-merging numbers
> > CONFLICT (file/directory): directory in the way of whatever from side1; moving it to whatever~side1 instead.
> > CONFLICT (modify/delete): whatever~side1 deleted in side2 and modified in side1.  Version side1 of whatever~side1 left in tree.
> > -- snap --
> >
> > This is the complete set of messages provided in the `test conflict
> > notices and such` test case.
> >
> > I immediately notice that every line contains at least one file name.
> > Looking at https://github.com/git/git/blob/v2.35.1/merge-ort.c#L1899, it
> > does not seem as if the file names are quoted:
> >
> >                 path_msg(opt, path, 1, _("Auto-merging %s"), path);
> >
> > (where `path` is used verbatim in a call to `merge_3way()` before that,
> > i.e. it must not have been quoted)
> >
> > I would like to register a wish to ensure that file names with special
> > characters (such as most notably line-feed characters) are quoted in these
> > messages, so that a simple server-side parser can handle messages starting
> > with `Auto-merging` and with `CONFLICT (content): Merge conflict in `, and
> > "throw the hands up in the air" if any other message prefix is seen.
> >
> > Do you think we can switch to `sq_quote_buf_pretty()` for these messages?
> > For the `Auto-merging` one, it would be trivial, but I fear that we will
> > have to work a bit on the `path_msg()` function
> > (https://github.com/git/git/blob/v2.35.1/merge-ort.c#L630-L649) because it
> > accepts a variable list of arguments without any clue whether the
> > arguments refer to paths or not. (And I would be loathe to switch _all_
> > callers to do the quoting themselves.)
> >
> > I see 28 calls to that function, and at least a couple that pass not only
> > a path but also an OID (e.g.
> > https://github.com/git/git/blob/v2.35.1/merge-ort.c#L1611-L1613).
> >
> > We could of course be sloppy and pass even OIDs through
> > `sq_quote_buf_pretty()` in `path_msg()`, knowing that there won't be any
> > special characters in them, but it gets more complicated e.g. in
> > https://github.com/git/git/blob/v2.35.1/merge-ort.c#L1648-L1651, where we
> > pass an `strbuf` that contains a somewhat free-form commit message.
> >
> > I guess we could still pass those through `sq_quote_buf_pretty()`, even if
> > they are not paths, to ensure that there are no special characters in the
> > machine-parseable lines.
> >
> > What do you think?
>
> Switching to single quoting paths as a matter of style might make
> sense, but only if we go through and change every caller to do so so
> that we can make sure it applies to all paths.  And only paths and not
> OIDs.

Yes, that sounds unappealing.

> But I'm going to reserve the right in merge-ort to modify, add, or
> delete any of those messages passed to path_msg(), which might wreak
> havoc on your attempts to parse those strings.  I think they're a bad
> form for communicating information to a script or program, and trying
> to transform them into such risks making them suboptimal at
> communicating info to humans.  These messages should optimize the
> latter, and if we want something for the former, it should probably be
> a new independent bit of info.

Makes sense.

So we need something in addition to those messages.

Ciao,
Dscho
Junio C Hamano Feb. 25, 2022, 6:40 p.m. UTC | #18
Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:

>> I tend to call things either content-based conflicts or path-based
>> conflicts, where content-based usually means textual-based but also
>> includes merges of binaries.
>
> I like "content-based conflicts".

Yup, even before ort existed, we had clear distinction between tree
level merges (i.e. which path corresponds to which other path, which
is done in unpack_trees() and "read-tree O A B") and content level
merges (which is done with ll_merge()).

>> Switching to single quoting paths as a matter of style might make
>> sense, but only if we go through and change every caller to do so so
>> that we can make sure it applies to all paths.  And only paths and not
>> OIDs.
>
> Yes, that sounds unappealing.
>
>> But I'm going to reserve the right in merge-ort to modify, add, or
>> delete any of those messages passed to path_msg(), which might wreak
>> havoc on your attempts to parse those strings.  I think they're a bad
>> form for communicating information to a script or program, and trying
>> to transform them into such risks making them suboptimal at
>> communicating info to humans.  These messages should optimize the
>> latter, and if we want something for the former, it should probably be
>> a new independent bit of info.
>
> Makes sense.

As long as it is made clear that the path_msg() is not for machine
consumption (perhaps we can sprinkle <RED><RESET> at random places
to make it impossible for machines to handle), I think the direction
makes sense, too ;-)
Elijah Newren Feb. 26, 2022, 6:53 a.m. UTC | #19
Hi Dscho,

On Fri, Feb 25, 2022 at 8:31 AM Johannes Schindelin
<Johannes.Schindelin@gmx.de> wrote:
>
> Hi Elijah,
>
> On Tue, 22 Feb 2022, Elijah Newren wrote:
>
> > Sidenote: Do you lump in binary merge conflicts with "non-semantic
> > merge conflicts"?  You would by your definition, but I'm not sure it
> > matches.
> >
> > I tend to call things either content-based conflicts or path-based
> > conflicts, where content-based usually means textual-based but also
> > includes merges of binaries.
>
> I like "content-based conflicts".
>
> And no, I had not even thought about binary merge conflicts yet...
>
> > On Mon, Feb 21, 2022 at 2:46 AM Johannes Schindelin
> > <Johannes.Schindelin@gmx.de> wrote:
> > >
> > > Concretely: while I am not currently aware of any web UI that allows
> > > to resolve simple rename/rename conflicts, it is easily conceivable
> > > how to implement such a thing. When that happens, we will need to be
> > > able to teach the server-side code to discern between the cases that
> > > can be handled in the web UI (trivial merge conflicts, trivial
> > > rename/rename conflicts) as compared to scenarios where the conflicts
> > > are just too complex.
> >
> > Um, I'm really worried about attempting to make the conflict notices
> > machine parseable.  I don't like that idea at all, and I even tried to
> > rule that out already with my wording:
> > """
> > In all cases, the
> > <Informational messages> section has the necessary info, though it is
> > not designed to be machine parseable.
> > """
> > though maybe I should have been even more explicit.  The restrictions
> > that those messages be stable is too rigid, I think.  I also think
> > they're a poor way to communicate information to a higher level tool.
> > I would much rather us add some kind of additional return data
> > structures from merge ort and use them if we want extra info.
>
> Okay.
>
> I thought that we could keep the `CONFLICT (<type>)` constant enough to
> serve as such a machine-parseable thing.

That _probably_ is, but I thought you wanted to parse all N paths
embedded in the message after that part as well?

> And then presenting
> `<path>NUL<message>NUL` could have served my use case well...

Would it?  Wouldn't you need something more like

<number-of-paths>NUL<path1>NUL<path2>NUL<pathN>NUL<stable-short-type-description>NUL<message>NUL
 ?

I mean, if rename/rename is what you want to handle, there are three
paths in that message.  And you need to know all three paths in order
to combine the relevant parts of the <Conflicted File Info> section
together.

(Also, while we're at it, I decided to throw a stable short-type
description string (e.g. "CONFLICT (rename/rename)") in there, which
will _probably_ be the first part of the message string but still
allow us to change the message string later if we want.)


Also, we'd want those parsing this information to keep in mind that:
  * Any given conflict can affect multiple paths
  * Any path can be part of multiple conflicts
  * (The above two items imply a potentially many-to-many relationship
between paths and conflicts)
  * Paths listed in these logical conflicts may not correspond to a
file in the index (they could be a directory, or file that was in a
previous version)
  * Some of these "logical conflicts" are not actually conflicts but
just notices (e.g. "auto-merging" or "submodule updated" or "WARNING"
or "<submodules are weird>" messages)

and we'd have to do some work to make sure the paths in the given
messages lined up with the files actually recorded in the index (e.g.
with distinct types we rename both files to avoid the collision, but
print the conflict notice for the original path rather than the new
paths)

[...]
> > But I'm going to reserve the right in merge-ort to modify, add, or
> > delete any of those messages passed to path_msg(), which might wreak
> > havoc on your attempts to parse those strings.  I think they're a bad
> > form for communicating information to a script or program, and trying
> > to transform them into such risks making them suboptimal at
> > communicating info to humans.  These messages should optimize the
> > latter, and if we want something for the former, it should probably be
> > a new independent bit of info.
>
> Makes sense.
>
> So we need something in addition to those messages.

Yes.  Does the proposal above sound like it'd cover your needs?  If
so, we'd probably need to go through all the callers to path_msg() and
either add an immediate call to another function immediately
afterwards that stores this additional information or somehow change
the path_msg() call itself to somehow take an additional arbitrary
list of arguments representing the paths and short-desc we want to
store somewhere.
Johannes Schindelin March 7, 2022, 4:27 p.m. UTC | #20
Hi Elijah,

On Fri, 25 Feb 2022, Elijah Newren wrote:

> On Fri, Feb 25, 2022 at 8:31 AM Johannes Schindelin
> <Johannes.Schindelin@gmx.de> wrote:
> >
> > On Tue, 22 Feb 2022, Elijah Newren wrote:
> >
> > > On Mon, Feb 21, 2022 at 2:46 AM Johannes Schindelin
> > > <Johannes.Schindelin@gmx.de> wrote:
> > > >
> > > > Concretely: while I am not currently aware of any web UI that allows
> > > > to resolve simple rename/rename conflicts, it is easily conceivable
> > > > how to implement such a thing. When that happens, we will need to be
> > > > able to teach the server-side code to discern between the cases that
> > > > can be handled in the web UI (trivial merge conflicts, trivial
> > > > rename/rename conflicts) as compared to scenarios where the conflicts
> > > > are just too complex.
> > >
> > > Um, I'm really worried about attempting to make the conflict notices
> > > machine parseable.  I don't like that idea at all, and I even tried to
> > > rule that out already with my wording:
> > > """
> > > In all cases, the
> > > <Informational messages> section has the necessary info, though it is
> > > not designed to be machine parseable.
> > > """
> > > though maybe I should have been even more explicit.  The restrictions
> > > that those messages be stable is too rigid, I think.  I also think
> > > they're a poor way to communicate information to a higher level tool.
> > > I would much rather us add some kind of additional return data
> > > structures from merge ort and use them if we want extra info.
> >
> > Okay.
> >
> > I thought that we could keep the `CONFLICT (<type>)` constant enough to
> > serve as such a machine-parseable thing.
>
> That _probably_ is, but I thought you wanted to parse all N paths
> embedded in the message after that part as well?

Actually, no, sorry for being unclear. My main aim with the
machine-parseable messages was to detect whether a given failed merge
contains _only_ conflicts of the sort that a particular UI can handle.

In that respect, I would not even need to parse the file names, I'd just
require them not to contain line feed characters ;-)

> > And then presenting
> > `<path>NUL<message>NUL` could have served my use case well...
>
> Would it?  Wouldn't you need something more like
>
> <number-of-paths>NUL<path1>NUL<path2>NUL<pathN>NUL<stable-short-type-description>NUL<message>NUL
>  ?

Probably, you're right.

> I mean, if rename/rename is what you want to handle, there are three
> paths in that message.  And you need to know all three paths in order
> to combine the relevant parts of the <Conflicted File Info> section
> together.
>
> (Also, while we're at it, I decided to throw a stable short-type
> description string (e.g. "CONFLICT (rename/rename)") in there, which
> will _probably_ be the first part of the message string but still
> allow us to change the message string later if we want.)

Yes, I very much like that idea to keep the prefix in a format that we can
guarantee to be stable enough for applications (or server backends) to
rely on.

> Also, we'd want those parsing this information to keep in mind that:
>   * Any given conflict can affect multiple paths
>   * Any path can be part of multiple conflicts
>   * (The above two items imply a potentially many-to-many relationship
> between paths and conflicts)
>   * Paths listed in these logical conflicts may not correspond to a
> file in the index (they could be a directory, or file that was in a
> previous version)
>   * Some of these "logical conflicts" are not actually conflicts but
> just notices (e.g. "auto-merging" or "submodule updated" or "WARNING"
> or "<submodules are weird>" messages)
>
> and we'd have to do some work to make sure the paths in the given
> messages lined up with the files actually recorded in the index (e.g.
> with distinct types we rename both files to avoid the collision, but
> print the conflict notice for the original path rather than the new
> paths)
>
> [...]
> > > But I'm going to reserve the right in merge-ort to modify, add, or
> > > delete any of those messages passed to path_msg(), which might wreak
> > > havoc on your attempts to parse those strings.  I think they're a bad
> > > form for communicating information to a script or program, and trying
> > > to transform them into such risks making them suboptimal at
> > > communicating info to humans.  These messages should optimize the
> > > latter, and if we want something for the former, it should probably be
> > > a new independent bit of info.
> >
> > Makes sense.
> >
> > So we need something in addition to those messages.
>
> Yes.  Does the proposal above sound like it'd cover your needs?  If
> so, we'd probably need to go through all the callers to path_msg() and
> either add an immediate call to another function immediately
> afterwards that stores this additional information or somehow change
> the path_msg() call itself to somehow take an additional arbitrary
> list of arguments representing the paths and short-desc we want to
> store somewhere.

Yes, you're right, the proper thing to do is to go through the callers to
`path_msg()` so that we can provide that stable output you proposed. A
couple of thoughts about this:

* These are not informal messages, i.e. I think we would need another flag
  that would then trigger another double-`NUL`-separated section to be
  shown, probably between the conflicted file info and the informational
  messages.

* Instead of _adding_ the calls, we could look into refactoring the
  existing `path_msg()` calls, introducing yet another function that would
  call `path_msg()` but also optionally generate that machine-parseable
  conflict info.

* None of this needs to hold up `en/merge-tree`. I am sorry that I am the
  blocker (unintentionally so, I guarantee you that!).

Ciao,
Dscho
Elijah Newren March 8, 2022, 8:25 a.m. UTC | #21
On Mon, Mar 7, 2022 at 8:27 AM Johannes Schindelin
<Johannes.Schindelin@gmx.de> wrote:
>
> Hi Elijah,
>
> On Fri, 25 Feb 2022, Elijah Newren wrote:
>
> > On Fri, Feb 25, 2022 at 8:31 AM Johannes Schindelin
> > <Johannes.Schindelin@gmx.de> wrote:
> > >
> > > On Tue, 22 Feb 2022, Elijah Newren wrote:
> > >
> > > > On Mon, Feb 21, 2022 at 2:46 AM Johannes Schindelin
> > > > <Johannes.Schindelin@gmx.de> wrote:
> > > > >
> > > > > Concretely: while I am not currently aware of any web UI that allows
> > > > > to resolve simple rename/rename conflicts, it is easily conceivable
> > > > > how to implement such a thing. When that happens, we will need to be
> > > > > able to teach the server-side code to discern between the cases that
> > > > > can be handled in the web UI (trivial merge conflicts, trivial
> > > > > rename/rename conflicts) as compared to scenarios where the conflicts
> > > > > are just too complex.
> > > >
> > > > Um, I'm really worried about attempting to make the conflict notices
> > > > machine parseable.  I don't like that idea at all, and I even tried to
> > > > rule that out already with my wording:
> > > > """
> > > > In all cases, the
> > > > <Informational messages> section has the necessary info, though it is
> > > > not designed to be machine parseable.
> > > > """
> > > > though maybe I should have been even more explicit.  The restrictions
> > > > that those messages be stable is too rigid, I think.  I also think
> > > > they're a poor way to communicate information to a higher level tool.
> > > > I would much rather us add some kind of additional return data
> > > > structures from merge ort and use them if we want extra info.
> > >
> > > Okay.
> > >
> > > I thought that we could keep the `CONFLICT (<type>)` constant enough to
> > > serve as such a machine-parseable thing.
> >
> > That _probably_ is, but I thought you wanted to parse all N paths
> > embedded in the message after that part as well?
>
> Actually, no, sorry for being unclear. My main aim with the
> machine-parseable messages was to detect whether a given failed merge
> contains _only_ conflicts of the sort that a particular UI can handle.

...and in order to do that, you'd need to parse all the filenames to
determine whether files were involved in multiple conflict types,
since you earlier suggested you would just bail on such conflicts (at
https://lore.kernel.org/git/nycvar.QRO.7.76.6.2202211059430.26495@tvgsbejvaqbjf.bet/).
In particular, I think the "mod6" and "rrdd" and "rad" cases kind of
triggered those comments, and rename/rename is involved in two of
those.

Further, even if you could assume there'd only be simple conflicts
such as the simple rename/rename case, you'd still need the files so
that you know _which_ of the modes/oids in the <Conflicted file info>
section relate to the message you are looking at.  (As mentioned
previously, sorting the <Conflicted file info> to attempt to put them
together is not feasible and is a no-go.)

> > > And then presenting
> > > `<path>NUL<message>NUL` could have served my use case well...
> >
> > Would it?  Wouldn't you need something more like
> >
> > <number-of-paths>NUL<path1>NUL<path2>NUL<pathN>NUL<stable-short-type-description>NUL<message>NUL
> >  ?
>
> Probably, you're right.
>
> > I mean, if rename/rename is what you want to handle, there are three
> > paths in that message.  And you need to know all three paths in order
> > to combine the relevant parts of the <Conflicted File Info> section
> > together.
> >
> > (Also, while we're at it, I decided to throw a stable short-type
> > description string (e.g. "CONFLICT (rename/rename)") in there, which
> > will _probably_ be the first part of the message string but still
> > allow us to change the message string later if we want.)
>
> Yes, I very much like that idea to keep the prefix in a format that we can
> guarantee to be stable enough for applications (or server backends) to
> rely on.

Um, I explicitly avoided saying the prefix would be stable by
introducing a separate short string just so that we could change the
prefix later if wanted.  The short string is _probably_ the current
prefix or something close to it, but that stable string wouldn't
necessarily remain the prefix of the message string, since the entire
message string would be allowed to change.

> > Also, we'd want those parsing this information to keep in mind that:
> >   * Any given conflict can affect multiple paths
> >   * Any path can be part of multiple conflicts
> >   * (The above two items imply a potentially many-to-many relationship
> > between paths and conflicts)
> >   * Paths listed in these logical conflicts may not correspond to a
> > file in the index (they could be a directory, or file that was in a
> > previous version)
> >   * Some of these "logical conflicts" are not actually conflicts but
> > just notices (e.g. "auto-merging" or "submodule updated" or "WARNING"
> > or "<submodules are weird>" messages)
> >
> > and we'd have to do some work to make sure the paths in the given
> > messages lined up with the files actually recorded in the index (e.g.
> > with distinct types we rename both files to avoid the collision, but
> > print the conflict notice for the original path rather than the new
> > paths)
> >
> > [...]
> > > > But I'm going to reserve the right in merge-ort to modify, add, or
> > > > delete any of those messages passed to path_msg(), which might wreak
> > > > havoc on your attempts to parse those strings.  I think they're a bad
> > > > form for communicating information to a script or program, and trying
> > > > to transform them into such risks making them suboptimal at
> > > > communicating info to humans.  These messages should optimize the
> > > > latter, and if we want something for the former, it should probably be
> > > > a new independent bit of info.
> > >
> > > Makes sense.
> > >
> > > So we need something in addition to those messages.
> >
> > Yes.  Does the proposal above sound like it'd cover your needs?  If
> > so, we'd probably need to go through all the callers to path_msg() and
> > either add an immediate call to another function immediately
> > afterwards that stores this additional information or somehow change
> > the path_msg() call itself to somehow take an additional arbitrary
> > list of arguments representing the paths and short-desc we want to
> > store somewhere.
>
> Yes, you're right, the proper thing to do is to go through the callers to
> `path_msg()` so that we can provide that stable output you proposed. A
> couple of thoughts about this:
>
> * These are not informal messages, i.e. I think we would need another flag
>   that would then trigger another double-`NUL`-separated section to be
>   shown, probably between the conflicted file info and the informational
>   messages.

Why would that be a separate section, though?  While we don't want
machines parsing the informal messages and trying to determine the
components of that message, they definitely should be able to tell
which informal messages are associated with which paths (otherwise how
can you group the <Conflict message info> as per your needs and how do
you know which message to show the user when dealing with that
particular conflict?).  That requirement suggests the informal
messages should be part of the same section.  Thus my suggestion to
just make it be the <message> part at the end of my earlier suggestion
of

<number-of-paths>NUL<path1>NUL<path2>NUL<pathN>NUL<stable-short-type-description>NUL<message>NUL

> * Instead of _adding_ the calls, we could look into refactoring the
>   existing `path_msg()` calls, introducing yet another function that would
>   call `path_msg()` but also optionally generate that machine-parseable
>   conflict info.

So of the two alternatives I suggested above, it sounds like you're in
favor of the "or somehow change the path_msg() call itself to somehow
take an additional arbitrary list of arguments representing the paths
and short-desc we want to store somewhere" option I suggested.  I
think I prefer that one too.

> * None of this needs to hold up `en/merge-tree`. I am sorry that I am the
>   blocker (unintentionally so, I guarantee you that!).

Sometimes I find it frustrating that changes don't merge down for
months, particularly when the topic was already finished weeks and
weeks (if not months) ago and/or when there's a follow-on series
depending on it.  But in this case I have no follow-on series ready to
send, and this topic has been under (very!) active discussion
essentially the whole time.  And, perhaps even more importantly, I'd
like to avoid solidifying the "machine parseable output format"
prematurely and making it harder to work around later.  I think
addressing this issue you bring up requires fundamentally redoing the
informational messages section as per my proposal, and it makes me
wonder what should even be shown without the -z option (thoughts
welcome).

So, this is one series where even if everyone else says to merge it
already, I'd like to wait a bit longer on it until I feel confident we
have a solution that handles at least the current usecases.
Johannes Schindelin March 10, 2022, 3:10 p.m. UTC | #22
Hi Elijah,

On Tue, 8 Mar 2022, Elijah Newren wrote:

> On Mon, Mar 7, 2022 at 8:27 AM Johannes Schindelin
> <Johannes.Schindelin@gmx.de> wrote:
> >
> > On Fri, 25 Feb 2022, Elijah Newren wrote:
> >
> > > On Fri, Feb 25, 2022 at 8:31 AM Johannes Schindelin
> > > <Johannes.Schindelin@gmx.de> wrote:
> > > >
> > > > On Tue, 22 Feb 2022, Elijah Newren wrote:
> > > >
> > > > > On Mon, Feb 21, 2022 at 2:46 AM Johannes Schindelin
> > > > > <Johannes.Schindelin@gmx.de> wrote:
> > > > > >
> > > > > > Concretely: while I am not currently aware of any web UI that
> > > > > > allows to resolve simple rename/rename conflicts, it is easily
> > > > > > conceivable how to implement such a thing. When that happens,
> > > > > > we will need to be able to teach the server-side code to
> > > > > > discern between the cases that can be handled in the web UI
> > > > > > (trivial merge conflicts, trivial rename/rename conflicts) as
> > > > > > compared to scenarios where the conflicts are just too
> > > > > > complex.
> > > > >
> > > > > Um, I'm really worried about attempting to make the conflict
> > > > > notices machine parseable.  I don't like that idea at all, and I
> > > > > even tried to rule that out already with my wording: """ In all
> > > > > cases, the <Informational messages> section has the necessary
> > > > > info, though it is not designed to be machine parseable. """
> > > > > though maybe I should have been even more explicit.  The
> > > > > restrictions that those messages be stable is too rigid, I
> > > > > think.  I also think they're a poor way to communicate
> > > > > information to a higher level tool. I would much rather us add
> > > > > some kind of additional return data structures from merge ort
> > > > > and use them if we want extra info.
> > > >
> > > > Okay.
> > > >
> > > > I thought that we could keep the `CONFLICT (<type>)` constant
> > > > enough to serve as such a machine-parseable thing.
> > >
> > > That _probably_ is, but I thought you wanted to parse all N paths
> > > embedded in the message after that part as well?
> >
> > Actually, no, sorry for being unclear. My main aim with the
> > machine-parseable messages was to detect whether a given failed merge
> > contains _only_ conflicts of the sort that a particular UI can handle.
>
> ...and in order to do that, you'd need to parse all the filenames to
> determine whether files were involved in multiple conflict types, since
> you earlier suggested you would just bail on such conflicts (at
> https://lore.kernel.org/git/nycvar.QRO.7.76.6.2202211059430.26495@tvgsbejvaqbjf.bet/).

That's a good point. I had somehow still in my mind that the `CONFLICT
(<type>)` prefix would already give me enough data to decide whether to
bail out or not, but that prefix would only tell me about a single
`rename/rename` conflict, not about multiple such conflicts involving the
_same_ file.

> In particular, I think the "mod6" and "rrdd" and "rad" cases kind of
> triggered those comments, and rename/rename is involved in two of those.
>
> Further, even if you could assume there'd only be simple conflicts such
> as the simple rename/rename case, you'd still need the files so that you
> know _which_ of the modes/oids in the <Conflicted file info> section
> relate to the message you are looking at.  (As mentioned previously,
> sorting the <Conflicted file info> to attempt to put them together is
> not feasible and is a no-go.)

True.

> > > > And then presenting `<path>NUL<message>NUL` could have served my
> > > > use case well...
> > >
> > > Would it?  Wouldn't you need something more like
> > >
> > > <number-of-paths>NUL<path1>NUL<path2>NUL<pathN>NUL<stable-short-type-description>NUL<message>NUL
> > > ?
> >
> > Probably, you're right.
> >
> > > I mean, if rename/rename is what you want to handle, there are three
> > > paths in that message.  And you need to know all three paths in
> > > order to combine the relevant parts of the <Conflicted File Info>
> > > section together.
> > >
> > > (Also, while we're at it, I decided to throw a stable short-type
> > > description string (e.g. "CONFLICT (rename/rename)") in there, which
> > > will _probably_ be the first part of the message string but still
> > > allow us to change the message string later if we want.)
> >
> > Yes, I very much like that idea to keep the prefix in a format that we
> > can guarantee to be stable enough for applications (or server
> > backends) to rely on.
>
> Um, I explicitly avoided saying the prefix would be stable by
> introducing a separate short string just so that we could change the
> prefix later if wanted.  The short string is _probably_ the current
> prefix or something close to it, but that stable string wouldn't
> necessarily remain the prefix of the message string, since the entire
> message string would be allowed to change.

Okay.

> > [T]he proper thing to do is to go through the callers to `path_msg()`
> > so that we can provide that stable output you proposed. A couple of
> > thoughts about this:
> >
> > * These are not informal messages, i.e. I think we would need another flag
> >   that would then trigger another double-`NUL`-separated section to be
> >   shown, probably between the conflicted file info and the informational
> >   messages.
>
> Why would that be a separate section, though?

I thought you wanted to keep the informal messages human-readable, so
naturally I thought that the machine-readable part should be in a separate
section.

> While we don't want machines parsing the informal messages and trying to
> determine the components of that message, they definitely should be able
> to tell which informal messages are associated with which paths
> (otherwise how can you group the <Conflict message info> as per your
> needs and how do you know which message to show the user when dealing
> with that particular conflict?).  That requirement suggests the informal
> messages should be part of the same section.  Thus my suggestion to just
> make it be the <message> part at the end of my earlier suggestion of
>
> <number-of-paths>NUL<path1>NUL<path2>NUL<pathN>NUL<stable-short-type-description>NUL<message>NUL

Oh, okay.

Should this format be the one with `-z`, and the current format remain the
one being shown without `-z`?

> > * Instead of _adding_ the calls, we could look into refactoring the
> >   existing `path_msg()` calls, introducing yet another function that would
> >   call `path_msg()` but also optionally generate that machine-parseable
> >   conflict info.
>
> So of the two alternatives I suggested above, it sounds like you're in
> favor of the "or somehow change the path_msg() call itself to somehow
> take an additional arbitrary list of arguments representing the paths
> and short-desc we want to store somewhere" option I suggested.  I
> think I prefer that one too.

Yes!

> > * None of this needs to hold up `en/merge-tree`. I am sorry that I am the
> >   blocker (unintentionally so, I guarantee you that!).
>
> Sometimes I find it frustrating that changes don't merge down for
> months, particularly when the topic was already finished weeks and
> weeks (if not months) ago and/or when there's a follow-on series
> depending on it.  But in this case I have no follow-on series ready to
> send, and this topic has been under (very!) active discussion
> essentially the whole time.  And, perhaps even more importantly, I'd
> like to avoid solidifying the "machine parseable output format"
> prematurely and making it harder to work around later.  I think
> addressing this issue you bring up requires fundamentally redoing the
> informational messages section as per my proposal, and it makes me
> wonder what should even be shown without the -z option (thoughts
> welcome).

My suggestion is above: without `-z`, the messages should be identical to
what is shown now.

> So, this is one series where even if everyone else says to merge it
> already, I'd like to wait a bit longer on it until I feel confident we
> have a solution that handles at least the current usecases.

Fair enough, you're in charge of this series, and I really like what you
came up with.

My thinking was driven more by the users' side, as I am relatively eager
to integrate this into production, but am loathe to do that with an early
iteration of `en/merge-tree` that might be substantially revamped, still.

It can bog me down quite well if I have to adjust local (or not-so-local)
patch series to refactorings in core Git. For example, the other day, I
was blocked from more interesting work until I could resolve all kinds of
conflicts where core Git renaming `hash_object_file_literally()` to
`write_object_file_literally()` completely intersected with
actually-interesting `unsigned long` to `size_t` changes in Git for
Windows. And let me tell you, once you have to deal with such conflicts a
couple of times, being prevented from doing work that is actually fun, it
is relatively easy to become frustrated and think about those refactorings
as pointless and as useless churn, no matter how well-intended they may
be.

So my prodding is kind of selfish, as an `en/merge-tree` cherry-picked
from `next` would provide somewhat of a promise that I do _not_ have to
spend _so_ much time adjusting my code in case a new iteration with
substantial changes shows up ;-)

But you're right, let's tread wisely and only call this patch series done
only when we are reasonably certain that the machine-parseable output is
stable.

Speaking of which: Would you like me to give that `path_msg()` refactoring
a try, or are you already busy with it? (I cannot make any promises, but I
also do not want to lean on you, to leave you alone with that work, I
would feel really bad about that.)

Ciao,
Dscho
Johannes Schindelin May 13, 2022, 10:21 a.m. UTC | #23
Hi Elijah,

On Thu, 10 Mar 2022, Johannes Schindelin wrote:

> On Tue, 8 Mar 2022, Elijah Newren wrote:
>
> > So, this is one series where even if everyone else says to merge it
> > already, I'd like to wait a bit longer on it until I feel confident we
> > have a solution that handles at least the current usecases.
>
> Fair enough, you're in charge of this series, and I really like what you
> came up with.
>
> My thinking was driven more by the users' side, as I am relatively eager
> to integrate this into production, but am loathe to do that with an early
> iteration of `en/merge-tree` that might be substantially revamped, still.

I've been bogged down with things elsewhere, but should now have time to
help on this end.

Elijah, _is_ there anything I can help with?

Thanks,
Dscho
Elijah Newren May 17, 2022, 8:23 a.m. UTC | #24
Hi Johannes,

On Fri, May 13, 2022 at 3:21 AM Johannes Schindelin
<Johannes.Schindelin@gmx.de> wrote:
>
> Hi Elijah,
>
> On Thu, 10 Mar 2022, Johannes Schindelin wrote:
>
> > On Tue, 8 Mar 2022, Elijah Newren wrote:
> >
> > > So, this is one series where even if everyone else says to merge it
> > > already, I'd like to wait a bit longer on it until I feel confident we
> > > have a solution that handles at least the current usecases.
> >
> > Fair enough, you're in charge of this series, and I really like what you
> > came up with.
> >
> > My thinking was driven more by the users' side, as I am relatively eager
> > to integrate this into production, but am loathe to do that with an early
> > iteration of `en/merge-tree` that might be substantially revamped, still.
>
> I've been bogged down with things elsewhere, but should now have time to
> help on this end.
>
> Elijah, _is_ there anything I can help with?

Yeah, I've been bogged down with other things too; the little Git time
I've had has been spent responding to review requests or other things
folks manually were asking for my input on.

I think I got a fair amount of this implemented about a month or so
ago.  I just pushed up what I have to the wip-for-in-core-merge-tree
branch of newren/git.  Some notes:

  * A big "WIP" commit that needs to be broken up
  * The previous "output" member of merge_result, containing a strmap
of conflict and informational messages (basically a mapping of
filename -> strbuf) now needs to be replaced by a strmap "conflicts",
which is now a mapping of primary_filename -> logical_conflicts, and
logical_conflicts is an array of logical_conflict, and
logical_conflict has a type, array of paths, and message.
  * Since "output" is no longer part of merge_result, the new
remerge-diff functionality is going to need to be modified since it
used that field, and instead iterate on "conflicts" to get the same
information
  * I have some FIXME comments in a couple places where I need to
figure out how I want to pass the variable number of arguments (in a
function already accepting a variable number of arguments for other
reasons, making the function in a way have to variable length lists of
arguments)
  * The new enums and structs I added to merge-ort.c really have to be
added to merge-ort.h and become part of the API.  Feels a little
unfortunate since it'll make the API _much_ more involved, but I don't
see any other way to solve your usecase.

If you want to take a stab at the above, or even see if my changes
make sense (sorry for it all being squashed into one big commit and
not having good commit messages, but, you know...you did ask), that'd
be great.
Johannes Schindelin June 3, 2022, 10:11 p.m. UTC | #25
Hi Elijah,

On Tue, 17 May 2022, Elijah Newren wrote:

> On Fri, May 13, 2022 at 3:21 AM Johannes Schindelin
> <Johannes.Schindelin@gmx.de> wrote:
> >
> > On Thu, 10 Mar 2022, Johannes Schindelin wrote:
> >
> > > On Tue, 8 Mar 2022, Elijah Newren wrote:
> > >
> > > > So, this is one series where even if everyone else says to merge it
> > > > already, I'd like to wait a bit longer on it until I feel confident we
> > > > have a solution that handles at least the current usecases.
> > >
> > > Fair enough, you're in charge of this series, and I really like what you
> > > came up with.
> > >
> > > My thinking was driven more by the users' side, as I am relatively eager
> > > to integrate this into production, but am loathe to do that with an early
> > > iteration of `en/merge-tree` that might be substantially revamped, still.
> >
> > I've been bogged down with things elsewhere, but should now have time to
> > help on this end.
> >
> > Elijah, _is_ there anything I can help with?
>
> Yeah, I've been bogged down with other things too; the little Git time
> I've had has been spent responding to review requests or other things
> folks manually were asking for my input on.
>
> I think I got a fair amount of this implemented about a month or so
> ago.  I just pushed up what I have to the wip-for-in-core-merge-tree
> branch of newren/git.

Thank you so much!

I worked a few hours on this and pushed up my changes under the same
branch name to dscho/git.

> Some notes:
>
>   * A big "WIP" commit that needs to be broken up

I did not yet start on that.

>   * The previous "output" member of merge_result, containing a strmap
> of conflict and informational messages (basically a mapping of
> filename -> strbuf) now needs to be replaced by a strmap "conflicts",
> which is now a mapping of primary_filename -> logical_conflicts, and
> logical_conflicts is an array of logical_conflict, and
> logical_conflict has a type, array of paths, and message.
>   * Since "output" is no longer part of merge_result, the new
> remerge-diff functionality is going to need to be modified since it
> used that field, and instead iterate on "conflicts" to get the same
> information

I punted on that for now, recreating an `output`-style strmap and storing
it as `path_messages` attribute.

>   * I have some FIXME comments in a couple places where I need to
> figure out how I want to pass the variable number of arguments (in a
> function already accepting a variable number of arguments for other
> reasons, making the function in a way have to variable length lists of
> arguments)

In my WIP fixups, I refactored this into a version that takes varargs and
another version that takes a string_list.

However, after getting all this to compile and t4301 to pass, I think we
actually only need a version that takes up to two "other" paths, and a
version that takes a string_list with those "other" paths, where the
former constructs a temporary string_list and then calls the latter.

>   * The new enums and structs I added to merge-ort.c really have to be
> added to merge-ort.h and become part of the API.  Feels a little
> unfortunate since it'll make the API _much_ more involved, but I don't
> see any other way to solve your usecase.

I agree, but I did not do that yet ;-)

Another thing I noticed is that we can probably ensure consistency between
the `conflict_and_info_types` enum and the `type_short_descriptions` array
by using the same C99 construct we're already using in the
`advice_setting` array in advice.c:

	static const char *type_short_descriptions[NB_CONFLICT_TYPES] = {
		/*** "Simple" conflicts and informational messages ***/
		[INFO_AUTO_MERGING] = "Auto-merging",
		[CONFLICT_CONTENTS] = "CONFLICT (contents)",
	[...]

> If you want to take a stab at the above, or even see if my changes
> make sense (sorry for it all being squashed into one big commit and
> not having good commit messages, but, you know...you did ask), that'd
> be great.

Yes, I did ask, and I did receive ;-)

Thank you so much! It would be great if you could have a quick look over
the commits I added on top of your branch, to see whether things make more
or less sense to you. But if you're too busy elsewhere, I am one of the
best persons to understand that, too.

Thanks!
Dscho
Johannes Schindelin June 5, 2022, 3:40 p.m. UTC | #26
Hi Elijah,

On Sat, 4 Jun 2022, Johannes Schindelin wrote:

> On Tue, 17 May 2022, Elijah Newren wrote:
>
> > I think I got a fair amount of this implemented about a month or so
> > ago.  I just pushed up what I have to the wip-for-in-core-merge-tree
> > branch of newren/git.
>
> Thank you so much!
>
> I worked a few hours on this and pushed up my changes under the same
> branch name to dscho/git.

And I worked on it some more, and pushed up the result (which passes the
CI build except for some problem caused by changes outside of Git's source
code) ;-)

> > Some notes:
> >
> >   * A big "WIP" commit that needs to be broken up
>
> I did not yet start on that.

Still no start on that yet.

> >   * The previous "output" member of merge_result, containing a strmap
> > of conflict and informational messages (basically a mapping of
> > filename -> strbuf) now needs to be replaced by a strmap "conflicts",
> > which is now a mapping of primary_filename -> logical_conflicts, and
> > logical_conflicts is an array of logical_conflict, and
> > logical_conflict has a type, array of paths, and message.
> >   * Since "output" is no longer part of merge_result, the new
> > remerge-diff functionality is going to need to be modified since it
> > used that field, and instead iterate on "conflicts" to get the same
> > information
>
> I punted on that for now, recreating an `output`-style strmap and storing
> it as `path_messages` attribute.

I still punted on that because I wanted to see whether I could address the
test suite failures first (narrator's voice: he could).

> >   * I have some FIXME comments in a couple places where I need to
> > figure out how I want to pass the variable number of arguments (in a
> > function already accepting a variable number of arguments for other
> > reasons, making the function in a way have to variable length lists of
> > arguments)
>
> In my WIP fixups, I refactored this into a version that takes varargs and
> another version that takes a string_list.
>
> However, after getting all this to compile and t4301 to pass, I think we
> actually only need a version that takes up to two "other" paths, and a
> version that takes a string_list with those "other" paths, where the
> former constructs a temporary string_list and then calls the latter.

I ended up refactoring the refactor. The `path_msg()` function now takes
the "other paths" in two different forms: it accepts two (optional) `const
char*` parameters and an (also optional) `struct string_list *`. Either of
them, if non-NULL, will be added to the `struct strvec` field.

This looks _slightly_ ugly to me, but from an implementation point is the
cleanest solution.

> >   * The new enums and structs I added to merge-ort.c really have to be
> > added to merge-ort.h and become part of the API.  Feels a little
> > unfortunate since it'll make the API _much_ more involved, but I don't
> > see any other way to solve your usecase.
>
> I agree, but I did not do that yet ;-)
>
> Another thing I noticed is that we can probably ensure consistency between
> the `conflict_and_info_types` enum and the `type_short_descriptions` array
> by using the same C99 construct we're already using in the
> `advice_setting` array in advice.c:
>
> 	static const char *type_short_descriptions[NB_CONFLICT_TYPES] = {
> 		/*** "Simple" conflicts and informational messages ***/
> 		[INFO_AUTO_MERGING] = "Auto-merging",
> 		[CONFLICT_CONTENTS] = "CONFLICT (contents)",
> 	[...]

Still haven't done that, either, as it is merely syntactic sugar, really,
and not really an interesting change. I think I'll leave that to a time
after I managed to modify the remerge-diff machinery to accept the
new-style `conflicts` map (instead of recreating the old-style `output`
map, as I am doing for now).

> It would be great if you could have a quick look over the commits I
> added on top of your branch, to see whether things make more or less
> sense to you. But if you're too busy elsewhere, I am one of the best
> persons to understand that, too.

Hopefully I will get this into a reviewable shape before you get to
looking at it, so that your time is spent more wisely than what I asked
for ;-)

Ciao,
Dscho
Johannes Schindelin June 5, 2022, 10:42 p.m. UTC | #27
Hi Elijah,

[sorry for sending this flurry of mails, I just wasn't sure how
consecutively I could work on the `merge-tree` patches, and therefore sent
mails whenever I had to take a break in case I wouldn't be able to get
back to this project for a couple of days.]

On Sun, 5 Jun 2022, Johannes Schindelin wrote:

> On Sat, 4 Jun 2022, Johannes Schindelin wrote:
>
> > On Tue, 17 May 2022, Elijah Newren wrote:
> >
> > > * The previous "output" member of merge_result, containing a strmap
> > > of conflict and informational messages (basically a mapping of
> > > filename -> strbuf) now needs to be replaced by a strmap
> > > "conflicts", which is now a mapping of primary_filename ->
> > > logical_conflicts, and logical_conflicts is an array of
> > > logical_conflict, and logical_conflict has a type, array of paths,
> > > and message.

I massaged this a bit further: since we no longer actually need a `strbuf`
there (we no longer append to the `strbuf` but instead to the list of
logical conflicts), I replaced `struct logical_conflicts` with a
`string_list` where each item contains the conflict message and its `util`
points to a `struct logical_merge_info` that contains the `type` and the
involved paths.

This lets me...

> > >   * Since "output" is no longer part of merge_result, the new
> > > remerge-diff functionality is going to need to be modified since it
> > > used that field, and instead iterate on "conflicts" to get the same
> > > information
> >
> > I punted on that for now, recreating an `output`-style strmap and storing
> > it as `path_messages` attribute.
>
> I still punted on that because I wanted to see whether I could address the
> test suite failures first (narrator's voice: he could).

... address this issue without resorting to declaring the `logical_merge`
struct in `merge-ort.h` (which would get a bit messy, not only because we
would have to `#include <strvec.h>` in that header because that struct
contains a `struct strvec paths` and therefore must know the storage size
of `strvec`, but also because `merge-ort.h` is not included in `diff.c`,
and it would be a bit iffy to do that).

Since the per-path conflicts are now stored as a `string_list`, and since
we only need the messages in remerge, we can continue to simply pass the
pointer to `strmap conflicts` to the remerge machinery (in the common
case, if pathspecs are involved, we continue to lightly copy-filter that
`strmap`).

> > >   * The new enums and structs I added to merge-ort.c really have to be
> > > added to merge-ort.h and become part of the API.  Feels a little
> > > unfortunate since it'll make the API _much_ more involved, but I don't
> > > see any other way to solve your usecase.
> >
> > I agree, but I did not do that yet ;-)

As mentioned above, I think the better way is to _not_ declare that enum
and structs in `merge-ort.h` and instead store the per-path conflicts as a
`string_list` whose strings contain the conflict message and whose `util`
contains the type and the list of involved paths.

This simplifies the API rather dramatically, since the current user
(remerge) is only interested in the conflict message, but not in the type
nor in the full list of involved paths.

Should we ever need to access the type or the paths outside of
`merge-ort.c`, it is easy enough to add a simple function to access that
information via the `string_list`'s `util`.

> > Another thing I noticed is that we can probably ensure consistency between
> > the `conflict_and_info_types` enum and the `type_short_descriptions` array
> > by using the same C99 construct we're already using in the
> > `advice_setting` array in advice.c:
> >
> > 	static const char *type_short_descriptions[NB_CONFLICT_TYPES] = {
> > 		/*** "Simple" conflicts and informational messages ***/
> > 		[INFO_AUTO_MERGING] = "Auto-merging",
> > 		[CONFLICT_CONTENTS] = "CONFLICT (contents)",
> > 	[...]
>
> Still haven't done that, either, as it is merely syntactic sugar, really,
> and not really an interesting change. I think I'll leave that to a time
> after I managed to modify the remerge-diff machinery to accept the
> new-style `conflicts` map (instead of recreating the old-style `output`
> map, as I am doing for now).

Since I addressed that `output` issue, I now also C99-ified the
`type_short_descriptions` array.

> > It would be great if you could have a quick look over the commits I
> > added on top of your branch, to see whether things make more or less
> > sense to you. But if you're too busy elsewhere, I am one of the best
> > persons to understand that, too.
>
> Hopefully I will get this into a reviewable shape before you get to
> looking at it, so that your time is spent more wisely than what I asked
> for ;-)

I hope to find some time to work on this more tomorrow; If not, I will get
back to the project on Wednesday and push it further.

Ciao,
Dscho
Johannes Schindelin June 6, 2022, 9:37 p.m. UTC | #28
Hi Elijah,

[after this update, I will shut up until you chime in, promise!]

On Mon, 6 Jun 2022, Johannes Schindelin wrote:

> On Sun, 5 Jun 2022, Johannes Schindelin wrote:
>
> > On Sat, 4 Jun 2022, Johannes Schindelin wrote:
> >
> > > It would be great if you could have a quick look over the commits I
> > > added on top of your branch, to see whether things make more or less
> > > sense to you. But if you're too busy elsewhere, I am one of the best
> > > persons to understand that, too.
> >
> > Hopefully I will get this into a reviewable shape before you get to
> > looking at it, so that your time is spent more wisely than what I
> > asked for ;-)
>
> I hope to find some time to work on this more tomorrow; If not, I will
> get back to the project on Wednesday and push it further.

I did get a chance, and polished the patch series a bit. I also rebased it
onto the current tip of Git's main branch, mainly to address some merge
conflicts preemptively. The result is pushed up to
https://github.com/dscho/git/tree/js/in-core-merge-tree. This is the
range-diff relative to `newren/wip-for-in-core-merge-tree`:

-- snip --
 1:  cccb3888070 <  -:  ----------- tmp-objdir: new API for creating temporary writable databases
 2:  4e44121c2d7 <  -:  ----------- tmp-objdir: disable ref updates when replacing the primary odb
 3:  0b94724311d <  -:  ----------- show, log: provide a --remerge-diff capability
 4:  f06de6c1b2f <  -:  ----------- log: clean unneeded objects during `log --remerge-diff`
 5:  8d6c3d48f0e <  -:  ----------- ll-merge: make callers responsible for showing warnings
 6:  de8e8f88fa4 <  -:  ----------- merge-ort: capture and print ll-merge warnings in our preferred fashion
 7:  6b535a4d55a <  -:  ----------- merge-ort: mark a few more conflict messages as omittable
 8:  e2441608c63 <  -:  ----------- merge-ort: format messages slightly different for use in headers
 9:  62734beb693 <  -:  ----------- diff: add ability to insert additional headers for paths
10:  17eccf7e0d6 <  -:  ----------- show, log: include conflict/warning messages in --remerge-diff headers
11:  b3e7656cfc6 <  -:  ----------- merge-ort: mark conflict/warning messages from inner merges as omittable
12:  ea5df61cf35 <  -:  ----------- diff-merges: avoid history simplifications when diffing merges
13:  4a7cd5542bb =  1:  8fb51817ed4 merge-tree: rename merge_trees() to trivial_merge_trees()
14:  4780ff6784d =  2:  8e0a79fa1ad merge-tree: move logic for existing merge into new function
15:  60253745f5c =  3:  baf0950bcb6 merge-tree: add option parsing and initial shell for real merge function
16:  f8266d39c1b =  4:  697470e50ae merge-tree: implement real merges
17:  6629af14919 =  5:  069af1ecc30 merge-ort: split out a separate display_update_messages() function
18:  17b57efb714 =  6:  53c92a5d8d9 merge-tree: support including merge messages in output
19:  4c8f42372dd =  7:  67a728d35f0 merge-ort: provide a merge_get_conflicted_files() helper function
25:  8fe5be07cd0 !  8:  6419487e26b merge-ort: remove command-line-centric submodule message from merge-ort
    @@ merge-ort.c: static int merge_submodule(struct merge_options *opt,
      		strbuf_release(&sb);
      		break;
      	default:
    +
    + ## t/t6437-submodule-merge.sh ##
    +@@ t/t6437-submodule-merge.sh: test_expect_success 'merging should conflict for non fast-forward' '
    + 	(cd merge-search &&
    + 	 git checkout -b test-nonforward b &&
    + 	 (cd sub &&
    +-	  git rev-parse sub-d > ../expect) &&
    ++	  git rev-parse --short sub-d > ../expect) &&
    + 	  if test "$GIT_TEST_MERGE_ALGORITHM" = ort
    + 	  then
    + 		test_must_fail git merge c >actual
20:  7b1ee417f3d !  9:  c92b81e7366 merge-tree: provide a list of which files have conflicts
    @@ builtin/merge-tree.c: static int real_merge(struct merge_tree_options *o,
     +		string_list_clear(&conflicted_files, 1);
     +	}
      	if (o->show_messages) {
    - 		printf("\n");
    +-		printf("\n");
    ++		putchar(line_termination);
      		merge_display_update_messages(&opt, &result);
    + 	}
    + 	merge_finalize(&opt, &result);
     @@ builtin/merge-tree.c: int cmd_merge_tree(int argc, const char **argv, const char *prefix)

      	/* Do the relevant type of merge */
21:  f1231a8fbc8 = 10:  d7360f58f16 merge-tree: provide easy access to `ls-files -u` style info
 -:  ----------- > 11:  b53ef9c2116 merge-ort: store messages in a list, not in a single strbuf
 -:  ----------- > 12:  b16d570d248 merge-ort: make `path_messages` a strmap to a string_list
 -:  ----------- > 13:  b575a6b5f8a merge-ort: store more specific conflict information
 -:  ----------- > 14:  4f245cc28ae merge-ort: optionally produce machine-readable output
22:  22297e6ce75 ! 15:  6a369f837be merge-tree: allow `ls-files -u` style info to be NUL terminated
    @@ builtin/merge-tree.c: static int real_merge(struct merge_tree_options *o,
      	if (!result.clean) {
      		struct string_list conflicted_files = STRING_LIST_INIT_NODUP;
      		const char *last = NULL;
    -@@ builtin/merge-tree.c: static int real_merge(struct merge_tree_options *o,
    - 		string_list_clear(&conflicted_files, 1);
    - 	}
    - 	if (o->show_messages) {
    --		printf("\n");
    -+		putchar(line_termination);
    - 		merge_display_update_messages(&opt, &result);
    - 	}
    - 	merge_finalize(&opt, &result);
     @@ builtin/merge-tree.c: int cmd_merge_tree(int argc, const char **argv, const char *prefix)
      			    N_("do a trivial merge only"), MODE_TRIVIAL),
      		OPT_BOOL(0, "messages", &o.show_messages,
    @@ t/t4301-merge-tree-write-tree.sh: test_expect_success 'Check conflicted oids and
     +
     +	test_expect_code 1 git merge-tree --write-tree -z tweak1 side2 >out &&
     +	anonymize_hash out >actual &&
    ++	printf "\\n" >>actual &&
     +
     +	# Expected results:
     +	#   "greeting" should merge with conflicts
    @@ t/t4301-merge-tree-write-tree.sh: test_expect_success 'Check conflicted oids and
     +
     +	EOF
     +
    -+	cat <<-EOF >>expect &&
    -+	Auto-merging greeting
    -+	CONFLICT (content): Merge conflict in greeting
    -+	CONFLICT (file/directory): directory in the way of whatever from tweak1; moving it to whatever~tweak1 instead.
    -+	CONFLICT (modify/delete): whatever~tweak1 deleted in side2 and modified in tweak1.  Version tweak1 of whatever~tweak1 left in tree.
    -+	Auto-merging Αυτά μου φαίνονται κινέζικα
    -+	CONFLICT (content): Merge conflict in Αυτά μου φαίνονται κινέζικα
    ++	q_to_nul <<-EOF >>expect &&
    ++	1QgreetingQAuto-mergingQAuto-merging greeting
    ++	Q1QgreetingQCONFLICT (contents)QCONFLICT (content): Merge conflict in greeting
    ++	Q2Qwhatever~tweak1QwhateverQCONFLICT (file/directory)QCONFLICT (file/directory): directory in the way of whatever from tweak1; moving it to whatever~tweak1 instead.
    ++	Q1Qwhatever~tweak1QCONFLICT (modify/delete)QCONFLICT (modify/delete): whatever~tweak1 deleted in side2 and modified in tweak1.  Version tweak1 of whatever~tweak1 left in tree.
    ++	Q1QΑυτά μου φαίνονται κινέζικαQAuto-mergingQAuto-merging Αυτά μου φαίνονται κινέζικα
    ++	Q1QΑυτά μου φαίνονται κινέζικαQCONFLICT (contents)QCONFLICT (content): Merge conflict in Αυτά μου φαίνονται κινέζικα
    ++	Q
     +	EOF
     +
     +	test_cmp expect actual
23:  db73c6dd823 = 16:  47146dd59dd merge-tree: add a --allow-unrelated-histories flag
24:  d58a7c7a9f6 ! 17:  3ce28f6fd97 git-merge-tree.txt: add a section on potentional usage mistakes
    @@ Documentation/git-merge-tree.txt: with linkgit:git-merge[1]:
     +<<IM,Informational messages>> section has the necessary info, though it
     +is not designed to be machine parseable.
     +
    ++Do NOT assume that each paths from <<CFI,Conflicted file info>>, and
    ++the logical conflicts in the <<IM,Informational messages>> have a
    ++one-to-one mapping, nor that there is a one-to-many mapping, nor a
    ++many-to-one mapping.  Many-to-many mappings exist, meaning that each
    ++path can have many logical conflict types in a single merge, and each
    ++logical conflict type can affect many paths.
    ++
     +Do NOT assume all filenames listed in the <<IM,Informational messages>>
     +section had conflicts.  Messages can be included for files that have no
     +conflicts, such as "Auto-merging <file>".
26:  78e1243eca1 <  -:  ----------- WIP
-- snap --

I am pretty happy with the current state of the patches, and hope that we
can push this patch series over the finish line.

If you can think of anything I can do to help with this, please do let me
know, I am _very_ interested in getting this done, and finally am in a
position to help.

Ciao,
Dscho
Elijah Newren June 7, 2022, 7:38 a.m. UTC | #29
Hi Dscho,

On Mon, Jun 6, 2022 at 2:37 PM Johannes Schindelin
<Johannes.Schindelin@gmx.de> wrote:
>
> Hi Elijah,
>
> [after this update, I will shut up until you chime in, promise!]
>
> On Mon, 6 Jun 2022, Johannes Schindelin wrote:
>
> > On Sun, 5 Jun 2022, Johannes Schindelin wrote:
> >
> > > On Sat, 4 Jun 2022, Johannes Schindelin wrote:
> > >
> > > > It would be great if you could have a quick look over the commits I
> > > > added on top of your branch, to see whether things make more or less
> > > > sense to you. But if you're too busy elsewhere, I am one of the best
> > > > persons to understand that, too.
> > >
> > > Hopefully I will get this into a reviewable shape before you get to
> > > looking at it, so that your time is spent more wisely than what I
> > > asked for ;-)
> >
> > I hope to find some time to work on this more tomorrow; If not, I will
> > get back to the project on Wednesday and push it further.
>
> I did get a chance, and polished the patch series a bit. I also rebased it
> onto the current tip of Git's main branch, mainly to address some merge
> conflicts preemptively. The result is pushed up to
> https://github.com/dscho/git/tree/js/in-core-merge-tree. This is the
> range-diff relative to `newren/wip-for-in-core-merge-tree`:

This is so awesome; thanks for working on this.  Sorry I haven't had
time to review yet, but I'm hoping to be able to near the end of this
week.  I'm excited to see how it looks.  :-)

> -- snip --
>  1:  cccb3888070 <  -:  ----------- tmp-objdir: new API for creating temporary writable databases
>  2:  4e44121c2d7 <  -:  ----------- tmp-objdir: disable ref updates when replacing the primary odb
>  3:  0b94724311d <  -:  ----------- show, log: provide a --remerge-diff capability
>  4:  f06de6c1b2f <  -:  ----------- log: clean unneeded objects during `log --remerge-diff`
>  5:  8d6c3d48f0e <  -:  ----------- ll-merge: make callers responsible for showing warnings
>  6:  de8e8f88fa4 <  -:  ----------- merge-ort: capture and print ll-merge warnings in our preferred fashion
>  7:  6b535a4d55a <  -:  ----------- merge-ort: mark a few more conflict messages as omittable
>  8:  e2441608c63 <  -:  ----------- merge-ort: format messages slightly different for use in headers
>  9:  62734beb693 <  -:  ----------- diff: add ability to insert additional headers for paths
> 10:  17eccf7e0d6 <  -:  ----------- show, log: include conflict/warning messages in --remerge-diff headers
> 11:  b3e7656cfc6 <  -:  ----------- merge-ort: mark conflict/warning messages from inner merges as omittable
> 12:  ea5df61cf35 <  -:  ----------- diff-merges: avoid history simplifications when diffing merges
> 13:  4a7cd5542bb =  1:  8fb51817ed4 merge-tree: rename merge_trees() to trivial_merge_trees()
> 14:  4780ff6784d =  2:  8e0a79fa1ad merge-tree: move logic for existing merge into new function
> 15:  60253745f5c =  3:  baf0950bcb6 merge-tree: add option parsing and initial shell for real merge function
> 16:  f8266d39c1b =  4:  697470e50ae merge-tree: implement real merges
> 17:  6629af14919 =  5:  069af1ecc30 merge-ort: split out a separate display_update_messages() function
> 18:  17b57efb714 =  6:  53c92a5d8d9 merge-tree: support including merge messages in output
> 19:  4c8f42372dd =  7:  67a728d35f0 merge-ort: provide a merge_get_conflicted_files() helper function
> 25:  8fe5be07cd0 !  8:  6419487e26b merge-ort: remove command-line-centric submodule message from merge-ort
>     @@ merge-ort.c: static int merge_submodule(struct merge_options *opt,
>                 strbuf_release(&sb);
>                 break;
>         default:
>     +
>     + ## t/t6437-submodule-merge.sh ##
>     +@@ t/t6437-submodule-merge.sh: test_expect_success 'merging should conflict for non fast-forward' '
>     +   (cd merge-search &&
>     +    git checkout -b test-nonforward b &&
>     +    (cd sub &&
>     +-    git rev-parse sub-d > ../expect) &&
>     ++    git rev-parse --short sub-d > ../expect) &&
>     +     if test "$GIT_TEST_MERGE_ALGORITHM" = ort
>     +     then
>     +           test_must_fail git merge c >actual
> 20:  7b1ee417f3d !  9:  c92b81e7366 merge-tree: provide a list of which files have conflicts
>     @@ builtin/merge-tree.c: static int real_merge(struct merge_tree_options *o,
>      +          string_list_clear(&conflicted_files, 1);
>      +  }
>         if (o->show_messages) {
>     -           printf("\n");
>     +-          printf("\n");
>     ++          putchar(line_termination);
>                 merge_display_update_messages(&opt, &result);
>     +   }
>     +   merge_finalize(&opt, &result);
>      @@ builtin/merge-tree.c: int cmd_merge_tree(int argc, const char **argv, const char *prefix)
>
>         /* Do the relevant type of merge */
> 21:  f1231a8fbc8 = 10:  d7360f58f16 merge-tree: provide easy access to `ls-files -u` style info
>  -:  ----------- > 11:  b53ef9c2116 merge-ort: store messages in a list, not in a single strbuf
>  -:  ----------- > 12:  b16d570d248 merge-ort: make `path_messages` a strmap to a string_list
>  -:  ----------- > 13:  b575a6b5f8a merge-ort: store more specific conflict information
>  -:  ----------- > 14:  4f245cc28ae merge-ort: optionally produce machine-readable output
> 22:  22297e6ce75 ! 15:  6a369f837be merge-tree: allow `ls-files -u` style info to be NUL terminated
>     @@ builtin/merge-tree.c: static int real_merge(struct merge_tree_options *o,
>         if (!result.clean) {
>                 struct string_list conflicted_files = STRING_LIST_INIT_NODUP;
>                 const char *last = NULL;
>     -@@ builtin/merge-tree.c: static int real_merge(struct merge_tree_options *o,
>     -           string_list_clear(&conflicted_files, 1);
>     -   }
>     -   if (o->show_messages) {
>     --          printf("\n");
>     -+          putchar(line_termination);
>     -           merge_display_update_messages(&opt, &result);
>     -   }
>     -   merge_finalize(&opt, &result);
>      @@ builtin/merge-tree.c: int cmd_merge_tree(int argc, const char **argv, const char *prefix)
>                             N_("do a trivial merge only"), MODE_TRIVIAL),
>                 OPT_BOOL(0, "messages", &o.show_messages,
>     @@ t/t4301-merge-tree-write-tree.sh: test_expect_success 'Check conflicted oids and
>      +
>      +  test_expect_code 1 git merge-tree --write-tree -z tweak1 side2 >out &&
>      +  anonymize_hash out >actual &&
>     ++  printf "\\n" >>actual &&
>      +
>      +  # Expected results:
>      +  #   "greeting" should merge with conflicts
>     @@ t/t4301-merge-tree-write-tree.sh: test_expect_success 'Check conflicted oids and
>      +
>      +  EOF
>      +
>     -+  cat <<-EOF >>expect &&
>     -+  Auto-merging greeting
>     -+  CONFLICT (content): Merge conflict in greeting
>     -+  CONFLICT (file/directory): directory in the way of whatever from tweak1; moving it to whatever~tweak1 instead.
>     -+  CONFLICT (modify/delete): whatever~tweak1 deleted in side2 and modified in tweak1.  Version tweak1 of whatever~tweak1 left in tree.
>     -+  Auto-merging Αυτά μου φαίνονται κινέζικα
>     -+  CONFLICT (content): Merge conflict in Αυτά μου φαίνονται κινέζικα
>     ++  q_to_nul <<-EOF >>expect &&
>     ++  1QgreetingQAuto-mergingQAuto-merging greeting
>     ++  Q1QgreetingQCONFLICT (contents)QCONFLICT (content): Merge conflict in greeting
>     ++  Q2Qwhatever~tweak1QwhateverQCONFLICT (file/directory)QCONFLICT (file/directory): directory in the way of whatever from tweak1; moving it to whatever~tweak1 instead.
>     ++  Q1Qwhatever~tweak1QCONFLICT (modify/delete)QCONFLICT (modify/delete): whatever~tweak1 deleted in side2 and modified in tweak1.  Version tweak1 of whatever~tweak1 left in tree.
>     ++  Q1QΑυτά μου φαίνονται κινέζικαQAuto-mergingQAuto-merging Αυτά μου φαίνονται κινέζικα
>     ++  Q1QΑυτά μου φαίνονται κινέζικαQCONFLICT (contents)QCONFLICT (content): Merge conflict in Αυτά μου φαίνονται κινέζικα
>     ++  Q
>      +  EOF
>      +
>      +  test_cmp expect actual
> 23:  db73c6dd823 = 16:  47146dd59dd merge-tree: add a --allow-unrelated-histories flag
> 24:  d58a7c7a9f6 ! 17:  3ce28f6fd97 git-merge-tree.txt: add a section on potentional usage mistakes
>     @@ Documentation/git-merge-tree.txt: with linkgit:git-merge[1]:
>      +<<IM,Informational messages>> section has the necessary info, though it
>      +is not designed to be machine parseable.
>      +
>     ++Do NOT assume that each paths from <<CFI,Conflicted file info>>, and
>     ++the logical conflicts in the <<IM,Informational messages>> have a
>     ++one-to-one mapping, nor that there is a one-to-many mapping, nor a
>     ++many-to-one mapping.  Many-to-many mappings exist, meaning that each
>     ++path can have many logical conflict types in a single merge, and each
>     ++logical conflict type can affect many paths.
>     ++
>      +Do NOT assume all filenames listed in the <<IM,Informational messages>>
>      +section had conflicts.  Messages can be included for files that have no
>      +conflicts, such as "Auto-merging <file>".
> 26:  78e1243eca1 <  -:  ----------- WIP
> -- snap --
>
> I am pretty happy with the current state of the patches, and hope that we
> can push this patch series over the finish line.
>
> If you can think of anything I can do to help with this, please do let me
> know, I am _very_ interested in getting this done, and finally am in a
> position to help.

Very much appreciated.  Looks like you're now blocking on my review,
so I'll try to make some time by end of week to look over things.
Elijah Newren June 17, 2022, 11:44 p.m. UTC | #30
On Tue, Jun 7, 2022 at 12:38 AM Elijah Newren <newren@gmail.com> wrote:
>
> Hi Dscho,
>
> On Mon, Jun 6, 2022 at 2:37 PM Johannes Schindelin
> <Johannes.Schindelin@gmx.de> wrote:
> >
> > Hi Elijah,
> >
> > [after this update, I will shut up until you chime in, promise!]
> >
> > On Mon, 6 Jun 2022, Johannes Schindelin wrote:
> >
> > > On Sun, 5 Jun 2022, Johannes Schindelin wrote:
> > >
> > > > On Sat, 4 Jun 2022, Johannes Schindelin wrote:
> > > >
> > > > > It would be great if you could have a quick look over the commits I
> > > > > added on top of your branch, to see whether things make more or less
> > > > > sense to you. But if you're too busy elsewhere, I am one of the best
> > > > > persons to understand that, too.
> > > >
> > > > Hopefully I will get this into a reviewable shape before you get to
> > > > looking at it, so that your time is spent more wisely than what I
> > > > asked for ;-)
> > >
> > > I hope to find some time to work on this more tomorrow; If not, I will
> > > get back to the project on Wednesday and push it further.
> >
> > I did get a chance, and polished the patch series a bit. I also rebased it
> > onto the current tip of Git's main branch, mainly to address some merge
> > conflicts preemptively. The result is pushed up to
> > https://github.com/dscho/git/tree/js/in-core-merge-tree. This is the
> > range-diff relative to `newren/wip-for-in-core-merge-tree`:
>
> This is so awesome; thanks for working on this.  Sorry I haven't had
> time to review yet, but I'm hoping to be able to near the end of this
> week.  I'm excited to see how it looks.  :-)
>
> > -- snip --
> >  1:  cccb3888070 <  -:  ----------- tmp-objdir: new API for creating temporary writable databases
> >  2:  4e44121c2d7 <  -:  ----------- tmp-objdir: disable ref updates when replacing the primary odb
> >  3:  0b94724311d <  -:  ----------- show, log: provide a --remerge-diff capability
> >  4:  f06de6c1b2f <  -:  ----------- log: clean unneeded objects during `log --remerge-diff`
> >  5:  8d6c3d48f0e <  -:  ----------- ll-merge: make callers responsible for showing warnings
> >  6:  de8e8f88fa4 <  -:  ----------- merge-ort: capture and print ll-merge warnings in our preferred fashion
> >  7:  6b535a4d55a <  -:  ----------- merge-ort: mark a few more conflict messages as omittable
> >  8:  e2441608c63 <  -:  ----------- merge-ort: format messages slightly different for use in headers
> >  9:  62734beb693 <  -:  ----------- diff: add ability to insert additional headers for paths
> > 10:  17eccf7e0d6 <  -:  ----------- show, log: include conflict/warning messages in --remerge-diff headers
> > 11:  b3e7656cfc6 <  -:  ----------- merge-ort: mark conflict/warning messages from inner merges as omittable
> > 12:  ea5df61cf35 <  -:  ----------- diff-merges: avoid history simplifications when diffing merges
> > 13:  4a7cd5542bb =  1:  8fb51817ed4 merge-tree: rename merge_trees() to trivial_merge_trees()
> > 14:  4780ff6784d =  2:  8e0a79fa1ad merge-tree: move logic for existing merge into new function
> > 15:  60253745f5c =  3:  baf0950bcb6 merge-tree: add option parsing and initial shell for real merge function
> > 16:  f8266d39c1b =  4:  697470e50ae merge-tree: implement real merges
> > 17:  6629af14919 =  5:  069af1ecc30 merge-ort: split out a separate display_update_messages() function
> > 18:  17b57efb714 =  6:  53c92a5d8d9 merge-tree: support including merge messages in output
> > 19:  4c8f42372dd =  7:  67a728d35f0 merge-ort: provide a merge_get_conflicted_files() helper function
> > 25:  8fe5be07cd0 !  8:  6419487e26b merge-ort: remove command-line-centric submodule message from merge-ort
> >     @@ merge-ort.c: static int merge_submodule(struct merge_options *opt,
> >                 strbuf_release(&sb);
> >                 break;
> >         default:
> >     +
> >     + ## t/t6437-submodule-merge.sh ##
> >     +@@ t/t6437-submodule-merge.sh: test_expect_success 'merging should conflict for non fast-forward' '
> >     +   (cd merge-search &&
> >     +    git checkout -b test-nonforward b &&
> >     +    (cd sub &&
> >     +-    git rev-parse sub-d > ../expect) &&
> >     ++    git rev-parse --short sub-d > ../expect) &&
> >     +     if test "$GIT_TEST_MERGE_ALGORITHM" = ort
> >     +     then
> >     +           test_must_fail git merge c >actual
> > 20:  7b1ee417f3d !  9:  c92b81e7366 merge-tree: provide a list of which files have conflicts
> >     @@ builtin/merge-tree.c: static int real_merge(struct merge_tree_options *o,
> >      +          string_list_clear(&conflicted_files, 1);
> >      +  }
> >         if (o->show_messages) {
> >     -           printf("\n");
> >     +-          printf("\n");
> >     ++          putchar(line_termination);
> >                 merge_display_update_messages(&opt, &result);
> >     +   }
> >     +   merge_finalize(&opt, &result);
> >      @@ builtin/merge-tree.c: int cmd_merge_tree(int argc, const char **argv, const char *prefix)
> >
> >         /* Do the relevant type of merge */
> > 21:  f1231a8fbc8 = 10:  d7360f58f16 merge-tree: provide easy access to `ls-files -u` style info
> >  -:  ----------- > 11:  b53ef9c2116 merge-ort: store messages in a list, not in a single strbuf
> >  -:  ----------- > 12:  b16d570d248 merge-ort: make `path_messages` a strmap to a string_list
> >  -:  ----------- > 13:  b575a6b5f8a merge-ort: store more specific conflict information
> >  -:  ----------- > 14:  4f245cc28ae merge-ort: optionally produce machine-readable output
> > 22:  22297e6ce75 ! 15:  6a369f837be merge-tree: allow `ls-files -u` style info to be NUL terminated
> >     @@ builtin/merge-tree.c: static int real_merge(struct merge_tree_options *o,
> >         if (!result.clean) {
> >                 struct string_list conflicted_files = STRING_LIST_INIT_NODUP;
> >                 const char *last = NULL;
> >     -@@ builtin/merge-tree.c: static int real_merge(struct merge_tree_options *o,
> >     -           string_list_clear(&conflicted_files, 1);
> >     -   }
> >     -   if (o->show_messages) {
> >     --          printf("\n");
> >     -+          putchar(line_termination);
> >     -           merge_display_update_messages(&opt, &result);
> >     -   }
> >     -   merge_finalize(&opt, &result);
> >      @@ builtin/merge-tree.c: int cmd_merge_tree(int argc, const char **argv, const char *prefix)
> >                             N_("do a trivial merge only"), MODE_TRIVIAL),
> >                 OPT_BOOL(0, "messages", &o.show_messages,
> >     @@ t/t4301-merge-tree-write-tree.sh: test_expect_success 'Check conflicted oids and
> >      +
> >      +  test_expect_code 1 git merge-tree --write-tree -z tweak1 side2 >out &&
> >      +  anonymize_hash out >actual &&
> >     ++  printf "\\n" >>actual &&
> >      +
> >      +  # Expected results:
> >      +  #   "greeting" should merge with conflicts
> >     @@ t/t4301-merge-tree-write-tree.sh: test_expect_success 'Check conflicted oids and
> >      +
> >      +  EOF
> >      +
> >     -+  cat <<-EOF >>expect &&
> >     -+  Auto-merging greeting
> >     -+  CONFLICT (content): Merge conflict in greeting
> >     -+  CONFLICT (file/directory): directory in the way of whatever from tweak1; moving it to whatever~tweak1 instead.
> >     -+  CONFLICT (modify/delete): whatever~tweak1 deleted in side2 and modified in tweak1.  Version tweak1 of whatever~tweak1 left in tree.
> >     -+  Auto-merging Αυτά μου φαίνονται κινέζικα
> >     -+  CONFLICT (content): Merge conflict in Αυτά μου φαίνονται κινέζικα
> >     ++  q_to_nul <<-EOF >>expect &&
> >     ++  1QgreetingQAuto-mergingQAuto-merging greeting
> >     ++  Q1QgreetingQCONFLICT (contents)QCONFLICT (content): Merge conflict in greeting
> >     ++  Q2Qwhatever~tweak1QwhateverQCONFLICT (file/directory)QCONFLICT (file/directory): directory in the way of whatever from tweak1; moving it to whatever~tweak1 instead.
> >     ++  Q1Qwhatever~tweak1QCONFLICT (modify/delete)QCONFLICT (modify/delete): whatever~tweak1 deleted in side2 and modified in tweak1.  Version tweak1 of whatever~tweak1 left in tree.
> >     ++  Q1QΑυτά μου φαίνονται κινέζικαQAuto-mergingQAuto-merging Αυτά μου φαίνονται κινέζικα
> >     ++  Q1QΑυτά μου φαίνονται κινέζικαQCONFLICT (contents)QCONFLICT (content): Merge conflict in Αυτά μου φαίνονται κινέζικα
> >     ++  Q
> >      +  EOF
> >      +
> >      +  test_cmp expect actual
> > 23:  db73c6dd823 = 16:  47146dd59dd merge-tree: add a --allow-unrelated-histories flag
> > 24:  d58a7c7a9f6 ! 17:  3ce28f6fd97 git-merge-tree.txt: add a section on potentional usage mistakes
> >     @@ Documentation/git-merge-tree.txt: with linkgit:git-merge[1]:
> >      +<<IM,Informational messages>> section has the necessary info, though it
> >      +is not designed to be machine parseable.
> >      +
> >     ++Do NOT assume that each paths from <<CFI,Conflicted file info>>, and
> >     ++the logical conflicts in the <<IM,Informational messages>> have a
> >     ++one-to-one mapping, nor that there is a one-to-many mapping, nor a
> >     ++many-to-one mapping.  Many-to-many mappings exist, meaning that each
> >     ++path can have many logical conflict types in a single merge, and each
> >     ++logical conflict type can affect many paths.
> >     ++
> >      +Do NOT assume all filenames listed in the <<IM,Informational messages>>
> >      +section had conflicts.  Messages can be included for files that have no
> >      +conflicts, such as "Auto-merging <file>".
> > 26:  78e1243eca1 <  -:  ----------- WIP
> > -- snap --
> >
> > I am pretty happy with the current state of the patches, and hope that we
> > can push this patch series over the finish line.
> >
> > If you can think of anything I can do to help with this, please do let me
> > know, I am _very_ interested in getting this done, and finally am in a
> > position to help.
>
> Very much appreciated.  Looks like you're now blocking on my review,
> so I'll try to make some time by end of week to look over things.

Sorry for the delay.  However, I have looked over the changes in
detail.  Well done!  I very much like this version.  Thanks for taking
the time to break up my WIP patches, fix my bugs, address all the
FIXMEs, and then going and adding some nice cleanups and improvements
on top!  I'll add my Signed-off-by on a couple patches and have
gitgitgadget submit this round.
Johannes Schindelin June 18, 2022, 9:58 p.m. UTC | #31
Hi Elijah,

On Fri, 17 Jun 2022, Elijah Newren wrote:

> On Tue, Jun 7, 2022 at 12:38 AM Elijah Newren <newren@gmail.com> wrote:
> >
> > On Mon, Jun 6, 2022 at 2:37 PM Johannes Schindelin
> > <Johannes.Schindelin@gmx.de> wrote:
> > >
> > > [after this update, I will shut up until you chime in, promise!]
> > >
> > > On Mon, 6 Jun 2022, Johannes Schindelin wrote:
> > >
> > > > On Sun, 5 Jun 2022, Johannes Schindelin wrote:
> > > >
> > > > > On Sat, 4 Jun 2022, Johannes Schindelin wrote:
> > > > >
> > > > > > It would be great if you could have a quick look over the commits I
> > > > > > added on top of your branch, to see whether things make more or less
> > > > > > sense to you. But if you're too busy elsewhere, I am one of the best
> > > > > > persons to understand that, too.
> > > > >
> > > > > Hopefully I will get this into a reviewable shape before you get to
> > > > > looking at it, so that your time is spent more wisely than what I
> > > > > asked for ;-)
> > > >
> > > > I hope to find some time to work on this more tomorrow; If not, I will
> > > > get back to the project on Wednesday and push it further.
> > >
> > > I did get a chance, and polished the patch series a bit. I also rebased it
> > > onto the current tip of Git's main branch, mainly to address some merge
> > > conflicts preemptively. The result is pushed up to
> > > https://github.com/dscho/git/tree/js/in-core-merge-tree. This is the
> > > range-diff relative to `newren/wip-for-in-core-merge-tree`:
> >
> > This is so awesome; thanks for working on this.  Sorry I haven't had
> > time to review yet, but I'm hoping to be able to near the end of this
> > week.  I'm excited to see how it looks.  :-)
> >
> > > -- snip --
> > >  1:  cccb3888070 <  -:  ----------- tmp-objdir: new API for creating temporary writable databases
> > >  2:  4e44121c2d7 <  -:  ----------- tmp-objdir: disable ref updates when replacing the primary odb
> > >  3:  0b94724311d <  -:  ----------- show, log: provide a --remerge-diff capability
> > >  4:  f06de6c1b2f <  -:  ----------- log: clean unneeded objects during `log --remerge-diff`
> > >  5:  8d6c3d48f0e <  -:  ----------- ll-merge: make callers responsible for showing warnings
> > >  6:  de8e8f88fa4 <  -:  ----------- merge-ort: capture and print ll-merge warnings in our preferred fashion
> > >  7:  6b535a4d55a <  -:  ----------- merge-ort: mark a few more conflict messages as omittable
> > >  8:  e2441608c63 <  -:  ----------- merge-ort: format messages slightly different for use in headers
> > >  9:  62734beb693 <  -:  ----------- diff: add ability to insert additional headers for paths
> > > 10:  17eccf7e0d6 <  -:  ----------- show, log: include conflict/warning messages in --remerge-diff headers
> > > 11:  b3e7656cfc6 <  -:  ----------- merge-ort: mark conflict/warning messages from inner merges as omittable
> > > 12:  ea5df61cf35 <  -:  ----------- diff-merges: avoid history simplifications when diffing merges
> > > 13:  4a7cd5542bb =  1:  8fb51817ed4 merge-tree: rename merge_trees() to trivial_merge_trees()
> > > 14:  4780ff6784d =  2:  8e0a79fa1ad merge-tree: move logic for existing merge into new function
> > > 15:  60253745f5c =  3:  baf0950bcb6 merge-tree: add option parsing and initial shell for real merge function
> > > 16:  f8266d39c1b =  4:  697470e50ae merge-tree: implement real merges
> > > 17:  6629af14919 =  5:  069af1ecc30 merge-ort: split out a separate display_update_messages() function
> > > 18:  17b57efb714 =  6:  53c92a5d8d9 merge-tree: support including merge messages in output
> > > 19:  4c8f42372dd =  7:  67a728d35f0 merge-ort: provide a merge_get_conflicted_files() helper function
> > > 25:  8fe5be07cd0 !  8:  6419487e26b merge-ort: remove command-line-centric submodule message from merge-ort
> > >     @@ merge-ort.c: static int merge_submodule(struct merge_options *opt,
> > >                 strbuf_release(&sb);
> > >                 break;
> > >         default:
> > >     +
> > >     + ## t/t6437-submodule-merge.sh ##
> > >     +@@ t/t6437-submodule-merge.sh: test_expect_success 'merging should conflict for non fast-forward' '
> > >     +   (cd merge-search &&
> > >     +    git checkout -b test-nonforward b &&
> > >     +    (cd sub &&
> > >     +-    git rev-parse sub-d > ../expect) &&
> > >     ++    git rev-parse --short sub-d > ../expect) &&
> > >     +     if test "$GIT_TEST_MERGE_ALGORITHM" = ort
> > >     +     then
> > >     +           test_must_fail git merge c >actual
> > > 20:  7b1ee417f3d !  9:  c92b81e7366 merge-tree: provide a list of which files have conflicts
> > >     @@ builtin/merge-tree.c: static int real_merge(struct merge_tree_options *o,
> > >      +          string_list_clear(&conflicted_files, 1);
> > >      +  }
> > >         if (o->show_messages) {
> > >     -           printf("\n");
> > >     +-          printf("\n");
> > >     ++          putchar(line_termination);
> > >                 merge_display_update_messages(&opt, &result);
> > >     +   }
> > >     +   merge_finalize(&opt, &result);
> > >      @@ builtin/merge-tree.c: int cmd_merge_tree(int argc, const char **argv, const char *prefix)
> > >
> > >         /* Do the relevant type of merge */
> > > 21:  f1231a8fbc8 = 10:  d7360f58f16 merge-tree: provide easy access to `ls-files -u` style info
> > >  -:  ----------- > 11:  b53ef9c2116 merge-ort: store messages in a list, not in a single strbuf
> > >  -:  ----------- > 12:  b16d570d248 merge-ort: make `path_messages` a strmap to a string_list
> > >  -:  ----------- > 13:  b575a6b5f8a merge-ort: store more specific conflict information
> > >  -:  ----------- > 14:  4f245cc28ae merge-ort: optionally produce machine-readable output
> > > 22:  22297e6ce75 ! 15:  6a369f837be merge-tree: allow `ls-files -u` style info to be NUL terminated
> > >     @@ builtin/merge-tree.c: static int real_merge(struct merge_tree_options *o,
> > >         if (!result.clean) {
> > >                 struct string_list conflicted_files = STRING_LIST_INIT_NODUP;
> > >                 const char *last = NULL;
> > >     -@@ builtin/merge-tree.c: static int real_merge(struct merge_tree_options *o,
> > >     -           string_list_clear(&conflicted_files, 1);
> > >     -   }
> > >     -   if (o->show_messages) {
> > >     --          printf("\n");
> > >     -+          putchar(line_termination);
> > >     -           merge_display_update_messages(&opt, &result);
> > >     -   }
> > >     -   merge_finalize(&opt, &result);
> > >      @@ builtin/merge-tree.c: int cmd_merge_tree(int argc, const char **argv, const char *prefix)
> > >                             N_("do a trivial merge only"), MODE_TRIVIAL),
> > >                 OPT_BOOL(0, "messages", &o.show_messages,
> > >     @@ t/t4301-merge-tree-write-tree.sh: test_expect_success 'Check conflicted oids and
> > >      +
> > >      +  test_expect_code 1 git merge-tree --write-tree -z tweak1 side2 >out &&
> > >      +  anonymize_hash out >actual &&
> > >     ++  printf "\\n" >>actual &&
> > >      +
> > >      +  # Expected results:
> > >      +  #   "greeting" should merge with conflicts
> > >     @@ t/t4301-merge-tree-write-tree.sh: test_expect_success 'Check conflicted oids and
> > >      +
> > >      +  EOF
> > >      +
> > >     -+  cat <<-EOF >>expect &&
> > >     -+  Auto-merging greeting
> > >     -+  CONFLICT (content): Merge conflict in greeting
> > >     -+  CONFLICT (file/directory): directory in the way of whatever from tweak1; moving it to whatever~tweak1 instead.
> > >     -+  CONFLICT (modify/delete): whatever~tweak1 deleted in side2 and modified in tweak1.  Version tweak1 of whatever~tweak1 left in tree.
> > >     -+  Auto-merging Αυτά μου φαίνονται κινέζικα
> > >     -+  CONFLICT (content): Merge conflict in Αυτά μου φαίνονται κινέζικα
> > >     ++  q_to_nul <<-EOF >>expect &&
> > >     ++  1QgreetingQAuto-mergingQAuto-merging greeting
> > >     ++  Q1QgreetingQCONFLICT (contents)QCONFLICT (content): Merge conflict in greeting
> > >     ++  Q2Qwhatever~tweak1QwhateverQCONFLICT (file/directory)QCONFLICT (file/directory): directory in the way of whatever from tweak1; moving it to whatever~tweak1 instead.
> > >     ++  Q1Qwhatever~tweak1QCONFLICT (modify/delete)QCONFLICT (modify/delete): whatever~tweak1 deleted in side2 and modified in tweak1.  Version tweak1 of whatever~tweak1 left in tree.
> > >     ++  Q1QΑυτά μου φαίνονται κινέζικαQAuto-mergingQAuto-merging Αυτά μου φαίνονται κινέζικα
> > >     ++  Q1QΑυτά μου φαίνονται κινέζικαQCONFLICT (contents)QCONFLICT (content): Merge conflict in Αυτά μου φαίνονται κινέζικα
> > >     ++  Q
> > >      +  EOF
> > >      +
> > >      +  test_cmp expect actual
> > > 23:  db73c6dd823 = 16:  47146dd59dd merge-tree: add a --allow-unrelated-histories flag
> > > 24:  d58a7c7a9f6 ! 17:  3ce28f6fd97 git-merge-tree.txt: add a section on potentional usage mistakes
> > >     @@ Documentation/git-merge-tree.txt: with linkgit:git-merge[1]:
> > >      +<<IM,Informational messages>> section has the necessary info, though it
> > >      +is not designed to be machine parseable.
> > >      +
> > >     ++Do NOT assume that each paths from <<CFI,Conflicted file info>>, and
> > >     ++the logical conflicts in the <<IM,Informational messages>> have a
> > >     ++one-to-one mapping, nor that there is a one-to-many mapping, nor a
> > >     ++many-to-one mapping.  Many-to-many mappings exist, meaning that each
> > >     ++path can have many logical conflict types in a single merge, and each
> > >     ++logical conflict type can affect many paths.
> > >     ++
> > >      +Do NOT assume all filenames listed in the <<IM,Informational messages>>
> > >      +section had conflicts.  Messages can be included for files that have no
> > >      +conflicts, such as "Auto-merging <file>".
> > > 26:  78e1243eca1 <  -:  ----------- WIP
> > > -- snap --
> > >
> > > I am pretty happy with the current state of the patches, and hope that we
> > > can push this patch series over the finish line.
> > >
> > > If you can think of anything I can do to help with this, please do let me
> > > know, I am _very_ interested in getting this done, and finally am in a
> > > position to help.
> >
> > Very much appreciated.  Looks like you're now blocking on my review,
> > so I'll try to make some time by end of week to look over things.
>
> Sorry for the delay.  However, I have looked over the changes in
> detail.  Well done!  I very much like this version.  Thanks for taking
> the time to break up my WIP patches, fix my bugs, address all the
> FIXMEs, and then going and adding some nice cleanups and improvements
> on top!  I'll add my Signed-off-by on a couple patches and have
> gitgitgadget submit this round.

That's great news, thank you so much!

Ciao,
Dscho
diff mbox series

Patch

diff --git a/merge-ort.c b/merge-ort.c
index b78dde55ad9..5e7cea6cc8f 100644
--- a/merge-ort.c
+++ b/merge-ort.c
@@ -4275,6 +4275,37 @@  void merge_display_update_messages(struct merge_options *opt,
 	trace2_region_leave("merge", "display messages", opt->repo);
 }
 
+void merge_get_conflicted_files(struct merge_result *result,
+				struct string_list *conflicted_files)
+{
+	struct hashmap_iter iter;
+	struct strmap_entry *e;
+	struct merge_options_internal *opti = result->priv;
+
+	strmap_for_each_entry(&opti->conflicted, &iter, e) {
+		const char *path = e->key;
+		struct conflict_info *ci = e->value;
+		int i;
+
+		VERIFY_CI(ci);
+
+		for (i = MERGE_BASE; i <= MERGE_SIDE2; i++) {
+			struct stage_info *si;
+
+			if (!(ci->filemask & (1ul << i)))
+				continue;
+
+			si = xmalloc(sizeof(*si));
+			si->stage = i+1;
+			si->mode = ci->stages[i].mode;
+			oidcpy(&si->oid, &ci->stages[i].oid);
+			string_list_append(conflicted_files, path)->util = si;
+		}
+	}
+	/* string_list_sort() uses a stable sort, so we're good */
+	string_list_sort(conflicted_files);
+}
+
 void merge_switch_to_result(struct merge_options *opt,
 			    struct tree *head,
 			    struct merge_result *result,
diff --git a/merge-ort.h b/merge-ort.h
index d643b47cb7c..e635a294ea8 100644
--- a/merge-ort.h
+++ b/merge-ort.h
@@ -2,6 +2,7 @@ 
 #define MERGE_ORT_H
 
 #include "merge-recursive.h"
+#include "hash.h"
 
 struct commit;
 struct tree;
@@ -89,6 +90,26 @@  void merge_display_update_messages(struct merge_options *opt,
 				   struct merge_result *result,
 				   FILE *stream);
 
+struct stage_info {
+	struct object_id oid;
+	int mode;
+	int stage;
+};
+
+/*
+ * Provide a list of path -> {struct stage_info*} mappings for
+ * all conflicted files.  Note that each path could appear up to three
+ * times in the list, corresponding to 3 different stage entries.  In short,
+ * this basically provides the info that would be printed by `ls-files -u`.
+ *
+ * result should have been populated by a call to
+ * one of the merge_incore_[non]recursive() functions.
+ *
+ * conflicted_files should be empty before calling this function.
+ */
+void merge_get_conflicted_files(struct merge_result *result,
+				struct string_list *conflicted_files);
+
 /* Do needed cleanup when not calling merge_switch_to_result() */
 void merge_finalize(struct merge_options *opt,
 		    struct merge_result *result);