diff mbox series

[v2,6/8] merge-ort: format messages slightly different for use in headers

Message ID 887e46435c0561e86f1858682fe53e9de926b69c.1640419160.git.gitgitgadget@gmail.com (mailing list archive)
State New, archived
Headers show
Series Add a new --remerge-diff capability to show & log | expand

Commit Message

Elijah Newren Dec. 25, 2021, 7:59 a.m. UTC
From: Elijah Newren <newren@gmail.com>

When users run
    git show --remerge-diff $MERGE_COMMIT
or
    git log -p --remerge-diff ...
stdout is not an appropriate location to dump conflict messages, but we
do want to provide them to users.  We will include them in the diff
headers instead...but for that to work, we need for any multiline
messages to replace newlines with both a newline and a space.  Add a new
flag to signal when we want these messages modified in such a fashion,
and use it in path_msg() to modify these messages this way.

Signed-off-by: Elijah Newren <newren@gmail.com>
---
 merge-ort.c       | 36 ++++++++++++++++++++++++++++++++++--
 merge-recursive.c |  3 +++
 merge-recursive.h |  1 +
 3 files changed, 38 insertions(+), 2 deletions(-)

Comments

Ævar Arnfjörð Bjarmason Dec. 26, 2021, 6:30 p.m. UTC | #1
On Sat, Dec 25 2021, Elijah Newren via GitGitGadget wrote:

> @@ -634,17 +634,46 @@ static void path_msg(struct merge_options *opt,
>  		     const char *fmt, ...)
>  {
>  	va_list ap;
> -	struct strbuf *sb = strmap_get(&opt->priv->output, path);
> +	struct strbuf *sb, *dest;
> +	struct strbuf tmp = STRBUF_INIT;
> +
> +	if (opt->record_conflict_msgs_as_headers && omittable_hint)
> +		return; /* Do not record mere hints in tree */
> +	sb = strmap_get(&opt->priv->output, path);
>  	if (!sb) {
>  		sb = xmalloc(sizeof(*sb));
>  		strbuf_init(sb, 0);
>  		strmap_put(&opt->priv->output, path, sb);
>  	}
>  
> +	dest = (opt->record_conflict_msgs_as_headers ? &tmp : sb);
> +
>  	va_start(ap, fmt);
> -	strbuf_vaddf(sb, fmt, ap);
> +	strbuf_vaddf(dest, fmt, ap);
>  	va_end(ap);
>  
> +	if (opt->record_conflict_msgs_as_headers) {
> +		int i_sb = 0, i_tmp = 0;
> +
> +		/* Copy tmp to sb, adding spaces after newlines */
> +		strbuf_grow(sb, 2*tmp.len); /* more than sufficient */
> +		for (; i_tmp < tmp.len; i_tmp++, i_sb++) {
> +			/* Copy next character from tmp to sb */
> +			sb->buf[sb->len + i_sb] = tmp.buf[i_tmp];
> +
> +			/* If we copied a newline, add a space */
> +			if (tmp.buf[i_tmp] == '\n')
> +				sb->buf[++i_sb] = ' ';
> +		}
> +		/* Update length and ensure it's NUL-terminated */
> +		sb->len += i_sb;
> +		sb->buf[sb->len] = '\0';
> +
> +		/* Clean up tmp */
> +		strbuf_release(&tmp);
> +	}
> +
> +	/* Add final newline character to sb */
>  	strbuf_addch(sb, '\n');
>  }
>  

I'm not saying this is wrong or needs to change. Just a reader's note
that this sent me on an interesting journey of looking at various
in-tree callers of strbufs that want to do the equivalent of
s/$from/$to/ on a strbuf, with and without the equivalent of /g.

I figured I'd change the $subject since this is more of a general
musing...

In trailer.c we've got strbuf_replace(), which looks like it could be
made to be general enough to serve most callers if it did a memmem()
instead of a strstr(), and knew to take a "all" flag to implement a /g.

We then have e.g. lf_to_crlf() in imap-send.c, which uses a newly
alloc'd buffer followed by a strbuf_attach(), which is a common pattern.

Then strbuf_reencode() in strbuf.c basically solves this problem, and
calls reencode_string_len(), both it and the underlying function are
*almost* general enough to know to take some "from/to" string/length
pair, i.e. to not be bound to "reencoding" only with iconv().

Then there's strbuf_add_percentencode() and strbuf_add_urlencode() whose
API users might be happy with in-place replacing, but do a
read-and-copy-maybe-expand.

It might be an interesting follow-up project for someone to come up with
a generic in-place search-replace function with a signature like:

	int strbuf_replace(struct strbuf *sb, const char *from,
			   size_t from_len, const char *to,
			   size_t to_len, int max);

To do e.g. in this case:

	if (opt->record_conflict_msgs_as_headers)
		strbuf_replace(sb, "\n", strlen("\n"), "\n ", strlen("\n "), -1);

The various in-tree implementations do some variant of over-mallocing to
save work in the loop (as is being done here), copying where a small
realloc/memmove might do, scanning the string to figure out how much to
malloc, then copying in a second pass etc.
Johannes Altmanninger Dec. 28, 2021, 10:56 a.m. UTC | #2
On Sat, Dec 25, 2021 at 07:59:17AM +0000, Elijah Newren via GitGitGadget wrote:
> From: Elijah Newren <newren@gmail.com>
> 
> When users run
>     git show --remerge-diff $MERGE_COMMIT
> or
>     git log -p --remerge-diff ...
> stdout is not an appropriate location to dump conflict messages, but we
> do want to provide them to users.  We will include them in the diff
> headers instead...but for that to work, we need for any multiline
> messages to replace newlines with both a newline and a space.  Add a new
> flag to signal when we want these messages modified in such a fashion,
> and use it in path_msg() to modify these messages this way.

makes sense (same for patches 4 & 5)

> 
> Signed-off-by: Elijah Newren <newren@gmail.com>
> ---
>  merge-ort.c       | 36 ++++++++++++++++++++++++++++++++++--
>  merge-recursive.c |  3 +++
>  merge-recursive.h |  1 +
>  3 files changed, 38 insertions(+), 2 deletions(-)
> 
> diff --git a/merge-ort.c b/merge-ort.c
> index 998e92ec593..9142d56e0ad 100644
> --- a/merge-ort.c
> +++ b/merge-ort.c
> @@ -634,17 +634,46 @@ static void path_msg(struct merge_options *opt,
>  		     const char *fmt, ...)
>  {
>  	va_list ap;
> -	struct strbuf *sb = strmap_get(&opt->priv->output, path);
> +	struct strbuf *sb, *dest;
> +	struct strbuf tmp = STRBUF_INIT;
> +
> +	if (opt->record_conflict_msgs_as_headers && omittable_hint)
> +		return; /* Do not record mere hints in tree */
> +	sb = strmap_get(&opt->priv->output, path);
>  	if (!sb) {
>  		sb = xmalloc(sizeof(*sb));
>  		strbuf_init(sb, 0);
>  		strmap_put(&opt->priv->output, path, sb);
>  	}
>  
> +	dest = (opt->record_conflict_msgs_as_headers ? &tmp : sb);
> +
>  	va_start(ap, fmt);
> -	strbuf_vaddf(sb, fmt, ap);
> +	strbuf_vaddf(dest, fmt, ap);
>  	va_end(ap);
>  
> +	if (opt->record_conflict_msgs_as_headers) {
> +		int i_sb = 0, i_tmp = 0;
> +
> +		/* Copy tmp to sb, adding spaces after newlines */
> +		strbuf_grow(sb, 2*tmp.len); /* more than sufficient */
> +		for (; i_tmp < tmp.len; i_tmp++, i_sb++) {
> +			/* Copy next character from tmp to sb */
> +			sb->buf[sb->len + i_sb] = tmp.buf[i_tmp];
> +
> +			/* If we copied a newline, add a space */
> +			if (tmp.buf[i_tmp] == '\n')
> +				sb->buf[++i_sb] = ' ';
> +		}
> +		/* Update length and ensure it's NUL-terminated */

I think this and the two comments inside the loop are mostly redundant. I'd
drop them (except maybe this one because it's a common oversight I guess).

> +		sb->len += i_sb;
> +		sb->buf[sb->len] = '\0';
> +
> +		/* Clean up tmp */

Also this one I guess

> +		strbuf_release(&tmp);
> +	}
> +
> +	/* Add final newline character to sb */
>  	strbuf_addch(sb, '\n');
>  }
>  
> @@ -4246,6 +4275,9 @@ void merge_switch_to_result(struct merge_options *opt,
>  		struct string_list olist = STRING_LIST_INIT_NODUP;
>  		int i;
>  
> +		if (opt->record_conflict_msgs_as_headers)
> +			BUG("Either display conflict messages or record them as headers, not both");
> +
>  		trace2_region_enter("merge", "display messages", opt->repo);
>  
>  		/* Hack to pre-allocate olist to the desired size */
> diff --git a/merge-recursive.c b/merge-recursive.c
> index bc73c52dd84..c9ba7e904a6 100644
> --- a/merge-recursive.c
> +++ b/merge-recursive.c
> @@ -3714,6 +3714,9 @@ static int merge_start(struct merge_options *opt, struct tree *head)
>  
>  	assert(opt->priv == NULL);
>  
> +	/* Not supported; option specific to merge-ort */
> +	assert(!opt->record_conflict_msgs_as_headers);
> +
>  	/* Sanity check on repo state; index must match head */
>  	if (repo_index_has_changes(opt->repo, head, &sb)) {
>  		err(opt, _("Your local changes to the following files would be overwritten by merge:\n  %s"),
> diff --git a/merge-recursive.h b/merge-recursive.h
> index 0795a1d3ec1..ebfdb7f994e 100644
> --- a/merge-recursive.h
> +++ b/merge-recursive.h
> @@ -46,6 +46,7 @@ struct merge_options {
>  	/* miscellaneous control options */
>  	const char *subtree_shift;
>  	unsigned renormalize : 1;
> +	unsigned record_conflict_msgs_as_headers : 1;
>  
>  	/* internal fields used by the implementation */
>  	struct merge_options_internal *priv;
> -- 
> gitgitgadget
>
Elijah Newren Dec. 28, 2021, 9:48 p.m. UTC | #3
On Tue, Dec 28, 2021 at 2:56 AM Johannes Altmanninger <aclopte@gmail.com> wrote:
>
> On Sat, Dec 25, 2021 at 07:59:17AM +0000, Elijah Newren via GitGitGadget wrote:
> > From: Elijah Newren <newren@gmail.com>
> >
> > When users run
> >     git show --remerge-diff $MERGE_COMMIT
> > or
> >     git log -p --remerge-diff ...
> > stdout is not an appropriate location to dump conflict messages, but we
> > do want to provide them to users.  We will include them in the diff
> > headers instead...but for that to work, we need for any multiline
> > messages to replace newlines with both a newline and a space.  Add a new
> > flag to signal when we want these messages modified in such a fashion,
> > and use it in path_msg() to modify these messages this way.
>
> makes sense (same for patches 4 & 5)
>
> >
> > Signed-off-by: Elijah Newren <newren@gmail.com>
> > ---
> >  merge-ort.c       | 36 ++++++++++++++++++++++++++++++++++--
> >  merge-recursive.c |  3 +++
> >  merge-recursive.h |  1 +
> >  3 files changed, 38 insertions(+), 2 deletions(-)
> >
> > diff --git a/merge-ort.c b/merge-ort.c
> > index 998e92ec593..9142d56e0ad 100644
> > --- a/merge-ort.c
> > +++ b/merge-ort.c
> > @@ -634,17 +634,46 @@ static void path_msg(struct merge_options *opt,
> >                    const char *fmt, ...)
> >  {
> >       va_list ap;
> > -     struct strbuf *sb = strmap_get(&opt->priv->output, path);
> > +     struct strbuf *sb, *dest;
> > +     struct strbuf tmp = STRBUF_INIT;
> > +
> > +     if (opt->record_conflict_msgs_as_headers && omittable_hint)
> > +             return; /* Do not record mere hints in tree */
> > +     sb = strmap_get(&opt->priv->output, path);
> >       if (!sb) {
> >               sb = xmalloc(sizeof(*sb));
> >               strbuf_init(sb, 0);
> >               strmap_put(&opt->priv->output, path, sb);
> >       }
> >
> > +     dest = (opt->record_conflict_msgs_as_headers ? &tmp : sb);
> > +
> >       va_start(ap, fmt);
> > -     strbuf_vaddf(sb, fmt, ap);
> > +     strbuf_vaddf(dest, fmt, ap);
> >       va_end(ap);
> >
> > +     if (opt->record_conflict_msgs_as_headers) {
> > +             int i_sb = 0, i_tmp = 0;
> > +
> > +             /* Copy tmp to sb, adding spaces after newlines */
> > +             strbuf_grow(sb, 2*tmp.len); /* more than sufficient */
> > +             for (; i_tmp < tmp.len; i_tmp++, i_sb++) {
> > +                     /* Copy next character from tmp to sb */
> > +                     sb->buf[sb->len + i_sb] = tmp.buf[i_tmp];
> > +
> > +                     /* If we copied a newline, add a space */
> > +                     if (tmp.buf[i_tmp] == '\n')
> > +                             sb->buf[++i_sb] = ' ';
> > +             }
> > +             /* Update length and ensure it's NUL-terminated */
>
> I think this and the two comments inside the loop are mostly redundant. I'd
> drop them (except maybe this one because it's a common oversight I guess).

I don't think redundancy is (necessarily) a reason to drop comments.
Take for example the following from early in abspath.c:

    /* Find start of the last component */
    while (offset < len && !is_dir_sep(path->buf[len - 1]))
        len--;
    /* Skip sequences of multiple path-separators */
    while (offset < len && is_dir_sep(path->buf[len - 1]))
        len--;

The comment quickly explains what might take a bit more time to reason
out.  Since I'm dealing with multiple different indices and various
arithmetic, I figured a quick explanation was helpful.  And, of
course, the reminder to make it NUL-terminated.  Granted, if the code
is very readily obvious then comments are not helpful, and there's a
gray area somewhere in between.  I think the code we're discussing
here is in that gray area, where it's a matter of taste what the
threshold is.  I don't find your taste here unreasonable, but I don't
find mine for the above examples to be unreasonable either.  I'd
rather leave these in.

>
> > +             sb->len += i_sb;
> > +             sb->buf[sb->len] = '\0';
> > +
> > +             /* Clean up tmp */
>
> Also this one I guess

Yeah, I'll nuke this one.  The reason for this comment was more that
sometimes I like having a comment apply to the code below it until the
next comment; do thing that way avoids the need to put a
here-ends-the-previous-comment comment, or arbitrarily avoid blank
lines after a comment.  But that reasoning is a bit weaker here, and
it clearly doesn't need any explanation, so I'll just drop it.

> > +             strbuf_release(&tmp);
> > +     }
> > +
> > +     /* Add final newline character to sb */
> >       strbuf_addch(sb, '\n');
> >  }
> >
> > @@ -4246,6 +4275,9 @@ void merge_switch_to_result(struct merge_options *opt,
> >               struct string_list olist = STRING_LIST_INIT_NODUP;
> >               int i;
> >
> > +             if (opt->record_conflict_msgs_as_headers)
> > +                     BUG("Either display conflict messages or record them as headers, not both");
> > +
> >               trace2_region_enter("merge", "display messages", opt->repo);
> >
> >               /* Hack to pre-allocate olist to the desired size */
> > diff --git a/merge-recursive.c b/merge-recursive.c
> > index bc73c52dd84..c9ba7e904a6 100644
> > --- a/merge-recursive.c
> > +++ b/merge-recursive.c
> > @@ -3714,6 +3714,9 @@ static int merge_start(struct merge_options *opt, struct tree *head)
> >
> >       assert(opt->priv == NULL);
> >
> > +     /* Not supported; option specific to merge-ort */
> > +     assert(!opt->record_conflict_msgs_as_headers);
> > +
> >       /* Sanity check on repo state; index must match head */
> >       if (repo_index_has_changes(opt->repo, head, &sb)) {
> >               err(opt, _("Your local changes to the following files would be overwritten by merge:\n  %s"),
> > diff --git a/merge-recursive.h b/merge-recursive.h
> > index 0795a1d3ec1..ebfdb7f994e 100644
> > --- a/merge-recursive.h
> > +++ b/merge-recursive.h
> > @@ -46,6 +46,7 @@ struct merge_options {
> >       /* miscellaneous control options */
> >       const char *subtree_shift;
> >       unsigned renormalize : 1;
> > +     unsigned record_conflict_msgs_as_headers : 1;
> >
> >       /* internal fields used by the implementation */
> >       struct merge_options_internal *priv;
> > --
> > gitgitgadget
> >
diff mbox series

Patch

diff --git a/merge-ort.c b/merge-ort.c
index 998e92ec593..9142d56e0ad 100644
--- a/merge-ort.c
+++ b/merge-ort.c
@@ -634,17 +634,46 @@  static void path_msg(struct merge_options *opt,
 		     const char *fmt, ...)
 {
 	va_list ap;
-	struct strbuf *sb = strmap_get(&opt->priv->output, path);
+	struct strbuf *sb, *dest;
+	struct strbuf tmp = STRBUF_INIT;
+
+	if (opt->record_conflict_msgs_as_headers && omittable_hint)
+		return; /* Do not record mere hints in tree */
+	sb = strmap_get(&opt->priv->output, path);
 	if (!sb) {
 		sb = xmalloc(sizeof(*sb));
 		strbuf_init(sb, 0);
 		strmap_put(&opt->priv->output, path, sb);
 	}
 
+	dest = (opt->record_conflict_msgs_as_headers ? &tmp : sb);
+
 	va_start(ap, fmt);
-	strbuf_vaddf(sb, fmt, ap);
+	strbuf_vaddf(dest, fmt, ap);
 	va_end(ap);
 
+	if (opt->record_conflict_msgs_as_headers) {
+		int i_sb = 0, i_tmp = 0;
+
+		/* Copy tmp to sb, adding spaces after newlines */
+		strbuf_grow(sb, 2*tmp.len); /* more than sufficient */
+		for (; i_tmp < tmp.len; i_tmp++, i_sb++) {
+			/* Copy next character from tmp to sb */
+			sb->buf[sb->len + i_sb] = tmp.buf[i_tmp];
+
+			/* If we copied a newline, add a space */
+			if (tmp.buf[i_tmp] == '\n')
+				sb->buf[++i_sb] = ' ';
+		}
+		/* Update length and ensure it's NUL-terminated */
+		sb->len += i_sb;
+		sb->buf[sb->len] = '\0';
+
+		/* Clean up tmp */
+		strbuf_release(&tmp);
+	}
+
+	/* Add final newline character to sb */
 	strbuf_addch(sb, '\n');
 }
 
@@ -4246,6 +4275,9 @@  void merge_switch_to_result(struct merge_options *opt,
 		struct string_list olist = STRING_LIST_INIT_NODUP;
 		int i;
 
+		if (opt->record_conflict_msgs_as_headers)
+			BUG("Either display conflict messages or record them as headers, not both");
+
 		trace2_region_enter("merge", "display messages", opt->repo);
 
 		/* Hack to pre-allocate olist to the desired size */
diff --git a/merge-recursive.c b/merge-recursive.c
index bc73c52dd84..c9ba7e904a6 100644
--- a/merge-recursive.c
+++ b/merge-recursive.c
@@ -3714,6 +3714,9 @@  static int merge_start(struct merge_options *opt, struct tree *head)
 
 	assert(opt->priv == NULL);
 
+	/* Not supported; option specific to merge-ort */
+	assert(!opt->record_conflict_msgs_as_headers);
+
 	/* Sanity check on repo state; index must match head */
 	if (repo_index_has_changes(opt->repo, head, &sb)) {
 		err(opt, _("Your local changes to the following files would be overwritten by merge:\n  %s"),
diff --git a/merge-recursive.h b/merge-recursive.h
index 0795a1d3ec1..ebfdb7f994e 100644
--- a/merge-recursive.h
+++ b/merge-recursive.h
@@ -46,6 +46,7 @@  struct merge_options {
 	/* miscellaneous control options */
 	const char *subtree_shift;
 	unsigned renormalize : 1;
+	unsigned record_conflict_msgs_as_headers : 1;
 
 	/* internal fields used by the implementation */
 	struct merge_options_internal *priv;