diff mbox series

[v3,05/11] merge-resolve: rewrite in C

Message ID 20201005122646.27994-6-alban.gruin@gmail.com (mailing list archive)
State New, archived
Headers show
Series Rewrite the remaining merge strategies from shell to C | expand

Commit Message

Alban Gruin Oct. 5, 2020, 12:26 p.m. UTC
This rewrites `git merge-resolve' from shell to C.  As for `git
merge-one-file', this port is not completely straightforward and removes
calls to external processes to avoid reading and writing the index over
and over again.

 - The call to `update-index -q --refresh' is replaced by a call to
   refresh_index().

 - The call to `read-tree' is replaced by a call to unpack_trees() (and
   all the setup needed).

 - The call to `write-tree' is replaced by a call to
   write_index_as_tree().

 - The call to `merge-index', needed to invoke `git merge-one-file', is
   replaced by a call to the new merge_all() function.  A callback
   function, merge_one_file_cb(), is added to allow it to call
   merge_one_file() without forking.

Here too, the index is read in cmd_merge_resolve(), but
merge_strategies_resolve() takes care of writing it back to the disk.

The parameters of merge_strategies_resolve() will be surprising at first
glance: why use a commit list for `bases' and `remote', when we could
use an oid array and a pointer to an oid?  Because, in a later commit,
try_merge_strategy() will be able to call merge_strategies_resolve()
directly, and it already uses a commit list for `bases' (`common') and
`remote' (`remoteheads'), and a string for `head_arg'.  To reduce
friction later, merge_strategies_resolve() takes the same types of
parameters.

Signed-off-by: Alban Gruin <alban.gruin@gmail.com>
---
 Makefile                |  2 +-
 builtin.h               |  1 +
 builtin/merge-resolve.c | 69 +++++++++++++++++++++++++++++++++
 git-merge-resolve.sh    | 54 --------------------------
 git.c                   |  1 +
 merge-strategies.c      | 85 +++++++++++++++++++++++++++++++++++++++++
 merge-strategies.h      |  5 +++
 7 files changed, 162 insertions(+), 55 deletions(-)
 create mode 100644 builtin/merge-resolve.c
 delete mode 100755 git-merge-resolve.sh

Comments

Junio C Hamano Oct. 16, 2020, 7:19 p.m. UTC | #1
Alban Gruin <alban.gruin@gmail.com> writes:

> +#include "cache.h"
> +#include "builtin.h"
> +#include "merge-strategies.h"
> +
> +static const char builtin_merge_resolve_usage[] =
> +	"git merge-resolve <bases>... -- <head> <remote>";
> +
> +int cmd_merge_resolve(int argc, const char **argv, const char *prefix)
> +{
> +	int i, is_baseless = 1, sep_seen = 0;
> +	const char *head = NULL;
> +	struct commit_list *bases = NULL, *remote = NULL;
> +	struct commit_list **next_base = &bases;
> +
> +	if (argc < 5)
> +		usage(builtin_merge_resolve_usage);
> +
> +	setup_work_tree();
> +	if (repo_read_index(the_repository) < 0)
> +		die("invalid index");
> +
> +	/* The first parameters up to -- are merge bases; the rest are
> +	 * heads. */

Style (I won't repeat).

> +	for (i = 1; i < argc; i++) {
> +		if (strcmp(argv[i], "--") == 0)

	if (!strcmp(...))

is more typical than comparing with "== 0".

> +			sep_seen = 1;
> +		else if (strcmp(argv[i], "-h") == 0)
> +			usage(builtin_merge_resolve_usage);
> +		else if (sep_seen && !head)
> +			head = argv[i];
> +		else if (remote) {
> +			/* Give up if we are given two or more remotes.
> +			 * Not handling octopus. */
> +			return 2;
> +		} else {
> +			struct object_id oid;
> +
> +			get_oid(argv[i], &oid);
> +			is_baseless &= sep_seen;
> +
> +			if (!oideq(&oid, the_hash_algo->empty_tree)) {

What is this business about an empty tree about?

> +				struct commit *commit;
> +				commit = lookup_commit_or_die(&oid, argv[i]);
> +
> +				if (sep_seen)
> +					commit_list_append(commit, &remote);
> +				else
> +					next_base = commit_list_append(commit, next_base);
> +			}
> +		}
> +	}
> +
> +	/* Give up if this is a baseless merge. */
> +	if (is_baseless)
> +		return 2;

This is quite convoluted.  

The original is much more straight-forward.  We just said "grab
everything before we see '--' and call them bases; immediately after
'--' is HEAD and everything else is remote.  Now do we have any
base?  Otherwise we cannot handle it".

I cannot see an equivalence to it in the rewritten result, with the
bit operation with is_baseless and sep_seen.  Wouldn't it be the
matter of checking if next_base is NULL, or is there something more
subtle that deserves in-code comment going on?

Thanks.
Alban Gruin Nov. 6, 2020, 7:53 p.m. UTC | #2
Le 16/10/2020 à 21:19, Junio C Hamano a écrit :
> Alban Gruin <alban.gruin@gmail.com> writes:
> 
>> +#include "cache.h"
>> +#include "builtin.h"
>> +#include "merge-strategies.h"
>> +
>> +static const char builtin_merge_resolve_usage[] =
>> +	"git merge-resolve <bases>... -- <head> <remote>";
>> +
>> +int cmd_merge_resolve(int argc, const char **argv, const char *prefix)
>> +{
>> +	int i, is_baseless = 1, sep_seen = 0;
>> +	const char *head = NULL;
>> +	struct commit_list *bases = NULL, *remote = NULL;
>> +	struct commit_list **next_base = &bases;
>> +
>> +	if (argc < 5)
>> +		usage(builtin_merge_resolve_usage);
>> +
>> +	setup_work_tree();
>> +	if (repo_read_index(the_repository) < 0)
>> +		die("invalid index");
>> +
>> +	/* The first parameters up to -- are merge bases; the rest are
>> +	 * heads. */
> 
> Style (I won't repeat).
> 
>> +	for (i = 1; i < argc; i++) {
>> +		if (strcmp(argv[i], "--") == 0)
> 
> 	if (!strcmp(...))
> 
> is more typical than comparing with "== 0".
> 
>> +			sep_seen = 1;
>> +		else if (strcmp(argv[i], "-h") == 0)
>> +			usage(builtin_merge_resolve_usage);
>> +		else if (sep_seen && !head)
>> +			head = argv[i];
>> +		else if (remote) {
>> +			/* Give up if we are given two or more remotes.
>> +			 * Not handling octopus. */
>> +			return 2;
>> +		} else {
>> +			struct object_id oid;
>> +
>> +			get_oid(argv[i], &oid);
>> +			is_baseless &= sep_seen;
>> +
>> +			if (!oideq(&oid, the_hash_algo->empty_tree)) {
> 
> What is this business about an empty tree about?
> 

I don’t remember my intent here -- perhaps I wanted to avoid merges on
empty trees…  I’ll remove that from here and merge-octopus.c.

>> +				struct commit *commit;
>> +				commit = lookup_commit_or_die(&oid, argv[i]);
>> +
>> +				if (sep_seen)
>> +					commit_list_append(commit, &remote);
>> +				else
>> +					next_base = commit_list_append(commit, next_base);
>> +			}
>> +		}
>> +	}
>> +
>> +	/* Give up if this is a baseless merge. */
>> +	if (is_baseless)
>> +		return 2;
> 
> This is quite convoluted.  
> 
> The original is much more straight-forward.  We just said "grab
> everything before we see '--' and call them bases; immediately after
> '--' is HEAD and everything else is remote.  Now do we have any
> base?  Otherwise we cannot handle it".
> 
> I cannot see an equivalence to it in the rewritten result, with the
> bit operation with is_baseless and sep_seen.  Wouldn't it be the
> matter of checking if next_base is NULL, or is there something more
> subtle that deserves in-code comment going on?
> 

After re-reading this many, many weeks later, I can confirm that this is
convoluted, and that there is a much better way to perform some checks…
for instance, checking if `bases' is NULL instead of having
`is_baseless', or checking after the loop if `remote->next' is not NULL
to verify whether there are multiple remotes.

> Thanks.
> 

Alban
diff mbox series

Patch

diff --git a/Makefile b/Makefile
index 6dfdb33cb2..3cc6b192f1 100644
--- a/Makefile
+++ b/Makefile
@@ -601,7 +601,6 @@  SCRIPT_SH += git-bisect.sh
 SCRIPT_SH += git-difftool--helper.sh
 SCRIPT_SH += git-filter-branch.sh
 SCRIPT_SH += git-merge-octopus.sh
-SCRIPT_SH += git-merge-resolve.sh
 SCRIPT_SH += git-mergetool.sh
 SCRIPT_SH += git-quiltimport.sh
 SCRIPT_SH += git-request-pull.sh
@@ -1097,6 +1096,7 @@  BUILTIN_OBJS += builtin/merge-index.o
 BUILTIN_OBJS += builtin/merge-one-file.o
 BUILTIN_OBJS += builtin/merge-ours.o
 BUILTIN_OBJS += builtin/merge-recursive.o
+BUILTIN_OBJS += builtin/merge-resolve.o
 BUILTIN_OBJS += builtin/merge-tree.o
 BUILTIN_OBJS += builtin/merge.o
 BUILTIN_OBJS += builtin/mktag.o
diff --git a/builtin.h b/builtin.h
index 4d2cd78856..35e91c16d0 100644
--- a/builtin.h
+++ b/builtin.h
@@ -180,6 +180,7 @@  int cmd_merge_ours(int argc, const char **argv, const char *prefix);
 int cmd_merge_file(int argc, const char **argv, const char *prefix);
 int cmd_merge_one_file(int argc, const char **argv, const char *prefix);
 int cmd_merge_recursive(int argc, const char **argv, const char *prefix);
+int cmd_merge_resolve(int argc, const char **argv, const char *prefix);
 int cmd_merge_tree(int argc, const char **argv, const char *prefix);
 int cmd_mktag(int argc, const char **argv, const char *prefix);
 int cmd_mktree(int argc, const char **argv, const char *prefix);
diff --git a/builtin/merge-resolve.c b/builtin/merge-resolve.c
new file mode 100644
index 0000000000..59f734473b
--- /dev/null
+++ b/builtin/merge-resolve.c
@@ -0,0 +1,69 @@ 
+/*
+ * Builtin "git merge-resolve"
+ *
+ * Copyright (c) 2020 Alban Gruin
+ *
+ * Based on git-merge-resolve.sh, written by Linus Torvalds and Junio C
+ * Hamano.
+ *
+ * Resolve two trees, using enhanced multi-base read-tree.
+ */
+
+#include "cache.h"
+#include "builtin.h"
+#include "merge-strategies.h"
+
+static const char builtin_merge_resolve_usage[] =
+	"git merge-resolve <bases>... -- <head> <remote>";
+
+/*
+ * Entry point of "git merge-resolve <bases>... -- <head> <remote>".
+ *
+ * Arguments before "--" are merge bases, the first argument after "--"
+ * is the head, and the next one is the single remote to merge.
+ *
+ * Returns 2 when this strategy cannot handle the request (no merge
+ * base, or more than one remote), otherwise whatever
+ * merge_strategies_resolve() returns.  Malformed invocations end in
+ * usage(); unresolvable object names end in die().
+ */
+int cmd_merge_resolve(int argc, const char **argv, const char *prefix)
+{
+	int i, sep_seen = 0;
+	const char *head = NULL;
+	struct commit_list *bases = NULL, *remote = NULL;
+	struct commit_list **next_base = &bases;
+
+	/*
+	 * The shortest meaningful invocation is "-- <head> <remote>"
+	 * (a baseless merge), which must be answered with exit code 2
+	 * below rather than with a usage error, as the shell version
+	 * did.
+	 */
+	if (argc < 4)
+		usage(builtin_merge_resolve_usage);
+
+	setup_work_tree();
+	if (repo_read_index(the_repository) < 0)
+		die("invalid index");
+
+	/*
+	 * The first parameters up to -- are merge bases; the rest are
+	 * heads.
+	 */
+	for (i = 1; i < argc; i++) {
+		struct object_id oid;
+		struct commit *commit;
+
+		if (!strcmp(argv[i], "--")) {
+			sep_seen = 1;
+			continue;
+		}
+		if (!strcmp(argv[i], "-h"))
+			usage(builtin_merge_resolve_usage);
+		if (sep_seen && !head) {
+			/* Resolved later by merge_strategies_resolve(). */
+			head = argv[i];
+			continue;
+		}
+		if (remote) {
+			/*
+			 * Give up if we are given two or more remotes.
+			 * Not handling octopus.
+			 */
+			return 2;
+		}
+
+		/* Do not go on with an uninitialised oid. */
+		if (get_oid(argv[i], &oid))
+			die("could not parse object '%s'", argv[i]);
+
+		commit = lookup_commit_or_die(&oid, argv[i]);
+		if (sep_seen)
+			commit_list_append(commit, &remote);
+		else
+			next_base = commit_list_append(commit, next_base);
+	}
+
+	/* Without "--", a head and a remote there is nothing to merge. */
+	if (!head || !remote)
+		usage(builtin_merge_resolve_usage);
+
+	/* Give up if this is a baseless merge. */
+	if (!bases)
+		return 2;
+
+	return merge_strategies_resolve(the_repository, bases, head, remote);
+}
diff --git a/git-merge-resolve.sh b/git-merge-resolve.sh
deleted file mode 100755
index 343fe7bccd..0000000000
--- a/git-merge-resolve.sh
+++ /dev/null
@@ -1,54 +0,0 @@ 
-#!/bin/sh
-#
-# Copyright (c) 2005 Linus Torvalds
-# Copyright (c) 2005 Junio C Hamano
-#
-# Resolve two trees, using enhanced multi-base read-tree.
-
-# The first parameters up to -- are merge bases; the rest are heads.
-bases= head= remotes= sep_seen=
-for arg
-do
-	case ",$sep_seen,$head,$arg," in
-	*,--,)
-		sep_seen=yes
-		;;
-	,yes,,*)
-		head=$arg
-		;;
-	,yes,*)
-		remotes="$remotes$arg "
-		;;
-	*)
-		bases="$bases$arg "
-		;;
-	esac
-done
-
-# Give up if we are given two or more remotes -- not handling octopus.
-case "$remotes" in
-?*' '?*)
-	exit 2 ;;
-esac
-
-# Give up if this is a baseless merge.
-if test '' = "$bases"
-then
-	exit 2
-fi
-
-git update-index -q --refresh
-git read-tree -u -m --aggressive $bases $head $remotes || exit 2
-echo "Trying simple merge."
-if result_tree=$(git write-tree 2>/dev/null)
-then
-	exit 0
-else
-	echo "Simple merge failed, trying Automatic merge."
-	if git merge-index -o git-merge-one-file -a
-	then
-		exit 0
-	else
-		exit 1
-	fi
-fi
diff --git a/git.c b/git.c
index a4d3f98094..64a1a1de41 100644
--- a/git.c
+++ b/git.c
@@ -544,6 +544,7 @@  static struct cmd_struct commands[] = {
 	{ "merge-recursive", cmd_merge_recursive, RUN_SETUP | NEED_WORK_TREE | NO_PARSEOPT },
 	{ "merge-recursive-ours", cmd_merge_recursive, RUN_SETUP | NEED_WORK_TREE | NO_PARSEOPT },
 	{ "merge-recursive-theirs", cmd_merge_recursive, RUN_SETUP | NEED_WORK_TREE | NO_PARSEOPT },
+	{ "merge-resolve", cmd_merge_resolve, RUN_SETUP | NEED_WORK_TREE | NO_PARSEOPT },
 	{ "merge-subtree", cmd_merge_recursive, RUN_SETUP | NEED_WORK_TREE | NO_PARSEOPT },
 	{ "merge-tree", cmd_merge_tree, RUN_SETUP | NO_PARSEOPT },
 	{ "mktag", cmd_mktag, RUN_SETUP | NO_PARSEOPT },
diff --git a/merge-strategies.c b/merge-strategies.c
index c022ba9748..6b4b3d03a6 100644
--- a/merge-strategies.c
+++ b/merge-strategies.c
@@ -1,8 +1,11 @@ 
 #include "cache.h"
+#include "cache-tree.h"
 #include "dir.h"
 #include "ll-merge.h"
+#include "lockfile.h"
 #include "merge-strategies.h"
 #include "run-command.h"
+#include "unpack-trees.h"
 #include "xdiff-interface.h"
 
 static int add_to_index_cacheinfo(struct index_state *istate,
@@ -322,3 +325,85 @@  int merge_all(struct index_state *istate, int oneshot, int quiet,
 
 	return err;
 }
+
+/*
+ * Look up the tree that `oid' refers to and describe it in `t' so it
+ * can be handed to unpack_trees().
+ *
+ * Returns 0 on success, -1 if the tree cannot be found or parsed.
+ */
+static int add_tree(const struct object_id *oid, struct tree_desc *t)
+{
+	struct tree *tree;
+
+	tree = parse_tree_indirect(oid);
+	/*
+	 * Guard against a failed lookup before touching the tree;
+	 * tree->buffer and tree->size are dereferenced below.
+	 */
+	if (!tree || parse_tree(tree))
+		return -1;
+
+	init_tree_desc(t, tree->buffer, tree->size);
+	return 0;
+}
+
+/*
+ * C port of git-merge-resolve.sh: merge `remote' into `head_arg' using
+ * the given merge `bases'.
+ *
+ * The trees of all bases, then of the head, then of the remote are fed
+ * to unpack_trees() (the former `read-tree -u -m --aggressive').  If
+ * the resulting index cannot be written out as a tree, the remaining
+ * entries are handed to merge_all() with merge_one_file_cb().
+ *
+ * The caller is expected to have read r->index; this function takes
+ * care of locking it and writing it back.
+ *
+ * Returns 0 on a clean merge, 1 if the per-file merge failed or its
+ * result could not be written, and 2 if the merge could not be
+ * attempted (unresolvable head, too many or no trees, unreadable tree,
+ * unpack_trees() failure, or failure to write the index).
+ */
+int merge_strategies_resolve(struct repository *r,
+			     struct commit_list *bases, const char *head_arg,
+			     struct commit_list *remote)
+{
+	int i = 0, nr_trees = 0, ret;
+	struct lock_file lock = LOCK_INIT;
+	struct tree_desc t[MAX_UNPACK_TREES];
+	struct unpack_trees_options opts;
+	struct object_id head, oid;
+	struct commit_list *j;
+
+	if (head_arg && get_oid(head_arg, &head)) {
+		error(_("could not resolve '%s'"), head_arg);
+		return 2;
+	}
+
+	/*
+	 * Count the trees up front so that t[] is never written past
+	 * its MAX_UNPACK_TREES slots, and so that opts.fn is always
+	 * set before unpack_trees() runs.
+	 */
+	for (j = bases; j && j->item; j = j->next)
+		nr_trees++;
+	if (head_arg)
+		nr_trees++;
+	if (remote)
+		nr_trees++;
+
+	if (!nr_trees) {
+		error(_("no tree to merge"));
+		return 2;
+	}
+	if (nr_trees > MAX_UNPACK_TREES) {
+		error(_("cannot merge more than %d trees"), MAX_UNPACK_TREES);
+		return 2;
+	}
+
+	repo_hold_locked_index(r, &lock, LOCK_DIE_ON_ERROR);
+	/* The shell version ran `update-index -q --refresh'. */
+	refresh_index(r->index, REFRESH_QUIET, NULL, NULL, NULL);
+
+	memset(&opts, 0, sizeof(opts));
+	opts.head_idx = 1;
+	opts.src_index = r->index;
+	opts.dst_index = r->index;
+	opts.update = 1;
+	opts.merge = 1;
+	opts.aggressive = 1;
+
+	for (j = bases; j && j->item; j = j->next) {
+		if (add_tree(&j->item->object.oid, t + (i++)))
+			goto out;
+	}
+
+	if (head_arg && add_tree(&head, t + (i++)))
+		goto out;
+	if (remote && add_tree(&remote->item->object.oid, t + (i++)))
+		goto out;
+
+	/* Pick the merge function from the number of trees collected. */
+	if (i == 1)
+		opts.fn = oneway_merge;
+	else if (i == 2) {
+		opts.fn = twoway_merge;
+		opts.initial_checkout = is_index_unborn(r->index);
+	} else if (i >= 3) {
+		opts.fn = threeway_merge;
+		opts.head_idx = i - 1;
+	}
+
+	if (unpack_trees(i, t, &opts))
+		goto out;
+
+	puts(_("Trying simple merge."));
+	if (write_locked_index(r->index, &lock, COMMIT_LOCK)) {
+		error(_("unable to write new index file"));
+		goto out;
+	}
+
+	if (!write_index_as_tree(&oid, r->index, r->index_file,
+				 WRITE_TREE_SILENT, NULL))
+		return 0;
+
+	puts(_("Simple merge failed, trying Automatic merge."));
+	repo_hold_locked_index(r, &lock, LOCK_DIE_ON_ERROR);
+	ret = merge_all(r->index, 0, 0, merge_one_file_cb, r);
+
+	/* A result that cannot be recorded is a failed merge, too. */
+	if (write_locked_index(r->index, &lock, COMMIT_LOCK))
+		ret = error(_("unable to write new index file"));
+
+	return !!ret;
+
+ out:
+	rollback_lock_file(&lock);
+	return 2;
+}
diff --git a/merge-strategies.h b/merge-strategies.h
index 40e175ca39..778f8ce9d6 100644
--- a/merge-strategies.h
+++ b/merge-strategies.h
@@ -1,6 +1,7 @@ 
 #ifndef MERGE_STRATEGIES_H
 #define MERGE_STRATEGIES_H
 
+#include "commit.h"
 #include "object.h"
 
 int merge_strategies_one_file(struct repository *r,
@@ -33,4 +34,8 @@  int merge_one_path(struct index_state *istate, int oneshot, int quiet,
 int merge_all(struct index_state *istate, int oneshot, int quiet,
 	      merge_cb cb, void *data);
 
+int merge_strategies_resolve(struct repository *r,
+			     struct commit_list *bases, const char *head_arg,
+			     struct commit_list *remote);
+
 #endif /* MERGE_STRATEGIES_H */