diff mbox series

[04/19] merge-recursive: exit early if index != head

Message ID 20190725174611.14802-5-newren@gmail.com (mailing list archive)
State New, archived
Headers show
Series Cleanup merge API | expand

Commit Message

Elijah Newren July 25, 2019, 5:45 p.m. UTC
We had a rule to enforce that the index matches head, but it was found
at the beginning of merge_trees() and would only trigger when
opt->call_depth was 0.  Since merge_recursive() doesn't call
merge_trees() until after returning from recursing, this meant that the
check wasn't triggered by merge_recursive() until it had first finished
all the intermediate merges to create virtual merge bases.  That is a
potentially huge amount of computation (and writing of intermediate
merge results into the .git/objects directory) before it errors out and
says, in effect, "Sorry, I can't do any merging because you have some
local changes that would be overwritten."

Further, not enforcing this requirement earlier allowed other bugs (such
as an unintentional unconditional dropping and reloading of the index in
merge_recursive() even when no recursion was necessary), to mask bugs in
other callers (which were fixed in the commit prior to this one).

Make sure we do the index == head check at the beginning of the merge,
and error out immediately if it fails.

Signed-off-by: Elijah Newren <newren@gmail.com>
---
 merge-recursive.c | 93 +++++++++++++++++++++++++++++++++++------------
 1 file changed, 69 insertions(+), 24 deletions(-)

Comments

Johannes Schindelin July 25, 2019, 7:51 p.m. UTC | #1
Hi Elijah,

On Thu, 25 Jul 2019, Elijah Newren wrote:

> We had a rule to enforce that the index matches head, but it was found
> at the beginning of merge_trees() and would only trigger when
> opt->call_depth was 0.  Since merge_recursive() doesn't call
> merge_trees() until after returning from recursing, this meant that the
> check wasn't triggered by merge_recursive() until it had first finished
> all the intermediate merges to create virtual merge bases.  That is a
> potentially huge amount of computation (and writing of intermediate
> merge results into the .git/objects directory) before it errors out and
> says, in effect, "Sorry, I can't do any merging because you have some
> local changes that would be overwritten."
>
> Further, not enforcing this requirement earlier allowed other bugs (such
> as an unintentional unconditional dropping and reloading of the index in
> merge_recursive() even when no recursion was necessary), to mask bugs in
> other callers (which were fixed in the commit prior to this one).
>
> Make sure we do the index == head check at the beginning of the merge,
> and error out immediately if it fails.

Very clear commit message.

> diff --git a/merge-recursive.c b/merge-recursive.c
> index 37bb94fb4d..b762ecd7bd 100644
> --- a/merge-recursive.c
> +++ b/merge-recursive.c
> @@ -3381,21 +3381,14 @@ static int process_entry(struct merge_options *opt,
>  	return clean_merge;
>  }
>
> -int merge_trees(struct merge_options *opt,
> -		struct tree *head,
> -		struct tree *merge,
> -		struct tree *common,
> -		struct tree **result)
> +static int merge_trees_internal(struct merge_options *opt,

In other, similar cases, we seem to use the suffix `_1`. Not sure
whether you want to change that here.

> +				struct tree *head,
> +				struct tree *merge,
> +				struct tree *common,
> +				struct tree **result)
>  {
>  	struct index_state *istate = opt->repo->index;
>  	int code, clean;
> -	struct strbuf sb = STRBUF_INIT;
> -
> -	if (!opt->call_depth && repo_index_has_changes(opt->repo, head, &sb)) {
> -		err(opt, _("Your local changes to the following files would be overwritten by merge:\n  %s"),
> -		    sb.buf);
> -		return -1;
> -	}
>
>  	if (opt->subtree_shift) {
>  		merge = shift_tree_object(opt->repo, head, merge, opt->subtree_shift);
> @@ -3499,11 +3492,11 @@ static struct commit_list *reverse_commit_list(struct commit_list *list)
>   * Merge the commits h1 and h2, return the resulting virtual
>   * commit object and a flag indicating the cleanness of the merge.
>   */
> -int merge_recursive(struct merge_options *opt,
> -		    struct commit *h1,
> -		    struct commit *h2,
> -		    struct commit_list *ca,
> -		    struct commit **result)
> +static int merge_recursive_internal(struct merge_options *opt,

Same here.

> +				    struct commit *h1,
> +				    struct commit *h2,
> +				    struct commit_list *ca,
> +				    struct commit **result)
>  {
>  	struct commit_list *iter;
>  	struct commit *merged_common_ancestors;
> [...]
> @@ -3596,6 +3589,58 @@ int merge_recursive(struct merge_options *opt,
>  	return clean;
>  }
>
> +static int merge_start(struct merge_options *opt, struct tree *head)

I would prefer to call this something like `check_invariants()` or
something similar.

> +{
> +	struct strbuf sb = STRBUF_INIT;
> +
> +	assert(opt->branch1 && opt->branch2);
> +
> +	if (repo_index_has_changes(opt->repo, head, &sb)) {
> +		err(opt, _("Your local changes to the following files would be overwritten by merge:\n  %s"),
> +		    sb.buf);

I know that you did not introduce this leak, but maybe we could slip an
`strbuf_release(&sb);` in at this point?

> +		return -1;
> +	}
> +
> +	return 0;
> +}
> +
> +static void merge_finalize(struct merge_options *opt)
> +{
> +	/* Common code for wrapping up merges will be added here later */
> +}

I was about to comment that this complicates this here diff and that we
should do this when we need it, but I just peeked into the next patch,
and it uses it, so I leave this here paragraph only to show that I
actually reviewed this part, too.

And of course, if we have a `merge_finalize()`, then a `merge_start()`
does not sound all that bad, either.

The rest looks good to me.

Thanks,
Dscho

> +
> +int merge_trees(struct merge_options *opt,
> +		struct tree *head,
> +		struct tree *merge,
> +		struct tree *common,
> +		struct tree **result)
> +{
> +	int ret;
> +
> +	if (merge_start(opt, head))
> +		return -1;
> +	ret = merge_trees_internal(opt, head, merge, common, result);
> +	merge_finalize(opt);
> +
> +	return ret;
> +}
> +
> +int merge_recursive(struct merge_options *opt,
> +		    struct commit *h1,
> +		    struct commit *h2,
> +		    struct commit_list *ca,
> +		    struct commit **result)
> +{
> +	int ret;
> +
> +	if (merge_start(opt, repo_get_commit_tree(opt->repo, h1)))
> +		return -1;
> +	ret = merge_recursive_internal(opt, h1, h2, ca, result);
> +	merge_finalize(opt);
> +
> +	return ret;
> +}
> +
>  static struct commit *get_ref(struct repository *repo, const struct object_id *oid,
>  			      const char *name)
>  {
> --
> 2.22.0.559.g28a8880890.dirty
>
>
Elijah Newren July 25, 2019, 8:12 p.m. UTC | #2
On Thu, Jul 25, 2019 at 12:51 PM Johannes Schindelin
<Johannes.Schindelin@gmx.de> wrote:
>
> Hi Elijah,
>
> On Thu, 25 Jul 2019, Elijah Newren wrote:
>
> > We had a rule to enforce that the index matches head, but it was found
> > at the beginning of merge_trees() and would only trigger when
> > opt->call_depth was 0.  Since merge_recursive() doesn't call
> > merge_trees() until after returning from recursing, this meant that the
> > check wasn't triggered by merge_recursive() until it had first finished
> > all the intermediate merges to create virtual merge bases.  That is a
> > potentially huge amount of computation (and writing of intermediate
> > merge results into the .git/objects directory) before it errors out and
> > says, in effect, "Sorry, I can't do any merging because you have some
> > local changes that would be overwritten."
> >
> > Further, not enforcing this requirement earlier allowed other bugs (such
> > as an unintentional unconditional dropping and reloading of the index in
> > merge_recursive() even when no recursion was necessary), to mask bugs in
> > other callers (which were fixed in the commit prior to this one).
> >
> > Make sure we do the index == head check at the beginning of the merge,
> > and error out immediately if it fails.
>
> Very clear commit message.
>
> > diff --git a/merge-recursive.c b/merge-recursive.c
> > index 37bb94fb4d..b762ecd7bd 100644
> > --- a/merge-recursive.c
> > +++ b/merge-recursive.c
> > @@ -3381,21 +3381,14 @@ static int process_entry(struct merge_options *opt,
> >       return clean_merge;
> >  }
> >
> > -int merge_trees(struct merge_options *opt,
> > -             struct tree *head,
> > -             struct tree *merge,
> > -             struct tree *common,
> > -             struct tree **result)
> > +static int merge_trees_internal(struct merge_options *opt,
>
> In other, similar cases, we seem to use the suffix `_1`. Not sure
> whether you want to change that here.

Hmm, we do seem to use `_1` about 2.5 times as frequently as
`_internal`, but both do seem to be in use to me (e.g.
init_tree_desc_internal, convert_to_working_tree_internal,
repo_parse_commit_internal).  `_1` does have the advantage of being
shorter, which makes lines fit in 80 columns better, but `_internal`
seems like a clearer description to me.

So...I'm not sure what's best here.  I don't have a strong opinion,
but I'm inclined towards laziness and leaving it alone unless someone
else has strong opinions.

> > +{
> > +     struct strbuf sb = STRBUF_INIT;
> > +
> > +     assert(opt->branch1 && opt->branch2);
> > +
> > +     if (repo_index_has_changes(opt->repo, head, &sb)) {
> > +             err(opt, _("Your local changes to the following files would be overwritten by merge:\n  %s"),
> > +                 sb.buf);
>
> I know that you did not introduce this leak, but maybe we could slip an
> `strbuf_release(&sb);` in at this point?

Ooh, good point.  I'll fix that up for round 2.

> > +             return -1;
> > +     }
> > +
> > +     return 0;
> > +}
> > +
> > +static void merge_finalize(struct merge_options *opt)
> > +{
> > +     /* Common code for wrapping up merges will be added here later */
> > +}
>
> I was about to comment that this complicates this here diff and that we
> should do this when we need it, but I just peeked into the next patch,
> and it uses it, so I leave this here paragraph only to show that I
> actually reviewed this part, too.
>
> And of course, if we have a `merge_finalize()`, then a `merge_start()`
> does not sound all that bad, either.
>
> The rest looks good to me.

Thanks!
diff mbox series

Patch

diff --git a/merge-recursive.c b/merge-recursive.c
index 37bb94fb4d..b762ecd7bd 100644
--- a/merge-recursive.c
+++ b/merge-recursive.c
@@ -3381,21 +3381,14 @@  static int process_entry(struct merge_options *opt,
 	return clean_merge;
 }
 
-int merge_trees(struct merge_options *opt,
-		struct tree *head,
-		struct tree *merge,
-		struct tree *common,
-		struct tree **result)
+static int merge_trees_internal(struct merge_options *opt,
+				struct tree *head,
+				struct tree *merge,
+				struct tree *common,
+				struct tree **result)
 {
 	struct index_state *istate = opt->repo->index;
 	int code, clean;
-	struct strbuf sb = STRBUF_INIT;
-
-	if (!opt->call_depth && repo_index_has_changes(opt->repo, head, &sb)) {
-		err(opt, _("Your local changes to the following files would be overwritten by merge:\n  %s"),
-		    sb.buf);
-		return -1;
-	}
 
 	if (opt->subtree_shift) {
 		merge = shift_tree_object(opt->repo, head, merge, opt->subtree_shift);
@@ -3499,11 +3492,11 @@  static struct commit_list *reverse_commit_list(struct commit_list *list)
  * Merge the commits h1 and h2, return the resulting virtual
  * commit object and a flag indicating the cleanness of the merge.
  */
-int merge_recursive(struct merge_options *opt,
-		    struct commit *h1,
-		    struct commit *h2,
-		    struct commit_list *ca,
-		    struct commit **result)
+static int merge_recursive_internal(struct merge_options *opt,
+				    struct commit *h1,
+				    struct commit *h2,
+				    struct commit_list *ca,
+				    struct commit **result)
 {
 	struct commit_list *iter;
 	struct commit *merged_common_ancestors;
@@ -3555,7 +3548,7 @@  int merge_recursive(struct merge_options *opt,
 		saved_b2 = opt->branch2;
 		opt->branch1 = "Temporary merge branch 1";
 		opt->branch2 = "Temporary merge branch 2";
-		if (merge_recursive(opt, merged_common_ancestors, iter->item,
+		if (merge_recursive_internal(opt, merged_common_ancestors, iter->item,
 				    NULL, &merged_common_ancestors) < 0)
 			return -1;
 		opt->branch1 = saved_b1;
@@ -3571,12 +3564,12 @@  int merge_recursive(struct merge_options *opt,
 		repo_read_index(opt->repo);
 
 	opt->ancestor = "merged common ancestors";
-	clean = merge_trees(opt,
-			    repo_get_commit_tree(opt->repo, h1),
-			    repo_get_commit_tree(opt->repo, h2),
-			    repo_get_commit_tree(opt->repo,
-						 merged_common_ancestors),
-			    &mrtree);
+	clean = merge_trees_internal(opt,
+				     repo_get_commit_tree(opt->repo, h1),
+				     repo_get_commit_tree(opt->repo, h2),
+				     repo_get_commit_tree(opt->repo,
+							  merged_common_ancestors),
+				     &mrtree);
 	if (clean < 0) {
 		flush_output(opt);
 		return clean;
@@ -3596,6 +3589,58 @@  int merge_recursive(struct merge_options *opt,
 	return clean;
 }
 
+static int merge_start(struct merge_options *opt, struct tree *head)
+{
+	struct strbuf sb = STRBUF_INIT;
+
+	assert(opt->branch1 && opt->branch2);
+
+	if (repo_index_has_changes(opt->repo, head, &sb)) {
+		err(opt, _("Your local changes to the following files would be overwritten by merge:\n  %s"),
+		    sb.buf);
+		return -1;
+	}
+
+	return 0;
+}
+
+static void merge_finalize(struct merge_options *opt)
+{
+	/* Common code for wrapping up merges will be added here later */
+}
+
+int merge_trees(struct merge_options *opt,
+		struct tree *head,
+		struct tree *merge,
+		struct tree *common,
+		struct tree **result)
+{
+	int ret;
+
+	if (merge_start(opt, head))
+		return -1;
+	ret = merge_trees_internal(opt, head, merge, common, result);
+	merge_finalize(opt);
+
+	return ret;
+}
+
+int merge_recursive(struct merge_options *opt,
+		    struct commit *h1,
+		    struct commit *h2,
+		    struct commit_list *ca,
+		    struct commit **result)
+{
+	int ret;
+
+	if (merge_start(opt, repo_get_commit_tree(opt->repo, h1)))
+		return -1;
+	ret = merge_recursive_internal(opt, h1, h2, ca, result);
+	merge_finalize(opt);
+
+	return ret;
+}
+
 static struct commit *get_ref(struct repository *repo, const struct object_id *oid,
 			      const char *name)
 {