diff mbox series

[v2,7/8] merge-ort: skip rename detection entirely if possible

Message ID cd931286f24d66efbf6b0f0a1f520b58ac468f88.1615248599.git.gitgitgadget@gmail.com (mailing list archive)
State Superseded
Headers show
Series Optimization batch 9: avoid detecting irrelevant renames | expand

Commit Message

Elijah Newren March 9, 2021, 12:09 a.m. UTC
From: Elijah Newren <newren@gmail.com>

diffcore_rename_extended() will do a bunch of setup, then check for
exact renames, then abort before inexact rename detection if there are
no more sources or destinations that need to be matched.  It will
sometimes be the case, however, that either
  * we start with neither any sources or destinations
  * we start with no *relevant* sources
In the first of these two cases, the setup and exact rename detection
will be very cheap since there are 0 files to operate on.  In the second
case, it is quite possible to have thousands of files with none of the
source ones being relevant.  Avoid calling diffcore_rename_extended() or
even some of the setup before diffcore_rename_extended() when we can
determine that rename detection is unnecessary.

For the testcases mentioned in commit 557ac0350d ("merge-ort: begin
performance work; instrument with trace2_region_* calls", 2020-10-28),
this change improves the performance as follows:

                            Before                  After
    no-renames:        6.003 s ±  0.048 s     5.708 s ±  0.111 s
    mega-renames:    114.009 s ±  0.236 s   102.171 s ±  0.440 s
    just-one-mega:     3.489 s ±  0.017 s     3.471 s ±  0.015 s

Signed-off-by: Elijah Newren <newren@gmail.com>
---
 merge-ort.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)

Comments

Derrick Stolee March 9, 2021, 10:51 p.m. UTC | #1
On 3/8/2021 7:09 PM, Elijah Newren via GitGitGadget wrote:> +	goto simple_cleanup; /* collect_renames() handles some of cleanup */
> +
> +cleanup:
> +	/*
> +	 * Free now unneeded filepairs, which would have been handled
> +	 * in collect_renames() normally but we're about to skip that
> +	 * code...
> +	 */

"but we're about to skip that code"

Haven't you skipped it already, earlier in the above part? This should
instead say "but we skipped that code", right?

Thanks,
-Stolee
Elijah Newren March 9, 2021, 10:57 p.m. UTC | #2
On Tue, Mar 9, 2021 at 2:51 PM Derrick Stolee <stolee@gmail.com> wrote:
>
> On 3/8/2021 7:09 PM, Elijah Newren via GitGitGadget wrote:> +   goto simple_cleanup; /* collect_renames() handles some of cleanup */
> > +
> > +cleanup:
> > +     /*
> > +      * Free now unneeded filepairs, which would have been handled
> > +      * in collect_renames() normally but we're about to skip that
> > +      * code...
> > +      */
>
> "but we're about to skip that code"
>
> Haven't you skipped it already, earlier in the above part? This should
> instead say "but we skipped that code", right?

Oh, indeed.  I wonder if I moved that comment around and didn't update
it.  Anyway yes, I'll fix it.
diff mbox series

Patch

diff --git a/merge-ort.c b/merge-ort.c
index 66892c63cee2..8602c7b8936f 100644
--- a/merge-ort.c
+++ b/merge-ort.c
@@ -2158,6 +2158,19 @@  static int process_renames(struct merge_options *opt,
 	return clean_merge;
 }
 
+static inline int possible_side_renames(struct rename_info *renames,
+					unsigned side_index)
+{
+	return renames->pairs[side_index].nr > 0 &&
+	       !strset_empty(&renames->relevant_sources[side_index]);
+}
+
+static inline int possible_renames(struct rename_info *renames)
+{
+	return possible_side_renames(renames, 1) ||
+	       possible_side_renames(renames, 2);
+}
+
 static void resolve_diffpair_statuses(struct diff_queue_struct *q)
 {
 	/*
@@ -2194,6 +2207,16 @@  static void detect_regular_renames(struct merge_options *opt,
 	struct diff_options diff_opts;
 	struct rename_info *renames = &opt->priv->renames;
 
+	if (!possible_side_renames(renames, side_index)) {
+		/*
+		 * No rename detection needed for this side, but we still need
+		 * to make sure 'adds' are marked correctly in case the other
+		 * side had directory renames.
+		 */
+		resolve_diffpair_statuses(&renames->pairs[side_index]);
+		return;
+	}
+
 	repo_diff_setup(opt->repo, &diff_opts);
 	diff_opts.flags.recursive = 1;
 	diff_opts.flags.rename_empty = 0;
@@ -2311,6 +2334,8 @@  static int detect_and_process_renames(struct merge_options *opt,
 	int need_dir_renames, s, clean = 1;
 
 	memset(&combined, 0, sizeof(combined));
+	if (!possible_renames(renames))
+		goto cleanup;
 
 	trace2_region_enter("merge", "regular renames", opt->repo);
 	detect_regular_renames(opt, MERGE_SIDE1);
@@ -2345,6 +2370,26 @@  static int detect_and_process_renames(struct merge_options *opt,
 	clean &= process_renames(opt, &combined);
 	trace2_region_leave("merge", "process renames", opt->repo);
 
+	goto simple_cleanup; /* collect_renames() handles some of cleanup */
+
+cleanup:
+	/*
+	 * Free now unneeded filepairs, which would have been handled
+	 * in collect_renames() normally but we're about to skip that
+	 * code...
+	 */
+	for (s = MERGE_SIDE1; s <= MERGE_SIDE2; s++) {
+		struct diff_queue_struct *side_pairs;
+		int i;
+
+		side_pairs = &renames->pairs[s];
+		for (i = 0; i < side_pairs->nr; ++i) {
+			struct diff_filepair *p = side_pairs->queue[i];
+			diff_free_filepair(p);
+		}
+	}
+
+simple_cleanup:
 	/* Free memory for renames->pairs[] and combined */
 	for (s = MERGE_SIDE1; s <= MERGE_SIDE2; s++) {
 		free(renames->pairs[s].queue);