diff mbox series

[v3,1/2] diffcore-rename: no point trying to find a match better than exact

Message ID a59c1960f6141e71bba3492e20111af458741c38.1613288101.git.gitgitgadget@gmail.com (mailing list archive)
State Accepted
Commit f15eb7c1cf2674df69c1ff8aebdc5536a580c65e
Headers show
Series Optimization batch 6: make full use of exact renames | expand

Commit Message

Elijah Newren Feb. 14, 2021, 7:35 a.m. UTC
From: Elijah Newren <newren@gmail.com>

diffcore_rename() already had code to prevent destination paths for which
an exact rename had been detected from being re-checked for other
renames.  Source paths, however, were re-checked because we wanted to
allow the possibility of detecting copies.  But if copy detection isn't
turned on, then this merely amounts to attempting to find a
better-than-exact match, which naturally ends up being an expensive
no-op.  In particular, copy detection is never turned on by the merge
machinery.

For the testcases mentioned in commit 557ac0350d ("merge-ort: begin
performance work; instrument with trace2_region_* calls", 2020-10-28),
this change improves the performance as follows:

                            Before                  After
    no-renames:       14.263 s ±  0.053 s    14.119 s ±  0.101 s
    mega-renames:   5504.231 s ±  5.150 s  1802.044 s ±  0.828 s
    just-one-mega:   158.534 s ±  0.498 s    51.391 s ±  0.028 s

Signed-off-by: Elijah Newren <newren@gmail.com>
---
 diffcore-rename.c | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)
diff mbox series

Patch

diff --git a/diffcore-rename.c b/diffcore-rename.c
index 8fe6c9384bcb..8b118628b4ef 100644
--- a/diffcore-rename.c
+++ b/diffcore-rename.c
@@ -463,9 +463,11 @@  void diffcore_rename(struct diff_options *options)
 	struct diff_score *mx;
 	int i, j, rename_count, skip_unmodified = 0;
 	int num_destinations, dst_cnt;
+	int num_sources, want_copies;
 	struct progress *progress = NULL;
 
 	trace2_region_enter("diff", "setup", options->repo);
+	want_copies = (detect_rename == DIFF_DETECT_COPY);
 	if (!minimum_score)
 		minimum_score = DEFAULT_RENAME_SCORE;
 
@@ -502,7 +504,7 @@  void diffcore_rename(struct diff_options *options)
 				p->one->rename_used++;
 			register_rename_src(p);
 		}
-		else if (detect_rename == DIFF_DETECT_COPY) {
+		else if (want_copies) {
 			/*
 			 * Increment the "rename_used" score by
 			 * one, to indicate ourselves as a user.
@@ -532,12 +534,15 @@  void diffcore_rename(struct diff_options *options)
 	 * files still remain as options for rename/copies!)
 	 */
 	num_destinations = (rename_dst_nr - rename_count);
+	num_sources = rename_src_nr;
+	if (!want_copies)
+		num_sources -= rename_count;
 
 	/* All done? */
-	if (!num_destinations)
+	if (!num_destinations || !num_sources)
 		goto cleanup;
 
-	switch (too_many_rename_candidates(num_destinations, rename_src_nr,
+	switch (too_many_rename_candidates(num_destinations, num_sources,
 					   options)) {
 	case 1:
 		goto cleanup;
@@ -553,7 +558,7 @@  void diffcore_rename(struct diff_options *options)
 	if (options->show_rename_progress) {
 		progress = start_delayed_progress(
 				_("Performing inexact rename detection"),
-				(uint64_t)num_destinations * (uint64_t)rename_src_nr);
+				(uint64_t)num_destinations * (uint64_t)num_sources);
 	}
 
 	mx = xcalloc(st_mult(NUM_CANDIDATE_PER_DST, num_destinations),
@@ -573,6 +578,9 @@  void diffcore_rename(struct diff_options *options)
 			struct diff_filespec *one = rename_src[j].p->one;
 			struct diff_score this_src;
 
+			if (one->rename_used && !want_copies)
+				continue;
+
 			if (skip_unmodified &&
 			    diff_unmodified_pair(rename_src[j].p))
 				continue;
@@ -594,7 +602,7 @@  void diffcore_rename(struct diff_options *options)
 		}
 		dst_cnt++;
 		display_progress(progress,
-				 (uint64_t)dst_cnt * (uint64_t)rename_src_nr);
+				 (uint64_t)dst_cnt * (uint64_t)num_sources);
 	}
 	stop_progress(&progress);
 
@@ -602,7 +610,7 @@  void diffcore_rename(struct diff_options *options)
 	STABLE_QSORT(mx, dst_cnt * NUM_CANDIDATE_PER_DST, score_compare);
 
 	rename_count += find_renames(mx, dst_cnt, minimum_score, 0);
-	if (detect_rename == DIFF_DETECT_COPY)
+	if (want_copies)
 		rename_count += find_renames(mx, dst_cnt, minimum_score, 1);
 	free(mx);
 	trace2_region_leave("diff", "inexact renames", options->repo);