@@ -374,6 +374,12 @@ struct dir_rename_info {
unsigned setup;
};
+static char *get_dirname(const char *filename)
+{
+ char *slash = strrchr(filename, '/');
+ return slash ? xstrndup(filename, slash-filename) : xstrdup("");
+}
+
static void dirname_munge(char *filename)
{
char *slash = strrchr(filename, '/');
@@ -651,6 +657,81 @@ static const char *get_basename(const char *filename)
return base ? base + 1 : filename;
}
+MAYBE_UNUSED
+static int idx_possible_rename(char *filename, struct dir_rename_info *info)
+{
+ /*
+ * Our comparison of files with the same basename (see
+ * find_basename_matches() below), is only helpful when after exact
+ * rename detection we have exactly one file with a given basename
+ * among the rename sources and also only exactly one file with
+ * that basename among the rename destinations. When we have
+ * multiple files with the same basename in either set, we do not
+ * know which to compare against. However, there are some
+ * filenames that occur in large numbers (particularly
+ * build-related filenames such as 'Makefile', '.gitignore', or
+ * 'build.gradle' that potentially exist within every single
+ * subdirectory), and for performance we want to be able to quickly
+ * find renames for these files too.
+ *
+ * The reason basename comparisons are a useful heuristic was that it
+ * is common for people to move files across directories while keeping
+ * their filename the same. If we had a way of determining or even
+ * making a good educated guess about which directory these non-unique
+ * basename files had moved the file to, we could check it.
+ * Luckily...
+ *
+ * When an entire directory is in fact renamed, we have two factors
+ * helping us out:
+ * (a) the original directory disappeared giving us a hint
+ * about when we can apply an extra heuristic.
+ * (a) we often have several files within that directory and
+ * subdirectories that are renamed without changes
+ * So, rules for a heuristic:
+ * (0) If there basename matches are non-unique (the condition under
+ * which this function is called) AND
+ * (1) the directory in which the file was found has disappeared
+ * (i.e. dirs_removed is non-NULL and has a relevant entry) THEN
+ * (2) use exact renames of files within the directory to determine
+ * where the directory is likely to have been renamed to. IF
+ * there is at least one exact rename from within that
+ * directory, we can proceed.
+ * (3) If there are multiple places the directory could have been
+ * renamed to based on exact renames, ignore all but one of them.
+ * Just use the destination with the most renames going to it.
+ * (4) Check if applying that directory rename to the original file
+ * would result in a destination filename that is in the
+ * potential rename set. If so, return the index of the
+ * destination file (the index within rename_dst).
+ * (5) Compare the original file and returned destination for
+ * similarity, and if they are sufficiently similar, record the
+ * rename.
+ *
+ * This function, idx_possible_rename(), is only responsible for (4).
+ * The conditions/steps in (1)-(3) are handled via setting up
+ * dir_rename_count and dir_rename_guess in
+ * initialize_dir_rename_info(). Steps (0) and (5) are handled by
+ * the caller of this function.
+ */
+ char *old_dir, *new_dir, *new_path;
+ int idx;
+
+ if (!info->setup)
+ return -1;
+
+ old_dir = get_dirname(filename);
+ new_dir = strmap_get(&info->dir_rename_guess, old_dir);
+ free(old_dir);
+ if (!new_dir)
+ return -1;
+
+ new_path = xstrfmt("%s/%s", new_dir, get_basename(filename));
+
+ idx = strintmap_get(&info->idx_map, new_path);
+ free(new_path);
+ return idx;
+}
+
static int find_basename_matches(struct diff_options *options,
int minimum_score,
struct dir_rename_info *info,