diff mbox series

[v2,05/17] merge-ort: add outline of get_provisional_directory_renames()

Message ID bb4285250cdef2fcd16a22f8540968c871302c9f.1610055365.git.gitgitgadget@gmail.com (mailing list archive)
State Superseded
Headers show
Series Add directory rename detection to merge-ort | expand

Commit Message

Elijah Newren Jan. 7, 2021, 9:35 p.m. UTC
From: Elijah Newren <newren@gmail.com>

This function is based on merge-recursive.c's get_directory_renames(),
except that the first half has been split out into a not-yet-implemented
compute_rename_counts().  The primary difference here is our lack of the
non_unique_new_dir boolean in our strmap.  The lack of that field will
at first cause us to fail testcase 2b of t6423; however, future
optimizations will obviate the need for that ugly field so we have just
left it out.

Signed-off-by: Elijah Newren <newren@gmail.com>
---
 merge-ort.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 59 insertions(+), 1 deletion(-)

Comments

Taylor Blau Jan. 18, 2021, 8:10 p.m. UTC | #1
On Thu, Jan 07, 2021 at 09:35:53PM +0000, Elijah Newren via GitGitGadget wrote:
> From: Elijah Newren <newren@gmail.com>
>
> This function is based on merge-recursive.c's get_directory_renames(),
> except that the first half has been split out into a not-yet-implemented
> compute_rename_counts().  The primary difference here is our lack of the
> non_unique_new_dir boolean in our strmap.  The lack of that field will
> at first cause us to fail testcase 2b of t6423; however, future
> optimizations will obviate the need for that ugly field so we have just
> left it out.
>
> Signed-off-by: Elijah Newren <newren@gmail.com>
> ---
>  merge-ort.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 59 insertions(+), 1 deletion(-)
>
> diff --git a/merge-ort.c b/merge-ort.c
> index 378ac495d09..73d3ff97f52 100644
> --- a/merge-ort.c
> +++ b/merge-ort.c
> @@ -721,11 +721,69 @@ static int handle_content_merge(struct merge_options *opt,
>
>  /*** Function Grouping: functions related to directory rename detection ***/
>
> +static void compute_rename_counts(struct diff_queue_struct *pairs,
> +				  struct strmap *dir_rename_count,
> +				  struct strset *dirs_removed)
> +{
> +	die("Not yet implemented!");
> +}
> +
>  static void get_provisional_directory_renames(struct merge_options *opt,
>  					      unsigned side,
>  					      int *clean)
>  {
> -	die("Not yet implemented!");
> +	struct hashmap_iter iter;
> +	struct strmap_entry *entry;
> +	struct rename_info *renames = &opt->priv->renames;
> +
> +	compute_rename_counts(&renames->pairs[side],
> +			      &renames->dir_rename_count[side],
> +			      &renames->dirs_removed[side]);
> +	/*
> +	 * Collapse
> +	 *    dir_rename_count: old_directory -> {new_directory -> count}
> +	 * down to
> +	 *    dir_renames: old_directory -> best_new_directory
> +	 * where best_new_directory is the one with the unique highest count.
> +	 */
> +	strmap_for_each_entry(&renames->dir_rename_count[side], &iter, entry) {
> +		const char *source_dir = entry->key;
> +		struct strintmap *counts = entry->value;
> +		struct hashmap_iter count_iter;
> +		struct strmap_entry *count_entry;
> +		int max = 0;
> +		int bad_max = 0;
> +		const char *best = NULL;
> +
> +		strintmap_for_each_entry(counts, &count_iter, count_entry) {
> +			const char *target_dir = count_entry->key;
> +			intptr_t count = (intptr_t)count_entry->value;

Just to make sure I understand what's going on here: you're storing the
whole int inside the pointer (and not pointing at it)?

> +			if (count == max)
> +				bad_max = max;
> +			else if (count > max) {
> +				max = count;
> +				best = target_dir;
> +			}
> +		}
> +
> +		if (max == 0)
> +			continue;

This is new from merge_recursive.c:get_directory_renames(). What is it
doing here?

> +		if (bad_max == max) {
> +			path_msg(opt, source_dir, 0,
> +			       _("CONFLICT (directory rename split): "
> +				 "Unclear where to rename %s to; it was "
> +				 "renamed to multiple other directories, with "
> +				 "no destination getting a majority of the "
> +				 "files."),
> +			       source_dir);
> +			*clean &= 0;

Also not a big deal, but why not simply '*clean = 0'?

> +		} else {
> +			strmap_put(&renames->dir_renames[side],
> +				   source_dir, (void*)best);

Ah, answering my onw question: this is indeed shoving the int into your
void*. That said, shouldn't this be '(void*)(intptr_t)best'?

Thanks,
Taylor
Elijah Newren Jan. 18, 2021, 8:34 p.m. UTC | #2
On Mon, Jan 18, 2021 at 12:10 PM Taylor Blau <me@ttaylorr.com> wrote:
>
> On Thu, Jan 07, 2021 at 09:35:53PM +0000, Elijah Newren via GitGitGadget wrote:
> > From: Elijah Newren <newren@gmail.com>
> >
> > This function is based on merge-recursive.c's get_directory_renames(),
> > except that the first half has been split out into a not-yet-implemented
> > compute_rename_counts().  The primary difference here is our lack of the
> > non_unique_new_dir boolean in our strmap.  The lack of that field will
> > at first cause us to fail testcase 2b of t6423; however, future
> > optimizations will obviate the need for that ugly field so we have just
> > left it out.
> >
> > Signed-off-by: Elijah Newren <newren@gmail.com>
> > ---
> >  merge-ort.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
> >  1 file changed, 59 insertions(+), 1 deletion(-)
> >
> > diff --git a/merge-ort.c b/merge-ort.c
> > index 378ac495d09..73d3ff97f52 100644
> > --- a/merge-ort.c
> > +++ b/merge-ort.c
> > @@ -721,11 +721,69 @@ static int handle_content_merge(struct merge_options *opt,
> >
> >  /*** Function Grouping: functions related to directory rename detection ***/
> >
> > +static void compute_rename_counts(struct diff_queue_struct *pairs,
> > +                               struct strmap *dir_rename_count,
> > +                               struct strset *dirs_removed)
> > +{
> > +     die("Not yet implemented!");
> > +}
> > +
> >  static void get_provisional_directory_renames(struct merge_options *opt,
> >                                             unsigned side,
> >                                             int *clean)
> >  {
> > -     die("Not yet implemented!");
> > +     struct hashmap_iter iter;
> > +     struct strmap_entry *entry;
> > +     struct rename_info *renames = &opt->priv->renames;
> > +
> > +     compute_rename_counts(&renames->pairs[side],
> > +                           &renames->dir_rename_count[side],
> > +                           &renames->dirs_removed[side]);
> > +     /*
> > +      * Collapse
> > +      *    dir_rename_count: old_directory -> {new_directory -> count}
> > +      * down to
> > +      *    dir_renames: old_directory -> best_new_directory
> > +      * where best_new_directory is the one with the unique highest count.
> > +      */
> > +     strmap_for_each_entry(&renames->dir_rename_count[side], &iter, entry) {
> > +             const char *source_dir = entry->key;
> > +             struct strintmap *counts = entry->value;
> > +             struct hashmap_iter count_iter;
> > +             struct strmap_entry *count_entry;
> > +             int max = 0;
> > +             int bad_max = 0;
> > +             const char *best = NULL;
> > +
> > +             strintmap_for_each_entry(counts, &count_iter, count_entry) {
> > +                     const char *target_dir = count_entry->key;
> > +                     intptr_t count = (intptr_t)count_entry->value;
>
> Just to make sure I understand what's going on here: you're storing the
> whole int inside the pointer (and not pointing at it)?

Right, strmap just has (char *)keys and (void *)values.  The fact that
strintmap stores integers inside those values instead of pointers
means that I have to do casting.  That casting is often hidden behind
strintmap_get() calls, but since I'm not calling such a function here
but just accessing the raw strmap_entry fields, I need to cast myself.

> > +                     if (count == max)
> > +                             bad_max = max;
> > +                     else if (count > max) {
> > +                             max = count;
> > +                             best = target_dir;
> > +                     }
> > +             }
> > +
> > +             if (max == 0)
> > +                     continue;
>
> This is new from merge_recursive.c:get_directory_renames(). What is it
> doing here?

Ah, good catch.  That became necessary with an optimization I haven't
submitted yet.  I should probably pull these two lines out of this
patch and resubmit later with the other optimization.

>
> > +             if (bad_max == max) {
> > +                     path_msg(opt, source_dir, 0,
> > +                            _("CONFLICT (directory rename split): "
> > +                              "Unclear where to rename %s to; it was "
> > +                              "renamed to multiple other directories, with "
> > +                              "no destination getting a majority of the "
> > +                              "files."),
> > +                            source_dir);
> > +                     *clean &= 0;
>
> Also not a big deal, but why not simply '*clean = 0'?

Good question.  There might be a joke in there about this being a
dirty way of recording uncleanness, but what you suggest just looks
cleaner.  I'll fix it up.

>
> > +             } else {
> > +                     strmap_put(&renames->dir_renames[side],
> > +                                source_dir, (void*)best);
>
> Ah, answering my onw question: this is indeed shoving the int into your
> void*. That said, shouldn't this be '(void*)(intptr_t)best'?

??  best is not an int; it's a directory name.

You are correct that we shove an int into a void* elsewhere;
dir_rename_count is a map of {old_dir => {new_dir => int}} (as noted
in a comment where it was declared).

But this line of code is not for "dir_rename_count" but "dir_renames".
dir_renames is a map of {old_dir => new_dir}.  Thus, the value for the
strmap is meant to not be int but char*.  Here, best is a const char*
and the cast here is just to remove the constness so I can store in a
strmap -- I know that the strmap isn't going to modify the pointed-to
directory.
diff mbox series

Patch

diff --git a/merge-ort.c b/merge-ort.c
index 378ac495d09..73d3ff97f52 100644
--- a/merge-ort.c
+++ b/merge-ort.c
@@ -721,11 +721,69 @@  static int handle_content_merge(struct merge_options *opt,
 
 /*** Function Grouping: functions related to directory rename detection ***/
 
+static void compute_rename_counts(struct diff_queue_struct *pairs,
+				  struct strmap *dir_rename_count,
+				  struct strset *dirs_removed)
+{
+	die("Not yet implemented!");
+}
+
 static void get_provisional_directory_renames(struct merge_options *opt,
 					      unsigned side,
 					      int *clean)
 {
-	die("Not yet implemented!");
+	struct hashmap_iter iter;
+	struct strmap_entry *entry;
+	struct rename_info *renames = &opt->priv->renames;
+
+	compute_rename_counts(&renames->pairs[side],
+			      &renames->dir_rename_count[side],
+			      &renames->dirs_removed[side]);
+	/*
+	 * Collapse
+	 *    dir_rename_count: old_directory -> {new_directory -> count}
+	 * down to
+	 *    dir_renames: old_directory -> best_new_directory
+	 * where best_new_directory is the one with the unique highest count.
+	 */
+	strmap_for_each_entry(&renames->dir_rename_count[side], &iter, entry) {
+		const char *source_dir = entry->key;
+		struct strintmap *counts = entry->value;
+		struct hashmap_iter count_iter;
+		struct strmap_entry *count_entry;
+		int max = 0;
+		int bad_max = 0;
+		const char *best = NULL;
+
+		strintmap_for_each_entry(counts, &count_iter, count_entry) {
+			const char *target_dir = count_entry->key;
+			intptr_t count = (intptr_t)count_entry->value;
+
+			if (count == max)
+				bad_max = max;
+			else if (count > max) {
+				max = count;
+				best = target_dir;
+			}
+		}
+
+		if (max == 0)
+			continue;
+
+		if (bad_max == max) {
+			path_msg(opt, source_dir, 0,
+			       _("CONFLICT (directory rename split): "
+				 "Unclear where to rename %s to; it was "
+				 "renamed to multiple other directories, with "
+				 "no destination getting a majority of the "
+				 "files."),
+			       source_dir);
+			*clean &= 0;
+		} else {
+			strmap_put(&renames->dir_renames[side],
+				   source_dir, (void*)best);
+		}
+	}
 }
 
 static void handle_directory_level_conflicts(struct merge_options *opt)