Message ID | bb4285250cdef2fcd16a22f8540968c871302c9f.1610055365.git.gitgitgadget@gmail.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | Add directory rename detection to merge-ort | expand |
On Thu, Jan 07, 2021 at 09:35:53PM +0000, Elijah Newren via GitGitGadget wrote: > From: Elijah Newren <newren@gmail.com> > > This function is based on merge-recursive.c's get_directory_renames(), > except that the first half has been split out into a not-yet-implemented > compute_rename_counts(). The primary difference here is our lack of the > non_unique_new_dir boolean in our strmap. The lack of that field will > at first cause us to fail testcase 2b of t6423; however, future > optimizations will obviate the need for that ugly field so we have just > left it out. > > Signed-off-by: Elijah Newren <newren@gmail.com> > --- > merge-ort.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++- > 1 file changed, 59 insertions(+), 1 deletion(-) > > diff --git a/merge-ort.c b/merge-ort.c > index 378ac495d09..73d3ff97f52 100644 > --- a/merge-ort.c > +++ b/merge-ort.c > @@ -721,11 +721,69 @@ static int handle_content_merge(struct merge_options *opt, > > /*** Function Grouping: functions related to directory rename detection ***/ > > +static void compute_rename_counts(struct diff_queue_struct *pairs, > + struct strmap *dir_rename_count, > + struct strset *dirs_removed) > +{ > + die("Not yet implemented!"); > +} > + > static void get_provisional_directory_renames(struct merge_options *opt, > unsigned side, > int *clean) > { > - die("Not yet implemented!"); > + struct hashmap_iter iter; > + struct strmap_entry *entry; > + struct rename_info *renames = &opt->priv->renames; > + > + compute_rename_counts(&renames->pairs[side], > + &renames->dir_rename_count[side], > + &renames->dirs_removed[side]); > + /* > + * Collapse > + * dir_rename_count: old_directory -> {new_directory -> count} > + * down to > + * dir_renames: old_directory -> best_new_directory > + * where best_new_directory is the one with the unique highest count. > + */ > + strmap_for_each_entry(&renames->dir_rename_count[side], &iter, entry) { > + const char *source_dir = entry->key; > + struct strintmap *counts = entry->value; > + struct hashmap_iter count_iter; > + struct strmap_entry *count_entry; > + int max = 0; > + int bad_max = 0; > + const char *best = NULL; > + > + strintmap_for_each_entry(counts, &count_iter, count_entry) { > + const char *target_dir = count_entry->key; > + intptr_t count = (intptr_t)count_entry->value; Just to make sure I understand what's going on here: you're storing the whole int inside the pointer (and not pointing at it)? > + if (count == max) > + bad_max = max; > + else if (count > max) { > + max = count; > + best = target_dir; > + } > + } > + > + if (max == 0) > + continue; This is new from merge_recursive.c:get_directory_renames(). What is it doing here? > + if (bad_max == max) { > + path_msg(opt, source_dir, 0, > + _("CONFLICT (directory rename split): " > + "Unclear where to rename %s to; it was " > + "renamed to multiple other directories, with " > + "no destination getting a majority of the " > + "files."), > + source_dir); > + *clean &= 0; Also not a big deal, but why not simply '*clean = 0'? > + } else { > + strmap_put(&renames->dir_renames[side], > + source_dir, (void*)best); Ah, answering my onw question: this is indeed shoving the int into your void*. That said, shouldn't this be '(void*)(intptr_t)best'? Thanks, Taylor
On Mon, Jan 18, 2021 at 12:10 PM Taylor Blau <me@ttaylorr.com> wrote: > > On Thu, Jan 07, 2021 at 09:35:53PM +0000, Elijah Newren via GitGitGadget wrote: > > From: Elijah Newren <newren@gmail.com> > > > > This function is based on merge-recursive.c's get_directory_renames(), > > except that the first half has been split out into a not-yet-implemented > > compute_rename_counts(). The primary difference here is our lack of the > > non_unique_new_dir boolean in our strmap. The lack of that field will > > at first cause us to fail testcase 2b of t6423; however, future > > optimizations will obviate the need for that ugly field so we have just > > left it out. > > > > Signed-off-by: Elijah Newren <newren@gmail.com> > > --- > > merge-ort.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++- > > 1 file changed, 59 insertions(+), 1 deletion(-) > > > > diff --git a/merge-ort.c b/merge-ort.c > > index 378ac495d09..73d3ff97f52 100644 > > --- a/merge-ort.c > > +++ b/merge-ort.c > > @@ -721,11 +721,69 @@ static int handle_content_merge(struct merge_options *opt, > > > > /*** Function Grouping: functions related to directory rename detection ***/ > > > > +static void compute_rename_counts(struct diff_queue_struct *pairs, > > + struct strmap *dir_rename_count, > > + struct strset *dirs_removed) > > +{ > > + die("Not yet implemented!"); > > +} > > + > > static void get_provisional_directory_renames(struct merge_options *opt, > > unsigned side, > > int *clean) > > { > > - die("Not yet implemented!"); > > + struct hashmap_iter iter; > > + struct strmap_entry *entry; > > + struct rename_info *renames = &opt->priv->renames; > > + > > + compute_rename_counts(&renames->pairs[side], > > + &renames->dir_rename_count[side], > > + &renames->dirs_removed[side]); > > + /* > > + * Collapse > > + * dir_rename_count: old_directory -> {new_directory -> count} > > + * down to > > + * dir_renames: old_directory -> best_new_directory > > + * where best_new_directory is the one with the unique highest count. > > + */ > > + strmap_for_each_entry(&renames->dir_rename_count[side], &iter, entry) { > > + const char *source_dir = entry->key; > > + struct strintmap *counts = entry->value; > > + struct hashmap_iter count_iter; > > + struct strmap_entry *count_entry; > > + int max = 0; > > + int bad_max = 0; > > + const char *best = NULL; > > + > > + strintmap_for_each_entry(counts, &count_iter, count_entry) { > > + const char *target_dir = count_entry->key; > > + intptr_t count = (intptr_t)count_entry->value; > > Just to make sure I understand what's going on here: you're storing the > whole int inside the pointer (and not pointing at it)? Right, strmap just has (char *)keys and (void *)values. The fact that strintmap stores integers inside those values instead of pointers means that I have to do casting. That casting is often hidden behind strintmap_get() calls, but since I'm not calling such a function here but just accessing the raw strmap_entry fields, I need to cast myself. > > + if (count == max) > > + bad_max = max; > > + else if (count > max) { > > + max = count; > > + best = target_dir; > > + } > > + } > > + > > + if (max == 0) > > + continue; > > This is new from merge_recursive.c:get_directory_renames(). What is it > doing here? Ah, good catch. That became necessary with an optimization I haven't submitted yet. I should probably pull these two lines out of this patch and resubmit later with the other optimization. > > > + if (bad_max == max) { > > + path_msg(opt, source_dir, 0, > > + _("CONFLICT (directory rename split): " > > + "Unclear where to rename %s to; it was " > > + "renamed to multiple other directories, with " > > + "no destination getting a majority of the " > > + "files."), > > + source_dir); > > + *clean &= 0; > > Also not a big deal, but why not simply '*clean = 0'? Good question. There might be a joke in there about this being a dirty way of recording uncleanness, but what you suggest just looks cleaner. I'll fix it up. > > > + } else { > > + strmap_put(&renames->dir_renames[side], > > + source_dir, (void*)best); > > Ah, answering my onw question: this is indeed shoving the int into your > void*. That said, shouldn't this be '(void*)(intptr_t)best'? ?? best is not an int; it's a directory name. You are correct that we shove an int into a void* elsewhere; dir_rename_count is a map of {old_dir => {new_dir => int}} (as noted in a comment where it was declared). But this line of code is not for "dir_rename_count" but "dir_renames". dir_renames is a map of {old_dir => new_dir}. Thus, the value for the strmap is meant to not be int but char*. Here, best is a const char* and the cast here is just to remove the constness so I can store in a strmap -- I know that the strmap isn't going to modify the pointed-to directory.
diff --git a/merge-ort.c b/merge-ort.c index 378ac495d09..73d3ff97f52 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -721,11 +721,69 @@ static int handle_content_merge(struct merge_options *opt, /*** Function Grouping: functions related to directory rename detection ***/ +static void compute_rename_counts(struct diff_queue_struct *pairs, + struct strmap *dir_rename_count, + struct strset *dirs_removed) +{ + die("Not yet implemented!"); +} + static void get_provisional_directory_renames(struct merge_options *opt, unsigned side, int *clean) { - die("Not yet implemented!"); + struct hashmap_iter iter; + struct strmap_entry *entry; + struct rename_info *renames = &opt->priv->renames; + + compute_rename_counts(&renames->pairs[side], + &renames->dir_rename_count[side], + &renames->dirs_removed[side]); + /* + * Collapse + * dir_rename_count: old_directory -> {new_directory -> count} + * down to + * dir_renames: old_directory -> best_new_directory + * where best_new_directory is the one with the unique highest count. + */ + strmap_for_each_entry(&renames->dir_rename_count[side], &iter, entry) { + const char *source_dir = entry->key; + struct strintmap *counts = entry->value; + struct hashmap_iter count_iter; + struct strmap_entry *count_entry; + int max = 0; + int bad_max = 0; + const char *best = NULL; + + strintmap_for_each_entry(counts, &count_iter, count_entry) { + const char *target_dir = count_entry->key; + intptr_t count = (intptr_t)count_entry->value; + + if (count == max) + bad_max = max; + else if (count > max) { + max = count; + best = target_dir; + } + } + + if (max == 0) + continue; + + if (bad_max == max) { + path_msg(opt, source_dir, 0, + _("CONFLICT (directory rename split): " + "Unclear where to rename %s to; it was " + "renamed to multiple other directories, with " + "no destination getting a majority of the " + "files."), + source_dir); + *clean &= 0; + } else { + strmap_put(&renames->dir_renames[side], + source_dir, (void*)best); + } + } } static void handle_directory_level_conflicts(struct merge_options *opt)