diff mbox series

[v2,07/13] merge-ort: populate caches of rename detection results

Message ID 02d517f052a35a952c726e7e941650ce424abb85.1620094339.git.gitgitgadget@gmail.com (mailing list archive)
State Superseded
Headers show
Series Optimization batch 11: avoid repeatedly detecting same renames | expand

Commit Message

Elijah Newren May 4, 2021, 2:12 a.m. UTC
From: Elijah Newren <newren@gmail.com>

Fill in cached_pairs, cached_target_names, and cached_irrelevant based on
rename detection results.  Future commits will make use of these values.

Signed-off-by: Elijah Newren <newren@gmail.com>
---
 merge-ort.c | 72 +++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 70 insertions(+), 2 deletions(-)

Comments

Derrick Stolee May 17, 2021, 1:51 p.m. UTC | #1
On 5/3/21 10:12 PM, Elijah Newren via GitGitGadget wrote:
> From: Elijah Newren <newren@gmail.com>
> 
> Fill in cache_pairs, cached_target_names, and cached_irrelevant based on
> rename detection results.  Future commits will make use of these values.

Thank you for continuing to break this down into nice-sized pieces.

> +static void possibly_cache_new_pair(struct rename_info *renames,
> +				    struct diff_filepair *p,
> +				    unsigned side,
> +				    char *new_path)
> +{
> +	char *old_value;
> +	int dir_renamed_side = 0;
> +
> +	if (new_path) {
> +		/*
> +		 * Directory renames happen on the other side of history from
> +		 * the side that adds new files to the old directory.
> +		 */
> +		dir_renamed_side = 3 - side;

Neat trick. Side is in { 1, 2 } so this makes sense.

> +	} else {
> +		int val = strintmap_get(&renames->relevant_sources[side],
> +					p->one->path);
> +		if (val == RELEVANT_NO_MORE) {
> +			assert(p->status == 'D');
> +			strset_add(&renames->cached_irrelevant[side],
> +				   p->one->path);

Ok, I see a transition here from a relevant side to an
irrelevant one.

> +		}
> +		if (val <= 0)
> +			return;
> +	}
> +
> +	if (p->status == 'D') {
> +		/*
> +		 * If we already had this delete, we'll just set it's value
> +		 * to NULL again, so no harm.
> +		 */
> +		strmap_put(&renames->cached_pairs[side], p->one->path, NULL);
> +	} else if (p->status == 'R') {
> +		if (new_path) {
> +			new_path = xstrdup(new_path);
> +			old_value = strmap_put(&renames->cached_pairs[dir_renamed_side],
> +					       p->two->path, new_path);
> +			strset_add(&renames->cached_target_names[dir_renamed_side],
> +				   new_path);
> +			assert(!old_value);

This assert implies that p->status == 'R' only if this is the
first side (and first commit) to show a rename, right?

> +		}
> +		if (!new_path)
> +			new_path = p->two->path;
> +		new_path = xstrdup(new_path);

If new_path was provided as non-NULL, then this is the second
time we are dup-ing it. However, that seems correct because we
want a different copy for every time we add it to the cached_pairs
and cached_target_names data.

> +		old_value = strmap_put(&renames->cached_pairs[side],
> +				       p->one->path, new_path);
> +		strset_add(&renames->cached_target_names[side],
> +			   new_path);

Since we appear to be doing this in multiple places, would this
be a good place for a helper method? We could have it take a
`const char *new_path` and have the helper manage the `xstrdup()`
so we never forget to do that exactly once per insert to these
sets.

> +		free(old_value);
> +	} else if (p->status == 'A' && new_path) {
> +		new_path = xstrdup(new_path);
> +		old_value = strmap_put(&renames->cached_pairs[dir_renamed_side],
> +				       p->two->path, new_path);
> +		strset_add(&renames->cached_target_names[dir_renamed_side],
> +			   new_path);
> +		assert(!old_value);

And here's the third instance, making the "three is many" rule
kick in. A helper method would help make this easier. You can
also have a parameter corresponding to whether you need to
free() the old_value or assert it is NULL.

> +	}
> +}
> +
>  static int compare_pairs(const void *a_, const void *b_)
>  {
>  	const struct diff_filepair *a = *((const struct diff_filepair **)a_);
> @@ -2415,6 +2474,7 @@ static int collect_renames(struct merge_options *opt,
>  		char *new_path; /* non-NULL only with directory renames */
>  
>  		if (p->status != 'A' && p->status != 'R') {
> +			possibly_cache_new_pair(renames, p, side_index, NULL);
>  			diff_free_filepair(p);
>  			continue;
>  		}
> @@ -2426,11 +2486,11 @@ static int collect_renames(struct merge_options *opt,
>  						      &collisions,
>  						      &clean);
>  
> +		possibly_cache_new_pair(renames, p, side_index, new_path);
>  		if (p->status != 'R' && !new_path) {
>  			diff_free_filepair(p);
>  			continue;
>  		}
> -

nit: this deletion seems unnecessary.

>  		if (new_path)
>  			apply_directory_rename_modifications(opt, p, new_path);
>  
> @@ -3701,8 +3761,16 @@ static void merge_start(struct merge_options *opt, struct merge_result *result)
>  					 NULL, 1);
>  		strmap_init_with_options(&renames->dir_renames[i],
>  					 NULL, 0);
> +		/*
> +		 * relevant_sources uses -1 for the default, because we need
> +		 * to be able to distinguish not-in-strintmap from valid
> +		 * relevant_source values from enum file_rename_relevance.
> +		 * In particular, possibly_cache_new_pair() expects a negative
> +		 * value for not-found entries.
> +		 */
>  		strintmap_init_with_options(&renames->relevant_sources[i],
> -					    0, NULL, 0);
> +					    -1 /* explicitly invalid */,
> +					    NULL, 0);
>  		strmap_init_with_options(&renames->cached_pairs[i],
>  					 NULL, 1);
>  		strset_init_with_options(&renames->cached_irrelevant[i],
> 

Functionally looks good. I just had some nits about organization.

Thanks,
-Stolee
Elijah Newren May 20, 2021, 12:48 a.m. UTC | #2
On Mon, May 17, 2021 at 6:51 AM Derrick Stolee <stolee@gmail.com> wrote:
>
> On 5/3/21 10:12 PM, Elijah Newren via GitGitGadget wrote:
> > From: Elijah Newren <newren@gmail.com>
> >
> > Fill in cache_pairs, cached_target_names, and cached_irrelevant based on
> > rename detection results.  Future commits will make use of these values.
>
> Thank you for continuing to break this down into nice-sized pieces.
>
> > +static void possibly_cache_new_pair(struct rename_info *renames,
> > +                                 struct diff_filepair *p,
> > +                                 unsigned side,
> > +                                 char *new_path)
> > +{
> > +     char *old_value;
> > +     int dir_renamed_side = 0;
> > +
> > +     if (new_path) {
> > +             /*
> > +              * Directory renames happen on the other side of history from
> > +              * the side that adds new files to the old directory.
> > +              */
> > +             dir_renamed_side = 3 - side;
>
> Neat trick. Side is in { 1, 2 } so this makes sense.
>
> > +     } else {
> > +             int val = strintmap_get(&renames->relevant_sources[side],
> > +                                     p->one->path);
> > +             if (val == RELEVANT_NO_MORE) {
> > +                     assert(p->status == 'D');
> > +                     strset_add(&renames->cached_irrelevant[side],
> > +                                p->one->path);
>
> Ok, I see a transition here from a relevant side to an
> irrelevant one.
>
> > +             }
> > +             if (val <= 0)
> > +                     return;
> > +     }
> > +
> > +     if (p->status == 'D') {
> > +             /*
> > +              * If we already had this delete, we'll just set it's value
> > +              * to NULL again, so no harm.
> > +              */
> > +             strmap_put(&renames->cached_pairs[side], p->one->path, NULL);
> > +     } else if (p->status == 'R') {
> > +             if (new_path) {
> > +                     new_path = xstrdup(new_path);
> > +                     old_value = strmap_put(&renames->cached_pairs[dir_renamed_side],
> > +                                            p->two->path, new_path);
> > +                     strset_add(&renames->cached_target_names[dir_renamed_side],
> > +                                new_path);
> > +                     assert(!old_value);
>
> This assert implies that p->status == 'R' only if this is the
> first side (and first commit) to show a rename, right?

Um, this assert implies that p->two->path was not already found in
renames->cached_pairs[dir_renamed_side].

>
> > +             }
> > +             if (!new_path)
> > +                     new_path = p->two->path;
> > +             new_path = xstrdup(new_path);
>
> If new_path was provided as non-NULL, then this is the second
> time we are dup-ing it. However, that seems correct because we
> want a different copy or every time we add it to the cached_pairs
> and cached_target_names data.
>
> > +             old_value = strmap_put(&renames->cached_pairs[side],
> > +                                    p->one->path, new_path);
> > +             strset_add(&renames->cached_target_names[side],
> > +                        new_path);
>
> Since we appear to be doing this in multiple places, would this
> be a good place for a helper method? We could have it take a
> `const char *new_path` and have the helper manage the `xstrdup()`
> so we never forget to do that exactly once per insert to these
> sets.

Makes sense.

> > +             free(old_value);
> > +     } else if (p->status == 'A' && new_path) {
> > +             new_path = xstrdup(new_path);
> > +             old_value = strmap_put(&renames->cached_pairs[dir_renamed_side],
> > +                                    p->two->path, new_path);
> > +             strset_add(&renames->cached_target_names[dir_renamed_side],
> > +                        new_path);
> > +             assert(!old_value);
>
> And here's the third instance, making the "three is many" rule
> kick in. A helper method would help make this easier. You can
> also have a parameter corresponding to whether you need to
> free() the old_value or assert it is NULL.

Yep, I'll add a helper.

>
> > +     }
> > +}
> > +
> >  static int compare_pairs(const void *a_, const void *b_)
> >  {
> >       const struct diff_filepair *a = *((const struct diff_filepair **)a_);
> > @@ -2415,6 +2474,7 @@ static int collect_renames(struct merge_options *opt,
> >               char *new_path; /* non-NULL only with directory renames */
> >
> >               if (p->status != 'A' && p->status != 'R') {
> > +                     possibly_cache_new_pair(renames, p, side_index, NULL);
> >                       diff_free_filepair(p);
> >                       continue;
> >               }
> > @@ -2426,11 +2486,11 @@ static int collect_renames(struct merge_options *opt,
> >                                                     &collisions,
> >                                                     &clean);
> >
> > +             possibly_cache_new_pair(renames, p, side_index, new_path);
> >               if (p->status != 'R' && !new_path) {
> >                       diff_free_filepair(p);
> >                       continue;
> >               }
> > -
>
> nit: this deletion seems unnecessary.

Will fix.

> >               if (new_path)
> >                       apply_directory_rename_modifications(opt, p, new_path);
> >
> > @@ -3701,8 +3761,16 @@ static void merge_start(struct merge_options *opt, struct merge_result *result)
> >                                        NULL, 1);
> >               strmap_init_with_options(&renames->dir_renames[i],
> >                                        NULL, 0);
> > +             /*
> > +              * relevant_sources uses -1 for the default, because we need
> > +              * to be able to distinguish not-in-strintmap from valid
> > +              * relevant_source values from enum file_rename_relevance.
> > +              * In particular, possibly_cache_new_pair() expects a negative
> > +              * value for not-found entries.
> > +              */
> >               strintmap_init_with_options(&renames->relevant_sources[i],
> > -                                         0, NULL, 0);
> > +                                         -1 /* explicitly invalid */,
> > +                                         NULL, 0);
> >               strmap_init_with_options(&renames->cached_pairs[i],
> >                                        NULL, 1);
> >               strset_init_with_options(&renames->cached_irrelevant[i],
> >
>
> Functionally looks good. I just had some nits about organization.

As always, thanks for the review and the helpful suggestions!
diff mbox series

Patch

diff --git a/merge-ort.c b/merge-ort.c
index 8602f88a960c..5523fc9e86b3 100644
--- a/merge-ort.c
+++ b/merge-ort.c
@@ -2333,6 +2333,65 @@  static void resolve_diffpair_statuses(struct diff_queue_struct *q)
 	}
 }
 
+static void possibly_cache_new_pair(struct rename_info *renames,
+				    struct diff_filepair *p,
+				    unsigned side,
+				    char *new_path)
+{
+	char *old_value;
+	int dir_renamed_side = 0;
+
+	if (new_path) {
+		/*
+		 * Directory renames happen on the other side of history from
+		 * the side that adds new files to the old directory.
+		 */
+		dir_renamed_side = 3 - side;
+	} else {
+		int val = strintmap_get(&renames->relevant_sources[side],
+					p->one->path);
+		if (val == RELEVANT_NO_MORE) {
+			assert(p->status == 'D');
+			strset_add(&renames->cached_irrelevant[side],
+				   p->one->path);
+		}
+		if (val <= 0)
+			return;
+	}
+
+	if (p->status == 'D') {
+		/*
+		 * If we already had this delete, we'll just set its value
+		 * to NULL again, so no harm.
+		 */
+		strmap_put(&renames->cached_pairs[side], p->one->path, NULL);
+	} else if (p->status == 'R') {
+		if (new_path) {
+			new_path = xstrdup(new_path);
+			old_value = strmap_put(&renames->cached_pairs[dir_renamed_side],
+					       p->two->path, new_path);
+			strset_add(&renames->cached_target_names[dir_renamed_side],
+				   new_path);
+			assert(!old_value);
+		}
+		if (!new_path)
+			new_path = p->two->path;
+		new_path = xstrdup(new_path);
+		old_value = strmap_put(&renames->cached_pairs[side],
+				       p->one->path, new_path);
+		strset_add(&renames->cached_target_names[side],
+			   new_path);
+		free(old_value);
+	} else if (p->status == 'A' && new_path) {
+		new_path = xstrdup(new_path);
+		old_value = strmap_put(&renames->cached_pairs[dir_renamed_side],
+				       p->two->path, new_path);
+		strset_add(&renames->cached_target_names[dir_renamed_side],
+			   new_path);
+		assert(!old_value);
+	}
+}
+
 static int compare_pairs(const void *a_, const void *b_)
 {
 	const struct diff_filepair *a = *((const struct diff_filepair **)a_);
@@ -2415,6 +2474,7 @@  static int collect_renames(struct merge_options *opt,
 		char *new_path; /* non-NULL only with directory renames */
 
 		if (p->status != 'A' && p->status != 'R') {
+			possibly_cache_new_pair(renames, p, side_index, NULL);
 			diff_free_filepair(p);
 			continue;
 		}
@@ -2426,11 +2486,11 @@  static int collect_renames(struct merge_options *opt,
 						      &collisions,
 						      &clean);
 
+		possibly_cache_new_pair(renames, p, side_index, new_path);
 		if (p->status != 'R' && !new_path) {
 			diff_free_filepair(p);
 			continue;
 		}
-
 		if (new_path)
 			apply_directory_rename_modifications(opt, p, new_path);
 
@@ -3701,8 +3761,16 @@  static void merge_start(struct merge_options *opt, struct merge_result *result)
 					 NULL, 1);
 		strmap_init_with_options(&renames->dir_renames[i],
 					 NULL, 0);
+		/*
+		 * relevant_sources uses -1 for the default, because we need
+		 * to be able to distinguish not-in-strintmap from valid
+		 * relevant_source values from enum file_rename_relevance.
+		 * In particular, possibly_cache_new_pair() expects a negative
+		 * value for not-found entries.
+		 */
 		strintmap_init_with_options(&renames->relevant_sources[i],
-					    0, NULL, 0);
+					    -1 /* explicitly invalid */,
+					    NULL, 0);
 		strmap_init_with_options(&renames->cached_pairs[i],
 					 NULL, 1);
 		strset_init_with_options(&renames->cached_irrelevant[i],