Message ID | 731b6bd15531fc7883a2c70275cea24ac686ab03.1620094339.git.gitgitgadget@gmail.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | Optimization batch 11: avoid repeatedly detecting same renames | expand |
On 5/3/21 10:12 PM, Elijah Newren via GitGitGadget wrote: > From: Elijah Newren <newren@gmail.com> ... > Signed-off-by: Elijah Newren <newren@gmail.com> > --- > merge-ort.c | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++-- > 1 file changed, 64 insertions(+), 2 deletions(-) > > diff --git a/merge-ort.c b/merge-ort.c > index 5523fc9e86b3..a342cc6344fd 100644 > --- a/merge-ort.c > +++ b/merge-ort.c > @@ -140,6 +140,30 @@ struct rename_info { > int callback_data_nr, callback_data_alloc; > char *callback_data_traverse_path; > > + /* > + * merge_trees: trees passed to the merge algorithm for the merge > + * > + * merge_trees records the trees passed to the merge algorithm. But, > + * this data also is stored in merge_result->priv. If a sequence of > + * merges are being done (such as when cherry-picking or rebasing), > + * the next merge can look at this and re-use information from > + * previous merges under certain cirumstances. s/cirumstances/circumstances/ > + * > + * See also all the cached_* variables. > + */ > + struct tree *merge_trees[3]; > + > + /* > + * cached_pairs_valid_side: which side's cached info can be reused > + * > + * See the description for merge_trees. For repeated merges, at most > + * only one side's cached information can be used. Valid values: > + * MERGE_SIDE2: cached data from side2 can be reused > + * MERGE_SIDE1: cached data from side1 can be reused > + * 0: no cached data can be reused > + */ > + int cached_pairs_valid_side; ... > +static void merge_check_renames_reusable(struct merge_options *opt, > + struct merge_result *result, > + struct tree *merge_base, > + struct tree *side1, > + struct tree *side2) > +{ > + struct rename_info *renames; > + struct tree **merge_trees; > + struct merge_options_internal *opti = result->priv; > + > + if (!opti) > + return; > + > + renames = &opti->renames; > + merge_trees = renames->merge_trees; > + /* merge_trees[0..2] will only be NULL if opti is */ > + assert(merge_trees[0] && merge_trees[1] && merge_trees[2]); > + > + /* Check if we meet a condition for re-using cached_pairs */ > + if ( oideq(&merge_base->object.oid, &merge_trees[2]->object.oid) && > + oideq( &side1->object.oid, &result->tree->object.oid)) > + renames->cached_pairs_valid_side = MERGE_SIDE1; > + else if (oideq(&merge_base->object.oid, &merge_trees[1]->object.oid) && > + oideq( &side2->object.oid, &result->tree->object.oid)) I see how you used whitespace to align the different items in these conditions, but they are nonstandard so it's probably best to drop the extra spaces. > + renames->cached_pairs_valid_side = MERGE_SIDE2; > + else > + renames->cached_pairs_valid_side = 0; /* neither side valid */ > +} > + > /*** Function Grouping: merge_incore_*() and their internal variants ***/ > > /* > @@ -3949,7 +4002,16 @@ void merge_incore_nonrecursive(struct merge_options *opt, > > trace2_region_enter("merge", "merge_start", opt->repo); > assert(opt->ancestor != NULL); > + merge_check_renames_reusable(opt, result, merge_base, side1, side2); > merge_start(opt, result); > + /* > + * Record the trees used in this merge, so if there's a next merge in > + * a cherry-pick or rebase sequence it might be able to take advantage > + * of the cached_pairs in that next merge. > + */ > + opt->priv->renames.merge_trees[0] = merge_base; > + opt->priv->renames.merge_trees[1] = side1; > + opt->priv->renames.merge_trees[2] = side2; > trace2_region_leave("merge", "merge_start", opt->repo); > Again, the functionality seems reasonable. We're not quite to the punchline of the series. Thanks, -Stolee
diff --git a/merge-ort.c b/merge-ort.c index 5523fc9e86b3..a342cc6344fd 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -140,6 +140,30 @@ struct rename_info { int callback_data_nr, callback_data_alloc; char *callback_data_traverse_path; + /* + * merge_trees: trees passed to the merge algorithm for the merge + * + * merge_trees records the trees passed to the merge algorithm. But, + * this data also is stored in merge_result->priv. If a sequence of + * merges are being done (such as when cherry-picking or rebasing), + * the next merge can look at this and re-use information from + * previous merges under certain cirumstances. + * + * See also all the cached_* variables. + */ + struct tree *merge_trees[3]; + + /* + * cached_pairs_valid_side: which side's cached info can be reused + * + * See the description for merge_trees. For repeated merges, at most + * only one side's cached information can be used. Valid values: + * MERGE_SIDE2: cached data from side2 can be reused + * MERGE_SIDE1: cached data from side1 can be reused + * 0: no cached data can be reused + */ + int cached_pairs_valid_side; + /* * cached_pairs: Caching of renames and deletions. * @@ -462,6 +486,8 @@ static void clear_or_reinit_internal_opts(struct merge_options_internal *opti, strmap_func(&renames->cached_pairs[i], 1); strset_func(&renames->cached_irrelevant[i]); } + renames->cached_pairs_valid_side = 0; + renames->dir_rename_mask = 0; if (!reinitialize) { struct hashmap_iter iter; @@ -484,8 +510,6 @@ static void clear_or_reinit_internal_opts(struct merge_options_internal *opti, strmap_clear(&opti->output, 0); } - renames->dir_rename_mask = 0; - /* Clean out callback_data as well. */ FREE_AND_NULL(renames->callback_data); renames->callback_data_nr = renames->callback_data_alloc = 0; @@ -3802,6 +3826,35 @@ static void merge_start(struct merge_options *opt, struct merge_result *result) trace2_region_leave("merge", "allocate/init", opt->repo); } +static void merge_check_renames_reusable(struct merge_options *opt, + struct merge_result *result, + struct tree *merge_base, + struct tree *side1, + struct tree *side2) +{ + struct rename_info *renames; + struct tree **merge_trees; + struct merge_options_internal *opti = result->priv; + + if (!opti) + return; + + renames = &opti->renames; + merge_trees = renames->merge_trees; + /* merge_trees[0..2] will only be NULL if opti is */ + assert(merge_trees[0] && merge_trees[1] && merge_trees[2]); + + /* Check if we meet a condition for re-using cached_pairs */ + if ( oideq(&merge_base->object.oid, &merge_trees[2]->object.oid) && + oideq( &side1->object.oid, &result->tree->object.oid)) + renames->cached_pairs_valid_side = MERGE_SIDE1; + else if (oideq(&merge_base->object.oid, &merge_trees[1]->object.oid) && + oideq( &side2->object.oid, &result->tree->object.oid)) + renames->cached_pairs_valid_side = MERGE_SIDE2; + else + renames->cached_pairs_valid_side = 0; /* neither side valid */ +} + /*** Function Grouping: merge_incore_*() and their internal variants ***/ /* @@ -3949,7 +4002,16 @@ void merge_incore_nonrecursive(struct merge_options *opt, trace2_region_enter("merge", "merge_start", opt->repo); assert(opt->ancestor != NULL); + merge_check_renames_reusable(opt, result, merge_base, side1, side2); merge_start(opt, result); + /* + * Record the trees used in this merge, so if there's a next merge in + * a cherry-pick or rebase sequence it might be able to take advantage + * of the cached_pairs in that next merge. + */ + opt->priv->renames.merge_trees[0] = merge_base; + opt->priv->renames.merge_trees[1] = side1; + opt->priv->renames.merge_trees[2] = side2; trace2_region_leave("merge", "merge_start", opt->repo); merge_ort_nonrecursive_internal(opt, merge_base, side1, side2, result);