Message ID | 78cd85d8dcc790251ce8235e649902cf6adf091a.1633440057.git.gitgitgadget@gmail.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | Sparse Index: integrate with reset | expand |
On Tue, Oct 5, 2021 at 6:21 AM Victoria Dye via GitGitGadget <gitgitgadget@gmail.com> wrote: > > From: Victoria Dye <vdye@github.com> > > In order to accurately reconstruct the cache tree in `prime_cache_tree_rec`, > the function must determine whether the currently-processing directory in > the tree is sparse or not. If it is not sparse, the tree is parsed and > subtree recursively constructed. If it is sparse, no subtrees are added to > the tree and the entry count is set to 1 (representing the sparse directory > itself). > > Signed-off-by: Victoria Dye <vdye@github.com> > --- > cache-tree.c | 44 +++++++++++++++++++++--- > cache.h | 10 ++++++ > read-cache.c | 22 ++++++++---- > t/t1092-sparse-checkout-compatibility.sh | 15 ++++++-- > 4 files changed, 78 insertions(+), 13 deletions(-) > > diff --git a/cache-tree.c b/cache-tree.c > index 9be19c85b66..9021669d682 100644 > --- a/cache-tree.c > +++ b/cache-tree.c > @@ -740,15 +740,29 @@ out: > return ret; > } > > +static void prime_cache_tree_sparse_dir(struct repository *r, > + struct cache_tree *it, > + struct tree *tree, > + struct strbuf *tree_path) > +{ > + > + oidcpy(&it->oid, &tree->object.oid); > + it->entry_count = 1; > + return; Why are 'r' and 'tree_path' passed to this function? > +} > + > static void prime_cache_tree_rec(struct repository *r, > struct cache_tree *it, > - struct tree *tree) > + struct tree *tree, > + struct strbuf *tree_path) > { > + struct strbuf subtree_path = STRBUF_INIT; > struct tree_desc desc; > struct name_entry entry; > int cnt; > > oidcpy(&it->oid, &tree->object.oid); > + Why the blank line addition here? 
> init_tree_desc(&desc, tree->buffer, tree->size); > cnt = 0; > while (tree_entry(&desc, &entry)) { > @@ -757,27 +771,49 @@ static void prime_cache_tree_rec(struct repository *r, > else { > struct cache_tree_sub *sub; > struct tree *subtree = lookup_tree(r, &entry.oid); > + > if (!subtree->object.parsed) > parse_tree(subtree); > sub = cache_tree_sub(it, entry.path); > sub->cache_tree = cache_tree(); > - prime_cache_tree_rec(r, sub->cache_tree, subtree); > + strbuf_reset(&subtree_path); > + strbuf_grow(&subtree_path, tree_path->len + entry.pathlen + 1); > + strbuf_addbuf(&subtree_path, tree_path); > + strbuf_add(&subtree_path, entry.path, entry.pathlen); > + strbuf_addch(&subtree_path, '/'); Reconstructing the full path each time? And despite only being useful for the sparse-index case? Would it be better to drop subtree_path from this function, then append entry.path + '/' here to tree_path, and then after the if-block below, call strbuf_setlen to remove the part that this function call added? That way, we don't need subtree_path, and don't have to copy the leading path every time. Also, maybe it'd be better to only do this strbuf manipulation if r->index->sparse_index, since it's not ever used otherwise? > + > + /* > + * If a sparse index is in use, the directory being processed may be > + * sparse. To confirm that, we can check whether an entry with that > + * exact name exists in the index. If it does, the created subtree > + * should be sparse. Otherwise, cache tree expansion should continue > + * as normal. 
> + */ > + if (r->index->sparse_index && > + index_entry_exists(r->index, subtree_path.buf, subtree_path.len)) > + prime_cache_tree_sparse_dir(r, sub->cache_tree, subtree, &subtree_path); > + else > + prime_cache_tree_rec(r, sub->cache_tree, subtree, &subtree_path); > cnt += sub->cache_tree->entry_count; > } > } > it->entry_count = cnt; > + > + strbuf_release(&subtree_path); > } > > void prime_cache_tree(struct repository *r, > struct index_state *istate, > struct tree *tree) > { > + struct strbuf tree_path = STRBUF_INIT; > + > trace2_region_enter("cache-tree", "prime_cache_tree", the_repository); > cache_tree_free(&istate->cache_tree); > istate->cache_tree = cache_tree(); > > - ensure_full_index(istate); > - prime_cache_tree_rec(r, istate->cache_tree, tree); > + prime_cache_tree_rec(r, istate->cache_tree, tree, &tree_path); > + strbuf_release(&tree_path); > istate->cache_changed |= CACHE_TREE_CHANGED; > trace2_region_leave("cache-tree", "prime_cache_tree", the_repository); > } > diff --git a/cache.h b/cache.h > index f6295f3b048..1d3e4665562 100644 > --- a/cache.h > +++ b/cache.h > @@ -816,6 +816,16 @@ struct cache_entry *index_file_exists(struct index_state *istate, const char *na > */ > int index_name_pos(struct index_state *, const char *name, int namelen); > > +/* > + * Determines whether an entry with the given name exists within the > + * given index. The return value is 1 if an exact match is found, otherwise > + * it is 0. Note that, unlike index_name_pos, this function does not expand > + * the index if it is sparse. If an item exists within the full index but it > + * is contained within a sparse directory (and not in the sparse index), 0 is > + * returned. 
> + */ > +int index_entry_exists(struct index_state *, const char *name, int namelen); > + > /* > * Some functions return the negative complement of an insert position when a > * precise match was not found but a position was found where the entry would > diff --git a/read-cache.c b/read-cache.c > index f5d4385c408..ea1166895f8 100644 > --- a/read-cache.c > +++ b/read-cache.c > @@ -551,7 +551,10 @@ int cache_name_stage_compare(const char *name1, int len1, int stage1, const char > return 0; > } > > -static int index_name_stage_pos(struct index_state *istate, const char *name, int namelen, int stage) > +static int index_name_stage_pos(struct index_state *istate, > + const char *name, int namelen, > + int stage, > + int search_sparse) It'd be nicer to make search_sparse an enum defined within this file, so that... > { > int first, last; > > @@ -570,7 +573,7 @@ static int index_name_stage_pos(struct index_state *istate, const char *name, in > first = next+1; > } > > - if (istate->sparse_index && > + if (search_sparse && istate->sparse_index && > first > 0) { > /* Note: first <= istate->cache_nr */ > struct cache_entry *ce = istate->cache[first - 1]; > @@ -586,7 +589,7 @@ static int index_name_stage_pos(struct index_state *istate, const char *name, in > ce_namelen(ce) < namelen && > !strncmp(name, ce->name, ce_namelen(ce))) { > ensure_full_index(istate); > - return index_name_stage_pos(istate, name, namelen, stage); > + return index_name_stage_pos(istate, name, namelen, stage, search_sparse); > } > } > > @@ -595,7 +598,12 @@ static int index_name_stage_pos(struct index_state *istate, const char *name, in > > int index_name_pos(struct index_state *istate, const char *name, int namelen) > { > - return index_name_stage_pos(istate, name, namelen, 0); > + return index_name_stage_pos(istate, name, namelen, 0, 1); ...this could use SEARCH_SPARSE or some name like that which is more meaningful than "1" here. 
> +} > + > +int index_entry_exists(struct index_state *istate, const char *name, int namelen) > +{ > + return index_name_stage_pos(istate, name, namelen, 0, 0) >= 0; ...and likewise this spot could use SEARCH_FULL or some name like that, which is more meaningful than the second "0". Similarly for multiple call sites below... > } > > int remove_index_entry_at(struct index_state *istate, int pos) > @@ -1222,7 +1230,7 @@ static int has_dir_name(struct index_state *istate, > */ > } > > - pos = index_name_stage_pos(istate, name, len, stage); > + pos = index_name_stage_pos(istate, name, len, stage, 1); > if (pos >= 0) { > /* > * Found one, but not so fast. This could > @@ -1322,7 +1330,7 @@ static int add_index_entry_with_check(struct index_state *istate, struct cache_e > strcmp(ce->name, istate->cache[istate->cache_nr - 1]->name) > 0) > pos = index_pos_to_insert_pos(istate->cache_nr); > else > - pos = index_name_stage_pos(istate, ce->name, ce_namelen(ce), ce_stage(ce)); > + pos = index_name_stage_pos(istate, ce->name, ce_namelen(ce), ce_stage(ce), 1); > > /* existing match? Just replace it. 
*/ > if (pos >= 0) { > @@ -1357,7 +1365,7 @@ static int add_index_entry_with_check(struct index_state *istate, struct cache_e > if (!ok_to_replace) > return error(_("'%s' appears as both a file and as a directory"), > ce->name); > - pos = index_name_stage_pos(istate, ce->name, ce_namelen(ce), ce_stage(ce)); > + pos = index_name_stage_pos(istate, ce->name, ce_namelen(ce), ce_stage(ce), 1); > pos = -pos-1; > } > return pos + 1; > diff --git a/t/t1092-sparse-checkout-compatibility.sh b/t/t1092-sparse-checkout-compatibility.sh > index f0723a6ac97..e301ef5633a 100755 > --- a/t/t1092-sparse-checkout-compatibility.sh > +++ b/t/t1092-sparse-checkout-compatibility.sh > @@ -786,9 +786,9 @@ test_expect_success 'sparse-index is not expanded' ' > ensure_not_expanded checkout - && > ensure_not_expanded switch rename-out-to-out && > ensure_not_expanded switch - && > - git -C sparse-index reset --hard && > + ensure_not_expanded reset --hard && > ensure_not_expanded checkout rename-out-to-out -- deep/deeper1 && > - git -C sparse-index reset --hard && > + ensure_not_expanded reset --hard && > ensure_not_expanded restore -s rename-out-to-out -- deep/deeper1 && > > echo >>sparse-index/README.md && > @@ -798,6 +798,17 @@ test_expect_success 'sparse-index is not expanded' ' > echo >>sparse-index/untracked.txt && > ensure_not_expanded add . && > > + for ref in update-deep update-folder1 update-folder2 update-deep > + do > + echo >>sparse-index/README.md && > + ensure_not_expanded reset --hard $ref || return 1 > + done && > + > + ensure_not_expanded reset --hard update-deep && > + ensure_not_expanded reset --keep base && > + ensure_not_expanded reset --merge update-deep && > + ensure_not_expanded reset --hard && > + > ensure_not_expanded checkout -f update-deep && > test_config -C sparse-index pull.twohead ort && > ( > -- > gitgitgadget
On 05/10/21 20.20, Victoria Dye via GitGitGadget wrote: > From: Victoria Dye <vdye@github.com> > > In order to accurately reconstruct the cache tree in `prime_cache_tree_rec`, > the function must determine whether the currently-processing directory in > the tree is sparse or not. If it is not sparse, the tree is parsed and > subtree recursively constructed. If it is sparse, no subtrees are added to > the tree and the entry count is set to 1 (representing the sparse directory > itself). > Better to say: `If it is sparse, no subtrees ..., else the tree ...`
Elijah Newren wrote: >> +static void prime_cache_tree_sparse_dir(struct repository *r, >> + struct cache_tree *it, >> + struct tree *tree, >> + struct strbuf *tree_path) >> +{ >> + >> + oidcpy(&it->oid, &tree->object.oid); >> + it->entry_count = 1; >> + return; > > Why are 'r' and 'tree_path' passed to this function? > I mindlessly copied the function signature of `prime_cache_tree_rec` and didn't notice those variables weren't needed (I'll remove them in V3). >> +} >> + >> static void prime_cache_tree_rec(struct repository *r, >> struct cache_tree *it, >> - struct tree *tree) >> + struct tree *tree, >> + struct strbuf *tree_path) >> { >> + struct strbuf subtree_path = STRBUF_INIT; >> struct tree_desc desc; >> struct name_entry entry; >> int cnt; >> >> oidcpy(&it->oid, &tree->object.oid); >> + > > Why the blank line addition here? > My goal was to visually separate the parts of `prime_cache_tree_rec` that update the properties of the `tree` itself and the parts that deal with its entries. For me, it was helpful when reading and understanding what this function does and seemed like a good (minor) readability change. >> init_tree_desc(&desc, tree->buffer, tree->size); >> cnt = 0; >> while (tree_entry(&desc, &entry)) { >> @@ -757,27 +771,49 @@ static void prime_cache_tree_rec(struct repository *r, >> else { >> struct cache_tree_sub *sub; >> struct tree *subtree = lookup_tree(r, &entry.oid); >> + >> if (!subtree->object.parsed) >> parse_tree(subtree); >> sub = cache_tree_sub(it, entry.path); >> sub->cache_tree = cache_tree(); >> - prime_cache_tree_rec(r, sub->cache_tree, subtree); > >> + strbuf_reset(&subtree_path); >> + strbuf_grow(&subtree_path, tree_path->len + entry.pathlen + 1); >> + strbuf_addbuf(&subtree_path, tree_path); >> + strbuf_add(&subtree_path, entry.path, entry.pathlen); >> + strbuf_addch(&subtree_path, '/'); > > Reconstructing the full path each time? And despite only being useful > for the sparse-index case? 
> > Would it be better to drop subtree_path from this function, then > append entry.path + '/' here to tree_path, and then after the if-block > below, call strbuf_setlen to remove the part that this function call > added? That way, we don't need subtree_path, and don't have to copy > the leading path every time. > > Also, maybe it'd be better to only do this strbuf manipulation if > r->index->sparse_index, since it's not ever used otherwise? > [...] >> -static int index_name_stage_pos(struct index_state *istate, const char *name, int namelen, int stage) >> +static int index_name_stage_pos(struct index_state *istate, >> + const char *name, int namelen, >> + int stage, >> + int search_sparse) > > It'd be nicer to make search_sparse an enum defined within this file, so that... > >> { >> int first, last; >> >> @@ -570,7 +573,7 @@ static int index_name_stage_pos(struct index_state *istate, const char *name, in >> first = next+1; >> } >> >> - if (istate->sparse_index && >> + if (search_sparse && istate->sparse_index && >> first > 0) { >> /* Note: first <= istate->cache_nr */ >> struct cache_entry *ce = istate->cache[first - 1]; >> @@ -586,7 +589,7 @@ static int index_name_stage_pos(struct index_state *istate, const char *name, in >> ce_namelen(ce) < namelen && >> !strncmp(name, ce->name, ce_namelen(ce))) { >> ensure_full_index(istate); >> - return index_name_stage_pos(istate, name, namelen, stage); >> + return index_name_stage_pos(istate, name, namelen, stage, search_sparse); >> } >> } >> >> @@ -595,7 +598,12 @@ static int index_name_stage_pos(struct index_state *istate, const char *name, in >> >> int index_name_pos(struct index_state *istate, const char *name, int namelen) >> { >> - return index_name_stage_pos(istate, name, namelen, 0); >> + return index_name_stage_pos(istate, name, namelen, 0, 1); > > ...this could use SEARCH_SPARSE or some name like that which is more > meaningful than "1" here. 
> >> +} >> + >> +int index_entry_exists(struct index_state *istate, const char *name, int namelen) >> +{ >> + return index_name_stage_pos(istate, name, namelen, 0, 0) >= 0; > > ...and likewise this spot could use SEARCH_FULL or some name like > that, which is more meaningful than the second "0". > > Similarly for multiple call sites below... > > I like all of these suggestions and will include them in the next version. Thanks!
diff --git a/cache-tree.c b/cache-tree.c index 9be19c85b66..9021669d682 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -740,15 +740,29 @@ out: return ret; } +static void prime_cache_tree_sparse_dir(struct repository *r, + struct cache_tree *it, + struct tree *tree, + struct strbuf *tree_path) +{ + + oidcpy(&it->oid, &tree->object.oid); + it->entry_count = 1; + return; +} + static void prime_cache_tree_rec(struct repository *r, struct cache_tree *it, - struct tree *tree) + struct tree *tree, + struct strbuf *tree_path) { + struct strbuf subtree_path = STRBUF_INIT; struct tree_desc desc; struct name_entry entry; int cnt; oidcpy(&it->oid, &tree->object.oid); + init_tree_desc(&desc, tree->buffer, tree->size); cnt = 0; while (tree_entry(&desc, &entry)) { @@ -757,27 +771,49 @@ static void prime_cache_tree_rec(struct repository *r, else { struct cache_tree_sub *sub; struct tree *subtree = lookup_tree(r, &entry.oid); + if (!subtree->object.parsed) parse_tree(subtree); sub = cache_tree_sub(it, entry.path); sub->cache_tree = cache_tree(); - prime_cache_tree_rec(r, sub->cache_tree, subtree); + strbuf_reset(&subtree_path); + strbuf_grow(&subtree_path, tree_path->len + entry.pathlen + 1); + strbuf_addbuf(&subtree_path, tree_path); + strbuf_add(&subtree_path, entry.path, entry.pathlen); + strbuf_addch(&subtree_path, '/'); + + /* + * If a sparse index is in use, the directory being processed may be + * sparse. To confirm that, we can check whether an entry with that + * exact name exists in the index. If it does, the created subtree + * should be sparse. Otherwise, cache tree expansion should continue + * as normal. 
+ */ + if (r->index->sparse_index && + index_entry_exists(r->index, subtree_path.buf, subtree_path.len)) + prime_cache_tree_sparse_dir(r, sub->cache_tree, subtree, &subtree_path); + else + prime_cache_tree_rec(r, sub->cache_tree, subtree, &subtree_path); cnt += sub->cache_tree->entry_count; } } it->entry_count = cnt; + + strbuf_release(&subtree_path); } void prime_cache_tree(struct repository *r, struct index_state *istate, struct tree *tree) { + struct strbuf tree_path = STRBUF_INIT; + trace2_region_enter("cache-tree", "prime_cache_tree", the_repository); cache_tree_free(&istate->cache_tree); istate->cache_tree = cache_tree(); - ensure_full_index(istate); - prime_cache_tree_rec(r, istate->cache_tree, tree); + prime_cache_tree_rec(r, istate->cache_tree, tree, &tree_path); + strbuf_release(&tree_path); istate->cache_changed |= CACHE_TREE_CHANGED; trace2_region_leave("cache-tree", "prime_cache_tree", the_repository); } diff --git a/cache.h b/cache.h index f6295f3b048..1d3e4665562 100644 --- a/cache.h +++ b/cache.h @@ -816,6 +816,16 @@ struct cache_entry *index_file_exists(struct index_state *istate, const char *na */ int index_name_pos(struct index_state *, const char *name, int namelen); +/* + * Determines whether an entry with the given name exists within the + * given index. The return value is 1 if an exact match is found, otherwise + * it is 0. Note that, unlike index_name_pos, this function does not expand + * the index if it is sparse. If an item exists within the full index but it + * is contained within a sparse directory (and not in the sparse index), 0 is + * returned. 
+ */ +int index_entry_exists(struct index_state *, const char *name, int namelen); + /* * Some functions return the negative complement of an insert position when a * precise match was not found but a position was found where the entry would diff --git a/read-cache.c b/read-cache.c index f5d4385c408..ea1166895f8 100644 --- a/read-cache.c +++ b/read-cache.c @@ -551,7 +551,10 @@ int cache_name_stage_compare(const char *name1, int len1, int stage1, const char return 0; } -static int index_name_stage_pos(struct index_state *istate, const char *name, int namelen, int stage) +static int index_name_stage_pos(struct index_state *istate, + const char *name, int namelen, + int stage, + int search_sparse) { int first, last; @@ -570,7 +573,7 @@ static int index_name_stage_pos(struct index_state *istate, const char *name, in first = next+1; } - if (istate->sparse_index && + if (search_sparse && istate->sparse_index && first > 0) { /* Note: first <= istate->cache_nr */ struct cache_entry *ce = istate->cache[first - 1]; @@ -586,7 +589,7 @@ static int index_name_stage_pos(struct index_state *istate, const char *name, in ce_namelen(ce) < namelen && !strncmp(name, ce->name, ce_namelen(ce))) { ensure_full_index(istate); - return index_name_stage_pos(istate, name, namelen, stage); + return index_name_stage_pos(istate, name, namelen, stage, search_sparse); } } @@ -595,7 +598,12 @@ static int index_name_stage_pos(struct index_state *istate, const char *name, in int index_name_pos(struct index_state *istate, const char *name, int namelen) { - return index_name_stage_pos(istate, name, namelen, 0); + return index_name_stage_pos(istate, name, namelen, 0, 1); +} + +int index_entry_exists(struct index_state *istate, const char *name, int namelen) +{ + return index_name_stage_pos(istate, name, namelen, 0, 0) >= 0; } int remove_index_entry_at(struct index_state *istate, int pos) @@ -1222,7 +1230,7 @@ static int has_dir_name(struct index_state *istate, */ } - pos = index_name_stage_pos(istate, 
name, len, stage); + pos = index_name_stage_pos(istate, name, len, stage, 1); if (pos >= 0) { /* * Found one, but not so fast. This could @@ -1322,7 +1330,7 @@ static int add_index_entry_with_check(struct index_state *istate, struct cache_e strcmp(ce->name, istate->cache[istate->cache_nr - 1]->name) > 0) pos = index_pos_to_insert_pos(istate->cache_nr); else - pos = index_name_stage_pos(istate, ce->name, ce_namelen(ce), ce_stage(ce)); + pos = index_name_stage_pos(istate, ce->name, ce_namelen(ce), ce_stage(ce), 1); /* existing match? Just replace it. */ if (pos >= 0) { @@ -1357,7 +1365,7 @@ static int add_index_entry_with_check(struct index_state *istate, struct cache_e if (!ok_to_replace) return error(_("'%s' appears as both a file and as a directory"), ce->name); - pos = index_name_stage_pos(istate, ce->name, ce_namelen(ce), ce_stage(ce)); + pos = index_name_stage_pos(istate, ce->name, ce_namelen(ce), ce_stage(ce), 1); pos = -pos-1; } return pos + 1; diff --git a/t/t1092-sparse-checkout-compatibility.sh b/t/t1092-sparse-checkout-compatibility.sh index f0723a6ac97..e301ef5633a 100755 --- a/t/t1092-sparse-checkout-compatibility.sh +++ b/t/t1092-sparse-checkout-compatibility.sh @@ -786,9 +786,9 @@ test_expect_success 'sparse-index is not expanded' ' ensure_not_expanded checkout - && ensure_not_expanded switch rename-out-to-out && ensure_not_expanded switch - && - git -C sparse-index reset --hard && + ensure_not_expanded reset --hard && ensure_not_expanded checkout rename-out-to-out -- deep/deeper1 && - git -C sparse-index reset --hard && + ensure_not_expanded reset --hard && ensure_not_expanded restore -s rename-out-to-out -- deep/deeper1 && echo >>sparse-index/README.md && @@ -798,6 +798,17 @@ test_expect_success 'sparse-index is not expanded' ' echo >>sparse-index/untracked.txt && ensure_not_expanded add . 
&& + for ref in update-deep update-folder1 update-folder2 update-deep + do + echo >>sparse-index/README.md && + ensure_not_expanded reset --hard $ref || return 1 + done && + + ensure_not_expanded reset --hard update-deep && + ensure_not_expanded reset --keep base && + ensure_not_expanded reset --merge update-deep && + ensure_not_expanded reset --hard && + ensure_not_expanded checkout -f update-deep && test_config -C sparse-index pull.twohead ort && (