Message ID | d3d1738e670d5dbf1378fc5c3209b2e98234a771.1665973401.git.gitgitgadget@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | archive: Add --recurse-submodules to git-archive command | expand |
Hi Heather On 17/10/2022 03:23, Heather Lapointe via GitGitGadget wrote: > From: Heather Lapointe <alpha@alphaservcomputing.solutions> > > This supports traversal into an actual submodule for read_tree_at. > The logic is blocked on pathspec->recurse_submodules now, I'm struggling to understand what this is saying. > but previously hadn't been executed due to all fn() cases > returning early for submodules. > > Signed-off-by: Heather Lapointe <alpha@alphaservcomputing.solutions> > --- > tree.c | 88 ++++++++++++++++++++++++++++++++++++++++------------------ > 1 file changed, 61 insertions(+), 27 deletions(-) > > diff --git a/tree.c b/tree.c > index 13f9173d45e..2a087c010f9 100644 > --- a/tree.c > +++ b/tree.c > @@ -8,6 +8,7 @@ > #include "alloc.h" > #include "tree-walk.h" > #include "repository.h" > +#include "pathspec.h" > > const char *tree_type = "tree"; > > @@ -47,40 +48,73 @@ int read_tree_at(struct repository *r, > return -1; > } > > - if (S_ISDIR(entry.mode)) > + if (S_ISDIR(entry.mode)) { > oidcpy(&oid, &entry.oid); > - else if (S_ISGITLINK(entry.mode)) { > - struct commit *commit; > > - commit = lookup_commit(r, &entry.oid); > + len = tree_entry_len(&entry); > + strbuf_add(base, entry.path, len); > + strbuf_addch(base, '/'); > + retval = read_tree_at(r, lookup_tree(r, &oid), > + base, pathspec, > + fn, context); > + strbuf_setlen(base, oldlen); > + if (retval) > + return -1; > + } else if (pathspec->recurse_submodules && S_ISGITLINK(entry.mode)) { > + struct commit *commit; > + struct repository subrepo; > + struct repository* subrepo_p = &subrepo; Normally we'd just use &subrepo wherever we want a pointer rather than defining an alias like this. For example it is common to see struct strbuf buf = STRBUF_INIT; strbuf_add(&buf, "hello world"); we don't define a buf_p variable > + struct tree* submodule_tree; > + char *submodule_rel_path; > + int name_base_len = 0; > + > + len = tree_entry_len(&entry); > + strbuf_add(base, entry.path, len); > + submodule_rel_path = base->buf; > + // repo_submodule_init expects a path relative to submodule_prefix I found the comments in this section code code helpful, but single line comments should be formatted as /* single line comment */ > + if (r->submodule_prefix) { > + name_base_len = strlen(r->submodule_prefix); > + // we should always expect to start with submodule_prefix > + assert(!strncmp(submodule_rel_path, r->submodule_prefix, name_base_len)); Rather than using assert() we tend to use BUG() as that then provides a grep-able message. It also means that we wont have an out of bounds access if the invariant is violated when compiling with NDEBUG. So we could drop the comment and write if (strncmp(submodule_rel_path, r->submodule_prefix, name_base_len) BUG("missing submodule path prefix"); > + // strip the prefix > + submodule_rel_path += name_base_len; > + // if submodule_prefix doesn't end with a /, we want to get rid of that too I think there is a typo here - if the prefix does end with a / then we're dropping it. > + if (is_dir_sep(submodule_rel_path[0])) { > + submodule_rel_path++; > + } > + } > + > + if (repo_submodule_init(subrepo_p, r, submodule_rel_path, null_oid())) > + die("couldn't init submodule %s", base->buf); > + > + if (repo_read_index(subrepo_p) < 0) > + die("index file corrupt"); > + > + commit = lookup_commit(subrepo_p, &entry.oid); > if (!commit) > - die("Commit %s in submodule path %s%s not found", > + die("Commit %s in submodule path %s not found", > oid_to_hex(&entry.oid), > - base->buf, entry.path); > - > - // FIXME: This is the wrong repo instance (it refers to the superproject) > - // it will always fail as is (will fix in later patch) > - // This current codepath isn't executed by any existing callbacks > - // so it wouldn't show up as an issue at this time. > - if (repo_parse_commit(r, commit)) Style comment for the patch that added this code. Multi-line comments should be formatted as /* * Multi-line * comment */ Best Wishes Phillip > - die("Invalid commit %s in submodule path %s%s", > + base->buf); > + > + if (repo_parse_commit(subrepo_p, commit)) > + die("Invalid commit %s in submodule path %s", > oid_to_hex(&entry.oid), > - base->buf, entry.path); > + base->buf); > > - oidcpy(&oid, get_commit_tree_oid(commit)); > - } > - else > - continue; > + submodule_tree = repo_get_commit_tree(subrepo_p, commit); > + oidcpy(&oid, submodule_tree ? &submodule_tree->object.oid : NULL); > > - len = tree_entry_len(&entry); > - strbuf_add(base, entry.path, len); > - strbuf_addch(base, '/'); > - retval = read_tree_at(r, lookup_tree(r, &oid), > - base, pathspec, > - fn, context); > - strbuf_setlen(base, oldlen); > - if (retval) > - return -1; > + strbuf_addch(base, '/'); > + > + retval = read_tree_at(subrepo_p, lookup_tree(subrepo_p, &oid), > + base, pathspec, > + fn, context); > + if (retval) > + die("failed to read tree for %s", base->buf); > + strbuf_setlen(base, oldlen); > + repo_clear(subrepo_p); > + } > + // else, this is a file (or a submodule, but no pathspec->recurse_submodules) > } > return 0; > }
"Heather Lapointe via GitGitGadget" <gitgitgadget@gmail.com> writes: > From: Heather Lapointe <alpha@alphaservcomputing.solutions> > > This supports traversal into an actual submodule for read_tree_at. > The logic is blocked on pathspec->recurse_submodules now, > but previously hadn't been executed due to all fn() cases > returning early for submodules. > > Signed-off-by: Heather Lapointe <alpha@alphaservcomputing.solutions> > --- > tree.c | 88 ++++++++++++++++++++++++++++++++++++++++------------------ > 1 file changed, 61 insertions(+), 27 deletions(-) > > diff --git a/tree.c b/tree.c > index 13f9173d45e..2a087c010f9 100644 > --- a/tree.c > +++ b/tree.c > @@ -8,6 +8,7 @@ > #include "alloc.h" > #include "tree-walk.h" > #include "repository.h" > +#include "pathspec.h" > > const char *tree_type = "tree"; > > @@ -47,40 +48,73 @@ int read_tree_at(struct repository *r, > return -1; > } > > - if (S_ISDIR(entry.mode)) > + if (S_ISDIR(entry.mode)) { > oidcpy(&oid, &entry.oid); > + len = tree_entry_len(&entry); > + strbuf_add(base, entry.path, len); > + strbuf_addch(base, '/'); > + retval = read_tree_at(r, lookup_tree(r, &oid), > + base, pathspec, > + fn, context); > + strbuf_setlen(base, oldlen); > + if (retval) > + return -1; The diff output makes it appear as if we are now adding many extra processing to a normal directory case, but it actually folds the code that was originally outside the if/else if/ cascade here. So I think this is not breaking the normal directory case. > + } else if (pathspec->recurse_submodules && S_ISGITLINK(entry.mode)) { > + struct commit *commit; > + struct repository subrepo; > + struct repository* subrepo_p = &subrepo; > + struct tree* submodule_tree; In our codebase, star/asterisk for a pointer declaration sticks to the variable, not the type. cf. Documentation/CodingGuidelines > + char *submodule_rel_path; Funny that the new code sometimes gets it right ;-) > + int name_base_len = 0; > + > + len = tree_entry_len(&entry); > + strbuf_add(base, entry.path, len); > + submodule_rel_path = base->buf; > + // repo_submodule_init expects a path relative to submodule_prefix We avoid // comments. > + if (r->submodule_prefix) { > + name_base_len = strlen(r->submodule_prefix); > + // we should always expect to start with submodule_prefix > + assert(!strncmp(submodule_rel_path, r->submodule_prefix, name_base_len)); > + // strip the prefix > + submodule_rel_path += name_base_len; > + // if submodule_prefix doesn't end with a /, we want to get rid of that too > + if (is_dir_sep(submodule_rel_path[0])) { > + submodule_rel_path++; > + } > + } > + > + if (repo_submodule_init(subrepo_p, r, submodule_rel_path, null_oid())) > + die("couldn't init submodule %s", base->buf); > + > + if (repo_read_index(subrepo_p) < 0) > + die("index file corrupt"); Why? You are going to ask the object store of the submodule repository, and to do so you do not need to have its index read into the core. > + commit = lookup_commit(subrepo_p, &entry.oid); > if (!commit) > - die("Commit %s in submodule path %s%s not found", > + die("Commit %s in submodule path %s not found", > oid_to_hex(&entry.oid), > - base->buf, entry.path); > - > - // FIXME: This is the wrong repo instance (it refers to the superproject) > - // it will always fail as is (will fix in later patch) > - // This current codepath isn't executed by any existing callbacks > - // so it wouldn't show up as an issue at this time. > - if (repo_parse_commit(r, commit)) > - die("Invalid commit %s in submodule path %s%s", > + base->buf); > + > + if (repo_parse_commit(subrepo_p, commit)) > + die("Invalid commit %s in submodule path %s", > oid_to_hex(&entry.oid), > - base->buf, entry.path); > + base->buf); > > - oidcpy(&oid, get_commit_tree_oid(commit)); > - } > - else > - continue; > + submodule_tree = repo_get_commit_tree(subrepo_p, commit); > + oidcpy(&oid, submodule_tree ? &submodule_tree->object.oid : NULL); > > - len = tree_entry_len(&entry); > - strbuf_add(base, entry.path, len); > - strbuf_addch(base, '/'); > - retval = read_tree_at(r, lookup_tree(r, &oid), > - base, pathspec, > - fn, context); > - strbuf_setlen(base, oldlen); > - if (retval) > - return -1; > + strbuf_addch(base, '/'); > + > + retval = read_tree_at(subrepo_p, lookup_tree(subrepo_p, &oid), > + base, pathspec, > + fn, context); > + if (retval) > + die("failed to read tree for %s", base->buf); > + strbuf_setlen(base, oldlen); > + repo_clear(subrepo_p); This is a lot of new code, which must be done correctly. An easier way out to use the add_submodule_odb() trick that the original code assumed becomes somewhat tempting (I guess we would do that in fn() that would tell us to recurse into this codepath upon seeing a gitlink entry). Then we wouldn't have had to touch any tree() calls that were taught to take "struct repository *" in earlier steps in this series. But at some point, we would need to bite the bullet and plumb the repository pointer through the callchain of more APIs, and this may be that point. I dunno. > + } > + // else, this is a file (or a submodule, but no pathspec->recurse_submodules) > } > return 0; > }
"Heather Lapointe via GitGitGadget" <gitgitgadget@gmail.com> writes: > @@ -47,40 +48,73 @@ int read_tree_at(struct repository *r, > return -1; > } > > - if (S_ISDIR(entry.mode)) > + if (S_ISDIR(entry.mode)) { > oidcpy(&oid, &entry.oid); > - else if (S_ISGITLINK(entry.mode)) { > - struct commit *commit; > > - commit = lookup_commit(r, &entry.oid); > + len = tree_entry_len(&entry); > + strbuf_add(base, entry.path, len); > + strbuf_addch(base, '/'); > + retval = read_tree_at(r, lookup_tree(r, &oid), > + base, pathspec, > + fn, context); > + strbuf_setlen(base, oldlen); > + if (retval) > + return -1; > + } else if (pathspec->recurse_submodules && S_ISGITLINK(entry.mode)) { > + struct commit *commit; > + struct repository subrepo; > + struct repository* subrepo_p = &subrepo; > + struct tree* submodule_tree; > + char *submodule_rel_path; > + int name_base_len = 0; > + > + len = tree_entry_len(&entry); > + strbuf_add(base, entry.path, len); > + submodule_rel_path = base->buf; > + // repo_submodule_init expects a path relative to submodule_prefix > + if (r->submodule_prefix) { > + name_base_len = strlen(r->submodule_prefix); > + // we should always expect to start with submodule_prefix > + assert(!strncmp(submodule_rel_path, r->submodule_prefix, name_base_len)); > + // strip the prefix > + submodule_rel_path += name_base_len; > + // if submodule_prefix doesn't end with a /, we want to get rid of that too > + if (is_dir_sep(submodule_rel_path[0])) { > + submodule_rel_path++; > + } > + } > + > + if (repo_submodule_init(subrepo_p, r, submodule_rel_path, null_oid())) > + die("couldn't init submodule %s", base->buf); > + > + if (repo_read_index(subrepo_p) < 0) > + die("index file corrupt"); > + > + commit = lookup_commit(subrepo_p, &entry.oid); > if (!commit) > - die("Commit %s in submodule path %s%s not found", > + die("Commit %s in submodule path %s not found", > oid_to_hex(&entry.oid), > - base->buf, entry.path); > - > - // FIXME: This is the wrong repo instance (it refers to the superproject) > - // it will always fail as is (will fix in later patch) > - // This current codepath isn't executed by any existing callbacks > - // so it wouldn't show up as an issue at this time. > - if (repo_parse_commit(r, commit)) > - die("Invalid commit %s in submodule path %s%s", > + base->buf); > + > + if (repo_parse_commit(subrepo_p, commit)) > + die("Invalid commit %s in submodule path %s", > oid_to_hex(&entry.oid), > - base->buf, entry.path); > + base->buf); > > - oidcpy(&oid, get_commit_tree_oid(commit)); > - } > - else > - continue; > + submodule_tree = repo_get_commit_tree(subrepo_p, commit); > + oidcpy(&oid, submodule_tree ? &submodule_tree->object.oid : NULL); > > - len = tree_entry_len(&entry); > - strbuf_add(base, entry.path, len); > - strbuf_addch(base, '/'); > - retval = read_tree_at(r, lookup_tree(r, &oid), > - base, pathspec, > - fn, context); > - strbuf_setlen(base, oldlen); > - if (retval) > - return -1; > + strbuf_addch(base, '/'); > + > + retval = read_tree_at(subrepo_p, lookup_tree(subrepo_p, &oid), > + base, pathspec, > + fn, context); > + if (retval) > + die("failed to read tree for %s", base->buf); > + strbuf_setlen(base, oldlen); > + repo_clear(subrepo_p); > + } > + // else, this is a file (or a submodule, but no pathspec->recurse_submodules) In this patch, we say that we can ignore a submodule when pathspec->recurse_submodules is 0, but unless I'm missing something, I don't think that's the case. The preimage is: else if (S_ISGITLINK(entry.mode)) { struct commit *commit; commit = lookup_commit(r, &entry.oid); if (!commit) die("Commit %s in submodule path %s%s not found", oid_to_hex(&entry.oid), base->buf, entry.path); /* ... */ if (repo_parse_commit(r, commit)) die("Invalid commit %s in submodule path %s%s", oid_to_hex(&entry.oid), base->buf, entry.path); oidcpy(&oid, get_commit_tree_oid(commit)); } else continue; len = tree_entry_len(&entry); strbuf_add(base, entry.path, len); strbuf_addch(base, '/'); retval = read_tree_at(r, lookup_tree(r, &oid), base, pathspec, fn, context); which isn't a no-op since we actually do recurse into the gitlink. I don't know whether the subsequent call actually succeeds though (e.g. maybe it always failed and it was just a de facto no-op?), but that's much harder to prove. Since this function has callers outside of "git archive", it would be better to be conservative and keep the original behavior in the S_ISGITLINK(entry.mode) && !pathspec->recurse_submodules case. > } > return 0; > } > -- > gitgitgadget
"Heather Lapointe via GitGitGadget" <gitgitgadget@gmail.com> writes: > From: Heather Lapointe <alpha@alphaservcomputing.solutions> > > This supports traversal into an actual submodule for read_tree_at. > The logic is blocked on pathspec->recurse_submodules now, What do you mean by "blocked"? Do you mean only that pathspec->recurse_submodules needs to be specified, or do you also mean that no caller specifies pathspec->recurse_submodules now, so this code path is never executed? > but previously hadn't been executed due to all fn() cases > returning early for submodules. What do you mean by "previously hadn't been executed due to..."? At a glance, I would think that this new logic is introduced in this patch, so of course it would never have been previously executed. > + if (repo_submodule_init(subrepo_p, r, submodule_rel_path, null_oid())) I don't think this can be null_oid() here - it has to match the tree from which you constructed submodule_rel_path. (That tree may not be the tree at HEAD.)
diff --git a/tree.c b/tree.c index 13f9173d45e..2a087c010f9 100644 --- a/tree.c +++ b/tree.c @@ -8,6 +8,7 @@ #include "alloc.h" #include "tree-walk.h" #include "repository.h" +#include "pathspec.h" const char *tree_type = "tree"; @@ -47,40 +48,73 @@ int read_tree_at(struct repository *r, return -1; } - if (S_ISDIR(entry.mode)) + if (S_ISDIR(entry.mode)) { oidcpy(&oid, &entry.oid); - else if (S_ISGITLINK(entry.mode)) { - struct commit *commit; - commit = lookup_commit(r, &entry.oid); + len = tree_entry_len(&entry); + strbuf_add(base, entry.path, len); + strbuf_addch(base, '/'); + retval = read_tree_at(r, lookup_tree(r, &oid), + base, pathspec, + fn, context); + strbuf_setlen(base, oldlen); + if (retval) + return -1; + } else if (pathspec->recurse_submodules && S_ISGITLINK(entry.mode)) { + struct commit *commit; + struct repository subrepo; + struct repository* subrepo_p = &subrepo; + struct tree* submodule_tree; + char *submodule_rel_path; + int name_base_len = 0; + + len = tree_entry_len(&entry); + strbuf_add(base, entry.path, len); + submodule_rel_path = base->buf; + // repo_submodule_init expects a path relative to submodule_prefix + if (r->submodule_prefix) { + name_base_len = strlen(r->submodule_prefix); + // we should always expect to start with submodule_prefix + assert(!strncmp(submodule_rel_path, r->submodule_prefix, name_base_len)); + // strip the prefix + submodule_rel_path += name_base_len; + // if submodule_prefix doesn't end with a /, we want to get rid of that too + if (is_dir_sep(submodule_rel_path[0])) { + submodule_rel_path++; + } + } + + if (repo_submodule_init(subrepo_p, r, submodule_rel_path, null_oid())) + die("couldn't init submodule %s", base->buf); + + if (repo_read_index(subrepo_p) < 0) + die("index file corrupt"); + + commit = lookup_commit(subrepo_p, &entry.oid); if (!commit) - die("Commit %s in submodule path %s%s not found", + die("Commit %s in submodule path %s not found", oid_to_hex(&entry.oid), - base->buf, entry.path); - - // FIXME: This is the wrong repo instance (it refers to the superproject) - // it will always fail as is (will fix in later patch) - // This current codepath isn't executed by any existing callbacks - // so it wouldn't show up as an issue at this time. - if (repo_parse_commit(r, commit)) - die("Invalid commit %s in submodule path %s%s", + base->buf); + + if (repo_parse_commit(subrepo_p, commit)) + die("Invalid commit %s in submodule path %s", oid_to_hex(&entry.oid), - base->buf, entry.path); + base->buf); - oidcpy(&oid, get_commit_tree_oid(commit)); - } - else - continue; + submodule_tree = repo_get_commit_tree(subrepo_p, commit); + oidcpy(&oid, submodule_tree ? &submodule_tree->object.oid : NULL); - len = tree_entry_len(&entry); - strbuf_add(base, entry.path, len); - strbuf_addch(base, '/'); - retval = read_tree_at(r, lookup_tree(r, &oid), - base, pathspec, - fn, context); - strbuf_setlen(base, oldlen); - if (retval) - return -1; + strbuf_addch(base, '/'); + + retval = read_tree_at(subrepo_p, lookup_tree(subrepo_p, &oid), + base, pathspec, + fn, context); + if (retval) + die("failed to read tree for %s", base->buf); + strbuf_setlen(base, oldlen); + repo_clear(subrepo_p); + } + // else, this is a file (or a submodule, but no pathspec->recurse_submodules) } return 0; }