Message ID | 4672e3d958625cd76eb8056ab434e9a37f52661e.1665973401.git.gitgitgadget@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | archive: Add --recurse-submodules to git-archive command | expand |
"Heather Lapointe via GitGitGadget" <gitgitgadget@gmail.com> writes: > index 34549d849f1..f81ef741487 100644 > --- a/archive.c > +++ b/archive.c > @@ -213,6 +214,25 @@ static void queue_directory(const struct object_id *oid, > oidcpy(&d->oid, oid); > } > > +static void queue_submodule( > + struct repository *superproject, > + const struct object_id *oid, > + struct strbuf *base, const char *filename, > + unsigned mode, struct archiver_context *c) > +{ > + struct repository subrepo; > + > + if (repo_submodule_init(&subrepo, superproject, filename, null_oid())) > + return; > + > + if (repo_read_index(&subrepo) < 0) > + die("index file corrupt"); > + > + queue_directory(oid, base, filename, mode, c); > + > + repo_clear(&subrepo); > +} > + This bit is puzzling to me because we init the submodule, read its index, and then don't read objects from it at all. How does this work when we aren't reading objects from the submodule we init here? My guess is that read_tree() is already doing the heavy lifting of recursing into submodules, so we don't need to worry any more about init-ing submodules in archive.c, which is great. So in effect, this is just checking whether we can read the submodule and its index. We can drop this check since we already do that check in read_tree(). What's much more surprising is that you can delete the entire function body (even queue_directory()!) and the tests still pass! The tests are definitely testing what they say they are (I've also checked the tarballs), so I'm not sure what's going on. I commented out queue_directory() in the S_ISDIR case, and the only test failures I saw were: - t5000.68, which uses a glob in its pathspec. I tried using a glob in the archive submodule tests, but I couldn't reproduce the failure. - t5004.11, which is a really big test case that I didn't bother looking deeply into. So I'm at a loss as to what queue_directory() actually does. 
My best guess at a reproduction would be to make a subdirectory in t5000.68 a submodule. If we do find such a reproducing case, we should add it to the test suite. > static int write_directory( > struct repository *repo, > struct archiver_context *c) > @@ -228,9 +248,11 @@ static int write_directory( > write_directory(repo, c) || > write_archive_entry(repo, &d->oid, d->path, d->baselen, > d->path + d->baselen, d->mode, > - c) != READ_TREE_RECURSIVE; > + c); > free(d); > - return ret ? -1 : 0; > + if (ret == READ_TREE_RECURSIVE) > + return 0; > + return ret; > } > > static int queue_or_write_archive_entry( > @@ -263,6 +285,11 @@ static int queue_or_write_archive_entry( > return 0; > queue_directory(oid, base, filename, mode, c); > return READ_TREE_RECURSIVE; > + } else if (c->args->recurse_submodules && S_ISGITLINK(mode)) { > + if (is_submodule_active(r, filename)) { > + queue_submodule(r, oid, base, filename, mode, c); > + return READ_TREE_RECURSIVE; > + } If we are omitting inactive submodules from the archive, we should test this behavior. 
> } > > if (write_directory(r, c)) > @@ -446,6 +473,7 @@ static void parse_pathspec_arg( > PATHSPEC_PREFER_FULL, > "", pathspec); > ar_args->pathspec.recursive = 1; > + ar_args->pathspec.recurse_submodules = ar_args->recurse_submodules; > if (pathspec) { > while (*pathspec) { > if (**pathspec && !path_exists(repo, ar_args, *pathspec)) > @@ -609,6 +637,7 @@ static int parse_archive_args(int argc, const char **argv, > int verbose = 0; > int i; > int list = 0; > + int recurse_submodules = 0; > int worktree_attributes = 0; > struct option opts[] = { > OPT_GROUP(""), > @@ -623,6 +652,8 @@ static int parse_archive_args(int argc, const char **argv, > add_file_cb, (intptr_t)&base }, > OPT_STRING('o', "output", &output, N_("file"), > N_("write the archive to this file")), > + OPT_BOOL(0, "recurse-submodules", &recurse_submodules, > + N_("include submodules in archive")), > OPT_BOOL(0, "worktree-attributes", &worktree_attributes, > N_("read .gitattributes in working directory")), > OPT__VERBOSE(&verbose, N_("report archived files on stderr")), > @@ -686,6 +717,7 @@ static int parse_archive_args(int argc, const char **argv, > args->verbose = verbose; > args->base = base; > args->baselen = strlen(base); > + args->recurse_submodules = recurse_submodules; > args->worktree_attributes = worktree_attributes; > > return argc; > diff --git a/archive.h b/archive.h > index 540a3b12130..1b21484dda6 100644 > --- a/archive.h > +++ b/archive.h > @@ -18,6 +18,7 @@ struct archiver_args { > timestamp_t time; > struct pathspec pathspec; > unsigned int verbose : 1; > + unsigned int recurse_submodules : 1; > unsigned int worktree_attributes : 1; > unsigned int convert : 1; > int compression_level; > -- > gitgitgadget
Am 27.10.22 um 01:34 schrieb Glen Choo: > "Heather Lapointe via GitGitGadget" <gitgitgadget@gmail.com> writes: > >> index 34549d849f1..f81ef741487 100644 >> --- a/archive.c >> +++ b/archive.c >> @@ -213,6 +214,25 @@ static void queue_directory(const struct object_id *oid, >> oidcpy(&d->oid, oid); >> } >> >> +static void queue_submodule( >> + struct repository *superproject, >> + const struct object_id *oid, >> + struct strbuf *base, const char *filename, >> + unsigned mode, struct archiver_context *c) >> +{ >> + struct repository subrepo; >> + >> + if (repo_submodule_init(&subrepo, superproject, filename, null_oid())) >> + return; >> + >> + if (repo_read_index(&subrepo) < 0) >> + die("index file corrupt"); >> + >> + queue_directory(oid, base, filename, mode, c); >> + >> + repo_clear(&subrepo); >> +} >> + > What's much more surprising is that you can delete the entire function > body (even queue_directory()!) and the tests still pass! The tests are > definitely testing what they say they are (I've also checked the > tarballs), so I'm not sure what's going on. > > I commented out queue_directory() in the S_ISDIR case, and the only test > failures I saw were: > > - t5000.68, which uses a glob in its pathspec. I tried using a glob for > in the archive submodule tests, but I couldn't reproduce the failure. > - t5004.11, which is a really big test case that I didn't bother looking > deeply into. > > So I'm at a loss as to what queue_directory() actually does. An archive doesn't strictly need directory entries. If it contains a file with a deeply nested path then extractors will create the parent directory hierarchy regardless. diff(1) won't notice any difference. Directory entries are mainly included to specify the permission bits. t5000.68 checks for the directory entries in the output given by the option --verbose of git archive. t5004.11 checks the number of archive entries (including directories) using "zipinfo -h". René
René Scharfe <l.s.r@web.de> writes: > Am 27.10.22 um 01:34 schrieb Glen Choo: >> "Heather Lapointe via GitGitGadget" <gitgitgadget@gmail.com> writes: >> >>> index 34549d849f1..f81ef741487 100644 >>> --- a/archive.c >>> +++ b/archive.c >>> @@ -213,6 +214,25 @@ static void queue_directory(const struct object_id *oid, >>> oidcpy(&d->oid, oid); >>> } >>> >>> +static void queue_submodule( >>> + struct repository *superproject, >>> + const struct object_id *oid, >>> + struct strbuf *base, const char *filename, >>> + unsigned mode, struct archiver_context *c) >>> +{ >>> + struct repository subrepo; >>> + >>> + if (repo_submodule_init(&subrepo, superproject, filename, null_oid())) >>> + return; >>> + >>> + if (repo_read_index(&subrepo) < 0) >>> + die("index file corrupt"); >>> + >>> + queue_directory(oid, base, filename, mode, c); >>> + >>> + repo_clear(&subrepo); >>> +} >>> + > >> What's much more surprising is that you can delete the entire function >> body (even queue_directory()!) and the tests still pass! The tests are >> definitely testing what they say they are (I've also checked the >> tarballs), so I'm not sure what's going on. >> >> I commented out queue_directory() in the S_ISDIR case, and the only test >> failures I saw were: >> >> - t5000.68, which uses a glob in its pathspec. I tried using a glob for >> in the archive submodule tests, but I couldn't reproduce the failure. >> - t5004.11, which is a really big test case that I didn't bother looking >> deeply into. >> >> So I'm at a loss as to what queue_directory() actually does. > An archive doesn't strictly need directory entries. If it contains a > file with a deeply nested path then extractors will create the parent > directory hierarchy regardless. diff(1) won't notice any difference. > Directory entries are mainly included to specify the permission bits. Thanks. In that case, we should probably also test the case where there are empty directories (e.g. 
when a file is excluded by a pathspec), and we should also check the permission bits. > > t5000.68 checks for the directory entries in the output given by the > option --verbose of git archive. t5004.11 checks the number of archive > entries (including directories) using "zipinfo -h". > > René
René Scharfe <l.s.r@web.de> writes: > Am 27.10.22 um 01:34 schrieb Glen Choo: >> "Heather Lapointe via GitGitGadget" <gitgitgadget@gmail.com> writes: >> >>> index 34549d849f1..f81ef741487 100644 >>> --- a/archive.c >>> +++ b/archive.c >>> @@ -213,6 +214,25 @@ static void queue_directory(const struct object_id *oid, >>> oidcpy(&d->oid, oid); >>> } >>> >>> +static void queue_submodule( >>> + struct repository *superproject, >>> + const struct object_id *oid, >>> + struct strbuf *base, const char *filename, >>> + unsigned mode, struct archiver_context *c) >>> +{ >>> + struct repository subrepo; >>> + >>> + if (repo_submodule_init(&subrepo, superproject, filename, null_oid())) >>> + return; >>> + >>> + if (repo_read_index(&subrepo) < 0) >>> + die("index file corrupt"); >>> + >>> + queue_directory(oid, base, filename, mode, c); >>> + >>> + repo_clear(&subrepo); >>> +} >>> + > >> What's much more surprising is that you can delete the entire function >> body (even queue_directory()!) and the tests still pass! The tests are >> definitely testing what they say they are (I've also checked the >> tarballs), so I'm not sure what's going on. >> >> I commented out queue_directory() in the S_ISDIR case, and the only test >> failures I saw were: >> >> - t5000.68, which uses a glob in its pathspec. I tried using a glob for >> in the archive submodule tests, but I couldn't reproduce the failure. >> - t5004.11, which is a really big test case that I didn't bother looking >> deeply into. >> >> So I'm at a loss as to what queue_directory() actually does. > An archive doesn't strictly need directory entries. If it contains a > file with a deeply nested path then extractors will create the parent > directory hierarchy regardless. diff(1) won't notice any difference. > Directory entries are mainly included to specify the permission bits. Thanks. In that case, we should probably also test the case where there are empty directories (e.g. 
when a file is excluded by a pathspec), and we should also check the permission bits. > > t5000.68 checks for the directory entries in the output given by the > option --verbose of git archive. t5004.11 checks the number of archive > entries (including directories) using "zipinfo -h". > > René
René Scharfe <l.s.r@web.de> writes: > Am 27.10.22 um 01:34 schrieb Glen Choo: >> "Heather Lapointe via GitGitGadget" <gitgitgadget@gmail.com> writes: >> >>> index 34549d849f1..f81ef741487 100644 >>> --- a/archive.c >>> +++ b/archive.c >>> @@ -213,6 +214,25 @@ static void queue_directory(const struct object_id *oid, >>> oidcpy(&d->oid, oid); >>> } >>> >>> +static void queue_submodule( >>> + struct repository *superproject, >>> + const struct object_id *oid, >>> + struct strbuf *base, const char *filename, >>> + unsigned mode, struct archiver_context *c) >>> +{ >>> + struct repository subrepo; >>> + >>> + if (repo_submodule_init(&subrepo, superproject, filename, null_oid())) >>> + return; >>> + >>> + if (repo_read_index(&subrepo) < 0) >>> + die("index file corrupt"); >>> + >>> + queue_directory(oid, base, filename, mode, c); >>> + >>> + repo_clear(&subrepo); >>> +} >>> + > >> What's much more surprising is that you can delete the entire function >> body (even queue_directory()!) and the tests still pass! The tests are >> definitely testing what they say they are (I've also checked the >> tarballs), so I'm not sure what's going on. >> >> I commented out queue_directory() in the S_ISDIR case, and the only test >> failures I saw were: >> >> - t5000.68, which uses a glob in its pathspec. I tried using a glob for >> in the archive submodule tests, but I couldn't reproduce the failure. >> - t5004.11, which is a really big test case that I didn't bother looking >> deeply into. >> >> So I'm at a loss as to what queue_directory() actually does. > An archive doesn't strictly need directory entries. If it contains a > file with a deeply nested path then extractors will create the parent > directory hierarchy regardless. diff(1) won't notice any difference. > Directory entries are mainly included to specify the permission bits. Thanks. In that case, we should probably also test the case where there are empty directories (e.g. 
when a file is excluded by a pathspec), and we should also check the permission bits. > > t5000.68 checks for the directory entries in the output given by the > option --verbose of git archive. t5004.11 checks the number of archive > entries (including directories) using "zipinfo -h". > > René
diff --git a/Documentation/git-archive.txt b/Documentation/git-archive.txt index 60c040988bb..22f54428b98 100644 --- a/Documentation/git-archive.txt +++ b/Documentation/git-archive.txt @@ -10,7 +10,8 @@ SYNOPSIS -------- [verse] 'git archive' [--format=<fmt>] [--list] [--prefix=<prefix>/] [<extra>] - [-o <file> | --output=<file>] [--worktree-attributes] + [-o <file> | --output=<file>] + [--recurse-submodules] [--worktree-attributes] [--remote=<repo> [--exec=<git-upload-archive>]] <tree-ish> [<path>...] @@ -82,6 +83,9 @@ The file mode is limited to a regular file, and the option may be subject to platform-dependent command-line limits. For non-trivial cases, write an untracked file and use `--add-file` instead. +--recurse-submodules:: + Include submodules recursively in archive. + --worktree-attributes:: Look for attributes in .gitattributes files in the working tree as well (see <<ATTRIBUTES>>). diff --git a/archive.c b/archive.c index 34549d849f1..f81ef741487 100644 --- a/archive.c +++ b/archive.c @@ -10,6 +10,7 @@ #include "unpack-trees.h" #include "dir.h" #include "quote.h" +#include "submodule.h" static char const * const archive_usage[] = { N_("git archive [<options>] <tree-ish> [<path>...]"), @@ -213,6 +214,25 @@ static void queue_directory(const struct object_id *oid, oidcpy(&d->oid, oid); } +static void queue_submodule( + struct repository *superproject, + const struct object_id *oid, + struct strbuf *base, const char *filename, + unsigned mode, struct archiver_context *c) +{ + struct repository subrepo; + + if (repo_submodule_init(&subrepo, superproject, filename, null_oid())) + return; + + if (repo_read_index(&subrepo) < 0) + die("index file corrupt"); + + queue_directory(oid, base, filename, mode, c); + + repo_clear(&subrepo); +} + static int write_directory( struct repository *repo, struct archiver_context *c) @@ -228,9 +248,11 @@ static int write_directory( write_directory(repo, c) || write_archive_entry(repo, &d->oid, d->path, d->baselen, d->path + 
d->baselen, d->mode, - c) != READ_TREE_RECURSIVE; + c); free(d); - return ret ? -1 : 0; + if (ret == READ_TREE_RECURSIVE) + return 0; + return ret; } static int queue_or_write_archive_entry( @@ -263,6 +285,11 @@ static int queue_or_write_archive_entry( return 0; queue_directory(oid, base, filename, mode, c); return READ_TREE_RECURSIVE; + } else if (c->args->recurse_submodules && S_ISGITLINK(mode)) { + if (is_submodule_active(r, filename)) { + queue_submodule(r, oid, base, filename, mode, c); + return READ_TREE_RECURSIVE; + } } if (write_directory(r, c)) @@ -446,6 +473,7 @@ static void parse_pathspec_arg( PATHSPEC_PREFER_FULL, "", pathspec); ar_args->pathspec.recursive = 1; + ar_args->pathspec.recurse_submodules = ar_args->recurse_submodules; if (pathspec) { while (*pathspec) { if (**pathspec && !path_exists(repo, ar_args, *pathspec)) @@ -609,6 +637,7 @@ static int parse_archive_args(int argc, const char **argv, int verbose = 0; int i; int list = 0; + int recurse_submodules = 0; int worktree_attributes = 0; struct option opts[] = { OPT_GROUP(""), @@ -623,6 +652,8 @@ static int parse_archive_args(int argc, const char **argv, add_file_cb, (intptr_t)&base }, OPT_STRING('o', "output", &output, N_("file"), N_("write the archive to this file")), + OPT_BOOL(0, "recurse-submodules", &recurse_submodules, + N_("include submodules in archive")), OPT_BOOL(0, "worktree-attributes", &worktree_attributes, N_("read .gitattributes in working directory")), OPT__VERBOSE(&verbose, N_("report archived files on stderr")), @@ -686,6 +717,7 @@ static int parse_archive_args(int argc, const char **argv, args->verbose = verbose; args->base = base; args->baselen = strlen(base); + args->recurse_submodules = recurse_submodules; args->worktree_attributes = worktree_attributes; return argc; diff --git a/archive.h b/archive.h index 540a3b12130..1b21484dda6 100644 --- a/archive.h +++ b/archive.h @@ -18,6 +18,7 @@ struct archiver_args { timestamp_t time; struct pathspec pathspec; unsigned int verbose 
: 1; + unsigned int recurse_submodules : 1; unsigned int worktree_attributes : 1; unsigned int convert : 1; int compression_level;