diff mbox series

[v2,1/6] archive: optionally add "virtual" files

Message ID 49ff3c1f2b32b16df2b4216aa016d715b6de46bc.1644187146.git.gitgitgadget@gmail.com (mailing list archive)
State New, archived
Headers show
Series scalar: implement the subcommand "diagnose" | expand

Commit Message

Johannes Schindelin Feb. 6, 2022, 10:39 p.m. UTC
From: Johannes Schindelin <johannes.schindelin@gmx.de>

With the `--add-file-with-content=<path>:<content>` option, `git
archive` now supports use cases where relatively trivial files need to
be added that do not exist on disk.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
 Documentation/git-archive.txt | 11 ++++++++
 archive.c                     | 51 +++++++++++++++++++++++++++++------
 t/t5003-archive-zip.sh        | 12 +++++++++
 3 files changed, 66 insertions(+), 8 deletions(-)

Comments

René Scharfe Feb. 7, 2022, 7:55 p.m. UTC | #1
Am 06.02.22 um 23:39 schrieb Johannes Schindelin via GitGitGadget:
> From: Johannes Schindelin <johannes.schindelin@gmx.de>
>
> With the `--add-file-with-content=<path>:<content>` option, `git
> archive` now supports use cases where relatively trivial files need to
> be added that do not exist on disk.
>
> Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
> ---
>  Documentation/git-archive.txt | 11 ++++++++
>  archive.c                     | 51 +++++++++++++++++++++++++++++------
>  t/t5003-archive-zip.sh        | 12 +++++++++
>  3 files changed, 66 insertions(+), 8 deletions(-)
>
> diff --git a/Documentation/git-archive.txt b/Documentation/git-archive.txt
> index bc4e76a7834..1b52a0a65a1 100644
> --- a/Documentation/git-archive.txt
> +++ b/Documentation/git-archive.txt
> @@ -61,6 +61,17 @@ OPTIONS
>  	by concatenating the value for `--prefix` (if any) and the
>  	basename of <file>.
>
> +--add-file-with-content=<path>:<content>::
> +	Add the specified contents to the archive.  Can be repeated to add
> +	multiple files.  The path of the file in the archive is built
> +	by concatenating the value for `--prefix` (if any) and the
> +	basename of <file>.
> ++
> +The `<path>` cannot contain any colon, the file mode is limited to
> +a regular file, and the option may be subject platform-dependent

s/subject/& to/

> +command-line limits. For non-trivial cases, write an untracked file
> +and use `--add-file` instead.
> +

We could use that option in Git's own Makefile to add the file named
"version", which contains $GIT_VERSION.  Hmm, but it also contains a
terminating newline, which would be a bit tricky (but not impossible) to
add.  Would it make sense to add one automatically if it's missing (e.g.
with strbuf_complete_line)?  Not sure.

>  --worktree-attributes::
>  	Look for attributes in .gitattributes files in the working tree
>  	as well (see <<ATTRIBUTES>>).
> diff --git a/archive.c b/archive.c
> index a3bbb091256..172efd690c3 100644
> --- a/archive.c
> +++ b/archive.c
> @@ -263,6 +263,7 @@ static int queue_or_write_archive_entry(const struct object_id *oid,
>  struct extra_file_info {
>  	char *base;
>  	struct stat stat;
> +	void *content;
>  };
>
>  int write_archive_entries(struct archiver_args *args,
> @@ -337,7 +338,13 @@ int write_archive_entries(struct archiver_args *args,
>  		strbuf_addstr(&path_in_archive, basename(path));
>
>  		strbuf_reset(&content);
> -		if (strbuf_read_file(&content, path, info->stat.st_size) < 0)
> +		if (info->content)
> +			err = write_entry(args, &fake_oid, path_in_archive.buf,
> +					  path_in_archive.len,
> +					  info->stat.st_mode,
> +					  info->content, info->stat.st_size);
> +		else if (strbuf_read_file(&content, path,
> +					  info->stat.st_size) < 0)
>  			err = error_errno(_("could not read '%s'"), path);
>  		else
>  			err = write_entry(args, &fake_oid, path_in_archive.buf,
> @@ -493,6 +500,7 @@ static void extra_file_info_clear(void *util, const char *str)
>  {
>  	struct extra_file_info *info = util;
>  	free(info->base);
> +	free(info->content);
>  	free(info);
>  }
>
> @@ -514,14 +522,38 @@ static int add_file_cb(const struct option *opt, const char *arg, int unset)
>  	if (!arg)
>  		return -1;
>
> -	path = prefix_filename(args->prefix, arg);
> -	item = string_list_append_nodup(&args->extra_files, path);
> -	item->util = info = xmalloc(sizeof(*info));
> +	info = xmalloc(sizeof(*info));
>  	info->base = xstrdup_or_null(base);
> -	if (stat(path, &info->stat))
> -		die(_("File not found: %s"), path);
> -	if (!S_ISREG(info->stat.st_mode))
> -		die(_("Not a regular file: %s"), path);
> +
> +	if (strcmp(opt->long_name, "add-file-with-content")) {

Equivalent to:

	if (!strcmp(opt->long_name, "add-file")) {

I mention that because the inequality check confused me a bit at first.

> +		path = prefix_filename(args->prefix, arg);
> +		if (stat(path, &info->stat))
> +			die(_("File not found: %s"), path);
> +		if (!S_ISREG(info->stat.st_mode))
> +			die(_("Not a regular file: %s"), path);
> +		info->content = NULL; /* read the file later */
> +	} else {
> +		const char *colon = strchr(arg, ':');
> +		char *p;
> +
> +		if (!colon)
> +			die(_("missing colon: '%s'"), arg);
> +
> +		p = xstrndup(arg, colon - arg);
> +		if (!args->prefix)
> +			path = p;
> +		else {
> +			path = prefix_filename(args->prefix, p);
> +			free(p);
> +		}
> +		memset(&info->stat, 0, sizeof(info->stat));
> +		info->stat.st_mode = S_IFREG | 0644;
> +		info->content = xstrdup(colon + 1);
> +		info->stat.st_size = strlen(info->content);
> +	}
> +	item = string_list_append_nodup(&args->extra_files, path);
> +	item->util = info;
> +
>  	return 0;
>  }
>
> @@ -554,6 +586,9 @@ static int parse_archive_args(int argc, const char **argv,
>  		{ OPTION_CALLBACK, 0, "add-file", args, N_("file"),
>  		  N_("add untracked file to archive"), 0, add_file_cb,
>  		  (intptr_t)&base },
> +		{ OPTION_CALLBACK, 0, "add-file-with-content", args,
> +		  N_("file"), N_("add untracked file to archive"), 0,
                      ^^^^
"<file>" seems wrong, because there is no actual file.  It should rather
be "<name>:<content>" for the virtual one, right?

> +		  add_file_cb, (intptr_t)&base },
>  		OPT_STRING('o', "output", &output, N_("file"),
>  			N_("write the archive to this file")),
>  		OPT_BOOL(0, "worktree-attributes", &worktree_attributes,
> diff --git a/t/t5003-archive-zip.sh b/t/t5003-archive-zip.sh
> index 1e6d18b140e..8ff1257f1a0 100755
> --- a/t/t5003-archive-zip.sh
> +++ b/t/t5003-archive-zip.sh
> @@ -206,6 +206,18 @@ test_expect_success 'git archive --format=zip --add-file' '
>  check_zip with_untracked
>  check_added with_untracked untracked untracked
>
> +test_expect_success UNZIP 'git archive --format=zip --add-file-with-content' '
> +	git archive --format=zip >with_file_with_content.zip \
> +		--add-file-with-content=hello:world $EMPTY_TREE &&
> +	test_when_finished "rm -rf tmp-unpack" &&
> +	mkdir tmp-unpack && (
> +		cd tmp-unpack &&
> +		"$GIT_UNZIP" ../with_file_with_content.zip &&
> +		test_path_is_file hello &&
> +		test world = $(cat hello)
> +	)
> +'
> +
>  test_expect_success 'git archive --format=zip --add-file twice' '
>  	echo untracked >untracked &&
>  	git archive --format=zip --prefix=one/ --add-file=untracked \
Junio C Hamano Feb. 7, 2022, 11:30 p.m. UTC | #2
René Scharfe <l.s.r@web.de> writes:

> We could use that option in Git's own Makefile to add the file named
> "version", which contains $GIT_VERSION.  Hmm, but it also contains a
> terminating newline, which would be a bit tricky (but not impossible) to
> add.  Would it make sense to add one automatically if it's missing (e.g.
> with strbuf_complete_line)?  Not sure.

I do not think it is a good UI to give raw file content from the
command line, which will be usable only for trivial, even single
liner files, and forces people to learn two parallel option, one
for trivial ones and the other for contents with meaningful size.

"--add-blob=<path>:<blob-object-name>" may be another option, useful
when you have done "hash-object -w" already, and can be used to add
single-liner, or an entire novel.

In any case, "--add-file=<file>", which we already have, would be
more appropriate feature to use to record our "version" file, so
there is no need to change our Makefile for it.
Johannes Schindelin Feb. 8, 2022, 12:54 p.m. UTC | #3
Hi René,

On Mon, 7 Feb 2022, René Scharfe wrote:

> Am 06.02.22 um 23:39 schrieb Johannes Schindelin via GitGitGadget:
> > From: Johannes Schindelin <johannes.schindelin@gmx.de>
> >
> > With the `--add-file-with-content=<path>:<content>` option, `git
> > archive` now supports use cases where relatively trivial files need to
> > be added that do not exist on disk.
> >
> > Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
> > ---
> >  Documentation/git-archive.txt | 11 ++++++++
> >  archive.c                     | 51 +++++++++++++++++++++++++++++------
> >  t/t5003-archive-zip.sh        | 12 +++++++++
> >  3 files changed, 66 insertions(+), 8 deletions(-)
> >
> > diff --git a/Documentation/git-archive.txt b/Documentation/git-archive.txt
> > index bc4e76a7834..1b52a0a65a1 100644
> > --- a/Documentation/git-archive.txt
> > +++ b/Documentation/git-archive.txt
> > @@ -61,6 +61,17 @@ OPTIONS
> >  	by concatenating the value for `--prefix` (if any) and the
> >  	basename of <file>.
> >
> > +--add-file-with-content=<path>:<content>::
> > +	Add the specified contents to the archive.  Can be repeated to add
> > +	multiple files.  The path of the file in the archive is built
> > +	by concatenating the value for `--prefix` (if any) and the
> > +	basename of <file>.
> > ++
> > +The `<path>` cannot contain any colon, the file mode is limited to
> > +a regular file, and the option may be subject platform-dependent
>
> s/subject/& to/

Thanks.

> > +command-line limits. For non-trivial cases, write an untracked file
> > +and use `--add-file` instead.
> > +
>
> We could use that option in Git's own Makefile to add the file named
> "version", which contains $GIT_VERSION.

We could do that, that opportunity is a side effect of this patch series.

> Hmm, but it also contains a terminating newline, which would be a bit
> tricky (but not impossible) to add.  Would it make sense to add one
> automatically if it's missing (e.g. with strbuf_complete_line)?  Not
> sure.

It is really easy:

	LF='
	'

	git archive --add-file-with-content=version:"$GIT_VERSION$LF" ...

(That's shell script, in the Makefile it would need those `\`
continuations.)

> >  --worktree-attributes::
> >  	Look for attributes in .gitattributes files in the working tree
> >  	as well (see <<ATTRIBUTES>>).
> > diff --git a/archive.c b/archive.c
> > index a3bbb091256..172efd690c3 100644
> > --- a/archive.c
> > +++ b/archive.c
> > @@ -263,6 +263,7 @@ static int queue_or_write_archive_entry(const struct object_id *oid,
> >  struct extra_file_info {
> >  	char *base;
> >  	struct stat stat;
> > +	void *content;
> >  };
> >
> >  int write_archive_entries(struct archiver_args *args,
> > @@ -337,7 +338,13 @@ int write_archive_entries(struct archiver_args *args,
> >  		strbuf_addstr(&path_in_archive, basename(path));
> >
> >  		strbuf_reset(&content);
> > -		if (strbuf_read_file(&content, path, info->stat.st_size) < 0)
> > +		if (info->content)
> > +			err = write_entry(args, &fake_oid, path_in_archive.buf,
> > +					  path_in_archive.len,
> > +					  info->stat.st_mode,
> > +					  info->content, info->stat.st_size);
> > +		else if (strbuf_read_file(&content, path,
> > +					  info->stat.st_size) < 0)
> >  			err = error_errno(_("could not read '%s'"), path);
> >  		else
> >  			err = write_entry(args, &fake_oid, path_in_archive.buf,
> > @@ -493,6 +500,7 @@ static void extra_file_info_clear(void *util, const char *str)
> >  {
> >  	struct extra_file_info *info = util;
> >  	free(info->base);
> > +	free(info->content);
> >  	free(info);
> >  }
> >
> > @@ -514,14 +522,38 @@ static int add_file_cb(const struct option *opt, const char *arg, int unset)
> >  	if (!arg)
> >  		return -1;
> >
> > -	path = prefix_filename(args->prefix, arg);
> > -	item = string_list_append_nodup(&args->extra_files, path);
> > -	item->util = info = xmalloc(sizeof(*info));
> > +	info = xmalloc(sizeof(*info));
> >  	info->base = xstrdup_or_null(base);
> > -	if (stat(path, &info->stat))
> > -		die(_("File not found: %s"), path);
> > -	if (!S_ISREG(info->stat.st_mode))
> > -		die(_("Not a regular file: %s"), path);
> > +
> > +	if (strcmp(opt->long_name, "add-file-with-content")) {
>
> Equivalent to:
>
> 	if (!strcmp(opt->long_name, "add-file")) {
>
> I mention that because the inequality check confused me a bit at first.

Good point. For some reason I thought it would be clearer to handle
everything but `--add-file-with-content` here, but that "everything but"
is only `--add-file`, so I sowed more confusion. Sorry about that.

>
> > +		path = prefix_filename(args->prefix, arg);
> > +		if (stat(path, &info->stat))
> > +			die(_("File not found: %s"), path);
> > +		if (!S_ISREG(info->stat.st_mode))
> > +			die(_("Not a regular file: %s"), path);
> > +		info->content = NULL; /* read the file later */
> > +	} else {
> > +		const char *colon = strchr(arg, ':');
> > +		char *p;
> > +
> > +		if (!colon)
> > +			die(_("missing colon: '%s'"), arg);
> > +
> > +		p = xstrndup(arg, colon - arg);
> > +		if (!args->prefix)
> > +			path = p;
> > +		else {
> > +			path = prefix_filename(args->prefix, p);
> > +			free(p);
> > +		}
> > +		memset(&info->stat, 0, sizeof(info->stat));
> > +		info->stat.st_mode = S_IFREG | 0644;
> > +		info->content = xstrdup(colon + 1);
> > +		info->stat.st_size = strlen(info->content);
> > +	}
> > +	item = string_list_append_nodup(&args->extra_files, path);
> > +	item->util = info;
> > +
> >  	return 0;
> >  }
> >
> > @@ -554,6 +586,9 @@ static int parse_archive_args(int argc, const char **argv,
> >  		{ OPTION_CALLBACK, 0, "add-file", args, N_("file"),
> >  		  N_("add untracked file to archive"), 0, add_file_cb,
> >  		  (intptr_t)&base },
> > +		{ OPTION_CALLBACK, 0, "add-file-with-content", args,
> > +		  N_("file"), N_("add untracked file to archive"), 0,
>                       ^^^^
> "<file>" seems wrong, because there is no actual file.  It should rather
> be "<name>:<content>" for the virtual one, right?

Or `<path>:<content>`. Yes.

Again, thank you for your clear and helpful review,
Dscho
Johannes Schindelin Feb. 8, 2022, 1:12 p.m. UTC | #4
Hi Junio,

On Mon, 7 Feb 2022, Junio C Hamano wrote:

> René Scharfe <l.s.r@web.de> writes:
>
> > We could use that option in Git's own Makefile to add the file named
> > "version", which contains $GIT_VERSION.  Hmm, but it also contains a
> > terminating newline, which would be a bit tricky (but not impossible) to
> > add.  Would it make sense to add one automatically if it's missing (e.g.
> > with strbuf_complete_line)?  Not sure.
>
> I do not think it is a good UI to give raw file content from the
> command line, which will be usable only for trivial, even single
> liner files, and forces people to learn two parallel option, one
> for trivial ones and the other for contents with meaningful size.

Nevertheless, it is still the most elegant way that I can think of to
generate a diagnostic `.zip` file without messing up the very things that
are to be diagnosed: the repository and the worktree.

> "--add-blob=<path>:<blob-object-name>" may be another option, useful
> when you have done "hash-object -w" already, and can be used to add
> single-liner, or an entire novel.

This would mess with the repository. Granted, it is unlikely that adding a
tiny blob will all of a sudden work around a bug that the user wanted to
report, but less big mutations have been known to subtly change a bug's
manifested symptoms.

So I really do not want to do that, not in `scalar diagnose.

> In any case, "--add-file=<file>", which we already have, would be
> more appropriate feature to use to record our "version" file, so
> there is no need to change our Makefile for it.

Same here. It is bad enough that `scalar diagnose` has to create a
directory in the current enlistment. Let's not make the situation even
worse.

The most elegant solution would have been that streaming `--add-file` mode
suggested by René, I think, but that's too involved to implement just to
benefit `scalar diagnose`. It's not like we can simply stream the contents
via `stdin`, as there are more than one "virtual" file we need to add to
that `.zip` file.

Ciao,
Dscho
Junio C Hamano Feb. 8, 2022, 5:44 p.m. UTC | #5
Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:

>> > We could use that option in Git's own Makefile to add the file named
>> > "version", which contains $GIT_VERSION.  Hmm, but it also contains a
>> > terminating newline, which would be a bit tricky (but not impossible) to
>> > add.  Would it make sense to add one automatically if it's missing (e.g.
>> > with strbuf_complete_line)?  Not sure.
>>
>> I do not think it is a good UI to give raw file content from the
>> command line, which will be usable only for trivial, even single
>> liner files, and forces people to learn two parallel option, one
>> for trivial ones and the other for contents with meaningful size.
>
> Nevertheless, it is still the most elegant way that I can think of to
> generate a diagnostic `.zip` file without messing up the very things that
> are to be diagnosed: the repository and the worktree.

Puzzled.  Are you feeding contents of a .zip file from the command
line?

I was mostly worried about busting command line argument limit by
trying to feed too many bytes, as the ceiling is fairly low on some
platforms.  Another worry was that when <contents> can have
arbitrary bytes, with --opt=<path>:<contents> syntax, the input
becomes ambiguous (i.e. "which colon is the <path> separator?"),
without some way to escape a colon in the payload.

For a single-liner, --add-file-with-contents=<path>:<contents> would
be an OK way, and my comment was not a strong objection against this
new option existing.  It was primarily an objection against changing
the way to add the 'version' file in our "make dist" procedure to
use it anyway.

But now I think about it more, I am becoming less happy about it
existing in the first place.

This will throw another monkey wrench to Konstantin's plan [*] to
make "git archive" output verifiable with the signature on original
Git objects, but it is not a new problem ;-)


[Reference]

* https://lore.kernel.org/git/20220207213449.ljqjhdx4f45a3lx5@meerkat.local/
René Scharfe Feb. 8, 2022, 8:58 p.m. UTC | #6
Am 08.02.22 um 18:44 schrieb Junio C Hamano:
> Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:
>
>>>> We could use that option in Git's own Makefile to add the file named
>>>> "version", which contains $GIT_VERSION.  Hmm, but it also contains a
>>>> terminating newline, which would be a bit tricky (but not impossible) to
>>>> add.  Would it make sense to add one automatically if it's missing (e.g.
>>>> with strbuf_complete_line)?  Not sure.
>>>
>>> I do not think it is a good UI to give raw file content from the
>>> command line, which will be usable only for trivial, even single
>>> liner files, and forces people to learn two parallel option, one
>>> for trivial ones and the other for contents with meaningful size.
>>
>> Nevertheless, it is still the most elegant way that I can think of to
>> generate a diagnostic `.zip` file without messing up the very things that
>> are to be diagnosed: the repository and the worktree.
>
> Puzzled.  Are you feeding contents of a .zip file from the command
> line?

Kind of.  Command line arguments are built and handed to write_archive()
in-process.  It's done by patch 3 and extended by 5 and 6.

The number of files is relatively low and they aren't huge, right?
Staging their content in the object database would be messy, but $TMPDIR
might be able to take them with a low impact.  Unless the problem to
diagnose is that this directory is full -- but you don't need a fancy
report for that. :)

Currently there is no easy way to write a temporary file with a chosen
name.  diff.c would benefit from such a thing when running an external
diff program; currently it adds a random prefix.  git archive --add-file
also uses the filename (and discards the directory part).  The patch
below adds a function to create temporary files with a chosen name.
Perhaps it would be useful here as well, instead of the new option?

> I was mostly worried about busting command line argument limit by
> trying to feed too many bytes, as the ceiling is fairly low on some
> platforms.

Command line length limits don't apply to the way scalar uses the new
option.

> Another worry was that when <contents> can have
> arbitrary bytes, with --opt=<path>:<contents> syntax, the input
> becomes ambiguous (i.e. "which colon is the <path> separator?"),
> without some way to escape a colon in the payload.

The first colon is the separator here.

> For a single-liner, --add-file-with-contents=<path>:<contents> would
> be an OK way, and my comment was not a strong objection against this
> new option existing.  It was primarily an objection against changing
> the way to add the 'version' file in our "make dist" procedure to
> use it anyway.
>
> But now I think about it more, I am becoming less happy about it
> existing in the first place.
>
> This will throw another monkey wrench to Konstantin's plan [*] to
> make "git archive" output verifiable with the signature on original
> Git objects, but it is not a new problem ;-)
>
>
> [Reference]
>
> * https://lore.kernel.org/git/20220207213449.ljqjhdx4f45a3lx5@meerkat.local/

I don't see the conflict: If an untracked file is added to an archive
using --add-file, --add-file-with-content, or ZIP or tar then we'd
*want* the verification against a signed commit or tag to fail, no?  A
different signature would be required for the non-tracked parts.

René


--- >8 ---
Subject: [PATCH] tempfile: add mks_tempfile_dt()

Add a function to create a temporary file with a certain name in a
temporary directory created using mkdtemp(3).  Its result is more
sightly than the paths created by mks_tempfile_ts(), which include
a random prefix.  That's useful for files passed to a program that
displays their name, e.g. an external diff tool.

Signed-off-by: René Scharfe <l.s.r@web.de>
---
 tempfile.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 tempfile.h | 13 +++++++++++
 2 files changed, 76 insertions(+)

diff --git a/tempfile.c b/tempfile.c
index 94aa18f3f7..2024c82691 100644
--- a/tempfile.c
+++ b/tempfile.c
@@ -56,6 +56,20 @@

 static VOLATILE_LIST_HEAD(tempfile_list);

+static void remove_template_directory(struct tempfile *tempfile,
+				      int in_signal_handler)
+{
+	if (tempfile->directorylen > 0 &&
+	    tempfile->directorylen < tempfile->filename.len &&
+	    tempfile->filename.buf[tempfile->directorylen] == '/') {
+		strbuf_setlen(&tempfile->filename, tempfile->directorylen);
+		if (in_signal_handler)
+			rmdir(tempfile->filename.buf);
+		else
+			rmdir_or_warn(tempfile->filename.buf);
+	}
+}
+
 static void remove_tempfiles(int in_signal_handler)
 {
 	pid_t me = getpid();
@@ -74,6 +88,7 @@ static void remove_tempfiles(int in_signal_handler)
 			unlink(p->filename.buf);
 		else
 			unlink_or_warn(p->filename.buf);
+		remove_template_directory(p, in_signal_handler);

 		p->active = 0;
 	}
@@ -100,6 +115,7 @@ static struct tempfile *new_tempfile(void)
 	tempfile->owner = 0;
 	INIT_LIST_HEAD(&tempfile->list);
 	strbuf_init(&tempfile->filename, 0);
+	tempfile->directorylen = 0;
 	return tempfile;
 }

@@ -198,6 +214,52 @@ struct tempfile *mks_tempfile_tsm(const char *filename_template, int suffixlen,
 	return tempfile;
 }

+struct tempfile *mks_tempfile_dt(const char *directory_template,
+				 const char *filename)
+{
+	struct tempfile *tempfile;
+	const char *tmpdir;
+	struct strbuf sb = STRBUF_INIT;
+	int fd;
+	size_t directorylen;
+
+	if (!ends_with(directory_template, "XXXXXX")) {
+		errno = EINVAL;
+		return NULL;
+	}
+
+	tmpdir = getenv("TMPDIR");
+	if (!tmpdir)
+		tmpdir = "/tmp";
+
+	strbuf_addf(&sb, "%s/%s", tmpdir, directory_template);
+	directorylen = sb.len;
+	if (!mkdtemp(sb.buf)) {
+		int orig_errno = errno;
+		strbuf_release(&sb);
+		errno = orig_errno;
+		return NULL;
+	}
+
+	strbuf_addf(&sb, "/%s", filename);
+	fd = open(sb.buf, O_CREAT | O_EXCL | O_RDWR, 0600);
+	if (fd < 0) {
+		int orig_errno = errno;
+		strbuf_setlen(&sb, directorylen);
+		rmdir(sb.buf);
+		strbuf_release(&sb);
+		errno = orig_errno;
+		return NULL;
+	}
+
+	tempfile = new_tempfile();
+	strbuf_swap(&tempfile->filename, &sb);
+	tempfile->directorylen = directorylen;
+	tempfile->fd = fd;
+	activate_tempfile(tempfile);
+	return tempfile;
+}
+
 struct tempfile *xmks_tempfile_m(const char *filename_template, int mode)
 {
 	struct tempfile *tempfile;
@@ -316,6 +378,7 @@ void delete_tempfile(struct tempfile **tempfile_p)

 	close_tempfile_gently(tempfile);
 	unlink_or_warn(tempfile->filename.buf);
+	remove_template_directory(tempfile, 0);
 	deactivate_tempfile(tempfile);
 	*tempfile_p = NULL;
 }
diff --git a/tempfile.h b/tempfile.h
index 4de3bc77d2..d7804a214a 100644
--- a/tempfile.h
+++ b/tempfile.h
@@ -82,6 +82,7 @@ struct tempfile {
 	FILE *volatile fp;
 	volatile pid_t owner;
 	struct strbuf filename;
+	size_t directorylen;
 };

 /*
@@ -198,6 +199,18 @@ static inline struct tempfile *xmks_tempfile(const char *filename_template)
 	return xmks_tempfile_m(filename_template, 0600);
 }

+/*
+ * Attempt to create a temporary directory in $TMPDIR and to create and
+ * open a file in that new directory. Derive the directory name from the
+ * template in the manner of mkdtemp(). Arrange for directory and file
+ * to be deleted if the program exits before they are deleted
+ * explicitly. On success return a tempfile whose "filename" member
+ * contains the full path of the file and its "fd" member is open for
+ * writing the file. On error return NULL and set errno appropriately.
+ */
+struct tempfile *mks_tempfile_dt(const char *directory_template,
+				 const char *filename);
+
 /*
  * Associate a stdio stream with the temporary file (which must still
  * be open). Return `NULL` (*without* deleting the file) on error. The
--
2.35.1
Junio C Hamano Feb. 9, 2022, 10:48 p.m. UTC | #7
René Scharfe <l.s.r@web.de> writes:

>>> Nevertheless, it is still the most elegant way that I can think of to
>>> generate a diagnostic `.zip` file without messing up the very things that
>>> are to be diagnosed: the repository and the worktree.
>>
>> Puzzled.  Are you feeding contents of a .zip file from the command
>> line?
>
> Kind of.  Command line arguments are built and handed to write_archive()
> in-process.  It's done by patch 3 and extended by 5 and 6.

I meant to ask if this is doing

    git archive --store-contents-at-path="report.zip:$(cat diag.zip)"

as I misunderstood what 'the diagnostic .zip file' referred to.
That was a reference to the output of the "git archive" command.

> The number of files is relatively low and they aren't huge, right?

As long as it is expected to fit on the command line, that's fine.
But if the question is "it is OK to add a new option with known
limitation", then it should be stated a bit differently.

"We add this option for use cases where we handle only small number
of one-liner files", and it is OK.  We may however want to do
something imilar to what we do to the "-m '<message>'" option used
by "git commit" and "git merge", i.e. add the final LF when it is
missing to make it a complete line, to hint the fact that this is
meant to add a small number of single liner files.

>> Another worry was that when <contents> can have
>> arbitrary bytes, with --opt=<path>:<contents> syntax, the input
>> becomes ambiguous (i.e. "which colon is the <path> separator?"),
>> without some way to escape a colon in the payload.
>
> The first colon is the separator here.

Meaning you cannot have a colon in the path, which is not exactly
pleasing limitation.  I know you may not be able to do so on Windows
or CIFS mounted on non-Windows, but we do not limit ourselves to
portable filename character set (POSIX.1 3.282), either.

>> This will throw another monkey wrench to Konstantin's plan [*] to
>> make "git archive" output verifiable with the signature on original
>> Git objects, but it is not a new problem ;-)
>>
>>
>> [Reference]
>>
>> * https://lore.kernel.org/git/20220207213449.ljqjhdx4f45a3lx5@meerkat.local/
>
> I don't see the conflict: If an untracked file is added to an archive
> using --add-file, --add-file-with-content, or ZIP or tar then we'd
> *want* the verification against a signed commit or tag to fail, no?  A
> different signature would be required for the non-tracked parts.

Yes, which is exactly how this (and existing --add-file) makes
Konstantin's plan much less useful.

Thanks.
René Scharfe Feb. 10, 2022, 7:10 p.m. UTC | #8
Am 09.02.22 um 23:48 schrieb Junio C Hamano:
> René Scharfe <l.s.r@web.de> writes:
>
>> The number of files is relatively low and they aren't huge, right?
>
> As long as it is expected to fit on the command line, that's fine.
> But if the question is "it is OK to add a new option with known
> limitation", then it should be stated a bit differently.

I asked this question to find out if writing the files to $TMPDIR and
adding them with --add-file instead of with --add-file-with-content
would be feasible in patches 3 to 6.  git archive would not have to be
changed in that case.

>>> This will throw another monkey wrench to Konstantin's plan [*] to
>>> make "git archive" output verifiable with the signature on original
>>> Git objects, but it is not a new problem ;-)
>>>
>>>
>>> [Reference]
>>>
>>> * https://lore.kernel.org/git/20220207213449.ljqjhdx4f45a3lx5@meerkat.local/
>>
>> I don't see the conflict: If an untracked file is added to an archive
>> using --add-file, --add-file-with-content, or ZIP or tar then we'd
>> *want* the verification against a signed commit or tag to fail, no?  A
>> different signature would be required for the non-tracked parts.
>
> Yes, which is exactly how this (and existing --add-file) makes
> Konstantin's plan much less useful.
People added untracked files to archives before --add-file existed.

--add-file-with-content could be used to add the .GIT_ARCHIVE_SIG file.

Additional untracked files would need a manifest to specify which files
are (not) covered by the signed commit/tag.  Or the .GIT_ARCHIVE_SIG
files could be added just after the signed files as a rule, before any
other untracked files, as some kind of a separator.

Just listing untracked files and verifying the others might still be
useful.  Warning about untracked files shadowing tracked ones would be
very useful.

Some equivalent to the .GIT_ARCHIVE_SIG file containing a signature of
the untracked files could optionally be added at the end to allow full
verification -- but would require signing at archive creation time.

René
Junio C Hamano Feb. 10, 2022, 7:23 p.m. UTC | #9
René Scharfe <l.s.r@web.de> writes:

>> Yes, which is exactly how this (and existing --add-file) makes
>> Konstantin's plan much less useful.
> People added untracked files to archives before --add-file existed.
>
> --add-file-with-content could be used to add the .GIT_ARCHIVE_SIG file.
>
> Additional untracked files would need a manifest to specify which files
> are (not) covered by the signed commit/tag.  Or the .GIT_ARCHIVE_SIG
> files could be added just after the signed files as a rule, before any
> other untracked files, as some kind of a separator.

Or if people do not _exclude_ tracked files from the archive, then
the verifier who has a tarball and a Git tree object can consult the
tree object to see which ones are added untracked cruft.

> Just listing untracked files and verifying the others might still be
> useful.  Warning about untracked files shadowing tracked ones would be
> very useful.

Yup.

> Some equivalent to the .GIT_ARCHIVE_SIG file containing a signature of
> the untracked files could optionally be added at the end to allow full
> verification -- but would require signing at archive creation time.

Yeah, and at that point, it is not much more convenient than just
signing the whole archive (sans the SIG part, obviously), which is
what people have always done ;-)
René Scharfe Feb. 11, 2022, 7:16 p.m. UTC | #10
Am 10.02.22 um 20:23 schrieb Junio C Hamano:
> René Scharfe <l.s.r@web.de> writes:
>
>>> Yes, which is exactly how this (and existing --add-file) makes
>>> Konstantin's plan much less useful.

A harder obstacle to verification would be end-of-line conversion.
Retrying a failed signature check after applying convert_to_git() might
work, but not for files that have mixed line endings in the repository
and end up being homogenized during checkout (and thus archiving).

>> People added untracked files to archives before --add-file existed.
>>
>> --add-file-with-content could be used to add the .GIT_ARCHIVE_SIG file.
>>
>> Additional untracked files would need a manifest to specify which files
>> are (not) covered by the signed commit/tag.  Or the .GIT_ARCHIVE_SIG
>> files could be added just after the signed files as a rule, before any
>> other untracked files, as some kind of a separator.
>
> Or if people do not _exclude_ tracked files from the archive, then
> the verifier who has a tarball and a Git tree object can consult the
> tree object to see which ones are added untracked cruft.

True, but if you have the tree objects then you probably also have the
blobs and don't need the archive?  Or is this some kind of sparse
checkout scenario?

>> Some equivalent to the .GIT_ARCHIVE_SIG file containing a signature of
>> the untracked files could optionally be added at the end to allow full
>> verification -- but would require signing at archive creation time.
>
> Yeah, and at that point, it is not much more convenient than just
> signing the whole archive (sans the SIG part, obviously), which is
> what people have always done ;-)

Indeed.

René
Junio C Hamano Feb. 11, 2022, 9:27 p.m. UTC | #11
René Scharfe <l.s.r@web.de> writes:

>> Or if people do not _exclude_ tracked files from the archive, then
>> the verifier who has a tarball and a Git tree object can consult the
>> tree object to see which ones are added untracked cruft.
>
> True, but if you have the tree objects then you probably also have the
> blobs and don't need the archive?  Or is this some kind of sparse
> checkout scenario?

My phrasing was too loose.  This is a "how to verify a distro
tarball" (without having a copy of the project repository, but with
some common tools like "git") scenario.

The verifier has a tarball.  In addition, the verifier knows the
object name of the Git tree object the tarball was taken from, and
somehow trusts that the object name is genuine.  We can do either
"untar + git-add . && git write-tree" or its equivalent to see how
the contents hashes to the expected tree (or not).

How the verifier trusts the object name is out of scope (it may come
from a copy of a signed tag object and a copy of the commit object
that the tag points at and the contents of signed tag object, with
its known format, would allow you to write a stand alone tool to
verify the PGP signature).

Line-end normalization and smudge filter rules may get in the way,
if we truly did "untar" to the filesystem, but I thought "git
archive" didn't do smudge conversion and core.crlf handling when
creating the archive?
René Scharfe Feb. 12, 2022, 9:12 a.m. UTC | #12
Am 11.02.22 um 22:27 schrieb Junio C Hamano:
> René Scharfe <l.s.r@web.de> writes:
>
>>> Or if people do not _exclude_ tracked files from the archive, then
>>> the verifier who has a tarball and a Git tree object can consult the
>>> tree object to see which ones are added untracked cruft.
>>
>> True, but if you have the tree objects then you probably also have the
>> blobs and don't need the archive?  Or is this some kind of sparse
>> checkout scenario?
>
> My phrasing was too loose.  This is a "how to verify a distro
> tarball" (without having a copy of the project repository, but with
> some common tools like "git") scenario.
>
> The verifier has a tarball.  In addition, the verifier knows the
> object name of the Git tree object the tarball was taken from, and
> somehow trusts that the object name is genuine.  We can do either
> "untar + git-add . && git write-tree" or its equivalent to see how
> the contents hashes to the expected tree (or not).
>
> How the verifier trusts the object name is out of scope (it may come
> from a copy of a signed tag object and a copy of the commit object
> that the tag points at and the contents of signed tag object, with
> its known format, would allow you to write a stand alone tool to
> verify the PGP signature).

Right, but the tree hash does not directly allow to see which objects
are tracked or not.  This information is necessary to reconstruct the
signed tree.  (Having tracked files first, then the signature file and
then untracked files in the archive would be an easy way to transmit
it.)

> Line-end normalization and smudge filter rules may get in the way,
> if we truly did "untar" to the filesystem, but I thought "git
> archive" didn't do smudge conversion and core.crlf handling when
> creating the archive?

git archive uses convert_to_working_tree() to archive the same file
contents as tar or zip would.

René
Junio C Hamano Feb. 13, 2022, 6:25 a.m. UTC | #13
René Scharfe <l.s.r@web.de> writes:

>> The verifier has a tarball.  In addition, the verifier knows the
>> object name of the Git tree object the tarball was taken from, and
>> somehow trusts that the object name is genuine.  We can do either
>> "untar + git-add . && git write-tree" or its equivalent to see how
>> the contents hashes to the expected tree (or not).
> ...
> Right, but the tree hash does not directly allow to see which objects
> are tracked or not.

Ah, of course---it was silly of me to overlook this obvious fact X-<.
So we do need some extra "manifest" to declare what's untracked etc.,
if we allow --add-file etc. to munge the tree when creating a tarball
out of it.
René Scharfe Feb. 13, 2022, 9:02 a.m. UTC | #14
Am 13.02.22 um 07:25 schrieb Junio C Hamano:
>
> So we do need some extra "manifest" to declare what's untracked etc.,
> if we allow --add-file etc. to munge the tree when creating a tarball
> out of it.

Right, or get that information from the order of files in the archive,
by having tracked files come first, then the signature file with a
certain name and then untracked files.

René
Junio C Hamano Feb. 14, 2022, 5:22 p.m. UTC | #15
René Scharfe <l.s.r@web.de> writes:

> Am 13.02.22 um 07:25 schrieb Junio C Hamano:
>>
>> So we do need some extra "manifest" to declare what's untracked etc.,
>> if we allow --add-file etc. to munge the tree when creating a tarball
>> out of it.
>
> Right, or get that information from the order of files in the archive,
> by having tracked files come first, then the signature file with a
> certain name and then untracked files.

That sounds like a workable approach, modulo that the details of the
"signature file with a certain name" part needs to be worked out.

We should make sure that we clearly document that "--add-file=" and
friends add their material after the contents that come from the
tree-ish, and make sure that the program does so and will stay doing
so.  Otherwise users cannot easily create an archive that follows
the above rule.

Thanks.
diff mbox series

Patch

diff --git a/Documentation/git-archive.txt b/Documentation/git-archive.txt
index bc4e76a7834..1b52a0a65a1 100644
--- a/Documentation/git-archive.txt
+++ b/Documentation/git-archive.txt
@@ -61,6 +61,17 @@  OPTIONS
 	by concatenating the value for `--prefix` (if any) and the
 	basename of <file>.
 
+--add-file-with-content=<path>:<content>::
+	Add the specified contents to the archive.  Can be repeated to add
+	multiple files.  The path of the file in the archive is built
+	by concatenating the value for `--prefix` (if any) and the
+	basename of <file>.
++
+The `<path>` cannot contain any colon, the file mode is limited to
+a regular file, and the option may be subject platform-dependent
+command-line limits. For non-trivial cases, write an untracked file
+and use `--add-file` instead.
+
 --worktree-attributes::
 	Look for attributes in .gitattributes files in the working tree
 	as well (see <<ATTRIBUTES>>).
diff --git a/archive.c b/archive.c
index a3bbb091256..172efd690c3 100644
--- a/archive.c
+++ b/archive.c
@@ -263,6 +263,7 @@  static int queue_or_write_archive_entry(const struct object_id *oid,
 struct extra_file_info {
 	char *base;
 	struct stat stat;
+	void *content;
 };
 
 int write_archive_entries(struct archiver_args *args,
@@ -337,7 +338,13 @@  int write_archive_entries(struct archiver_args *args,
 		strbuf_addstr(&path_in_archive, basename(path));
 
 		strbuf_reset(&content);
-		if (strbuf_read_file(&content, path, info->stat.st_size) < 0)
+		if (info->content)
+			err = write_entry(args, &fake_oid, path_in_archive.buf,
+					  path_in_archive.len,
+					  info->stat.st_mode,
+					  info->content, info->stat.st_size);
+		else if (strbuf_read_file(&content, path,
+					  info->stat.st_size) < 0)
 			err = error_errno(_("could not read '%s'"), path);
 		else
 			err = write_entry(args, &fake_oid, path_in_archive.buf,
@@ -493,6 +500,7 @@  static void extra_file_info_clear(void *util, const char *str)
 {
 	struct extra_file_info *info = util;
 	free(info->base);
+	free(info->content);
 	free(info);
 }
 
@@ -514,14 +522,38 @@  static int add_file_cb(const struct option *opt, const char *arg, int unset)
 	if (!arg)
 		return -1;
 
-	path = prefix_filename(args->prefix, arg);
-	item = string_list_append_nodup(&args->extra_files, path);
-	item->util = info = xmalloc(sizeof(*info));
+	info = xmalloc(sizeof(*info));
 	info->base = xstrdup_or_null(base);
-	if (stat(path, &info->stat))
-		die(_("File not found: %s"), path);
-	if (!S_ISREG(info->stat.st_mode))
-		die(_("Not a regular file: %s"), path);
+
+	if (strcmp(opt->long_name, "add-file-with-content")) {
+		path = prefix_filename(args->prefix, arg);
+		if (stat(path, &info->stat))
+			die(_("File not found: %s"), path);
+		if (!S_ISREG(info->stat.st_mode))
+			die(_("Not a regular file: %s"), path);
+		info->content = NULL; /* read the file later */
+	} else {
+		const char *colon = strchr(arg, ':');
+		char *p;
+
+		if (!colon)
+			die(_("missing colon: '%s'"), arg);
+
+		p = xstrndup(arg, colon - arg);
+		if (!args->prefix)
+			path = p;
+		else {
+			path = prefix_filename(args->prefix, p);
+			free(p);
+		}
+		memset(&info->stat, 0, sizeof(info->stat));
+		info->stat.st_mode = S_IFREG | 0644;
+		info->content = xstrdup(colon + 1);
+		info->stat.st_size = strlen(info->content);
+	}
+	item = string_list_append_nodup(&args->extra_files, path);
+	item->util = info;
+
 	return 0;
 }
 
@@ -554,6 +586,9 @@  static int parse_archive_args(int argc, const char **argv,
 		{ OPTION_CALLBACK, 0, "add-file", args, N_("file"),
 		  N_("add untracked file to archive"), 0, add_file_cb,
 		  (intptr_t)&base },
+		{ OPTION_CALLBACK, 0, "add-file-with-content", args,
+		  N_("file"), N_("add untracked file to archive"), 0,
+		  add_file_cb, (intptr_t)&base },
 		OPT_STRING('o', "output", &output, N_("file"),
 			N_("write the archive to this file")),
 		OPT_BOOL(0, "worktree-attributes", &worktree_attributes,
diff --git a/t/t5003-archive-zip.sh b/t/t5003-archive-zip.sh
index 1e6d18b140e..8ff1257f1a0 100755
--- a/t/t5003-archive-zip.sh
+++ b/t/t5003-archive-zip.sh
@@ -206,6 +206,18 @@  test_expect_success 'git archive --format=zip --add-file' '
 check_zip with_untracked
 check_added with_untracked untracked untracked
 
+test_expect_success UNZIP 'git archive --format=zip --add-file-with-content' '
+	git archive --format=zip >with_file_with_content.zip \
+		--add-file-with-content=hello:world $EMPTY_TREE &&
+	test_when_finished "rm -rf tmp-unpack" &&
+	mkdir tmp-unpack && (
+		cd tmp-unpack &&
+		"$GIT_UNZIP" ../with_file_with_content.zip &&
+		test_path_is_file hello &&
+		test world = $(cat hello)
+	)
+'
+
 test_expect_success 'git archive --format=zip --add-file twice' '
 	echo untracked >untracked &&
 	git archive --format=zip --prefix=one/ --add-file=untracked \