diff mbox series

[v3,2/7] archive --add-file-with-contents: allow paths containing colons

Message ID ce4b1b680c98d0f55d4d307b8c746a81d90ffa06.1651677919.git.gitgitgadget@gmail.com (mailing list archive)
State Superseded
Headers show
Series scalar: implement the subcommand "diagnose" | expand

Commit Message

Johannes Schindelin May 4, 2022, 3:25 p.m. UTC
From: Johannes Schindelin <johannes.schindelin@gmx.de>

By allowing the path to be enclosed in double-quotes, we can avoid
the limitation that paths cannot contain colons.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
 Documentation/git-archive.txt | 13 +++++++++----
 archive.c                     | 34 +++++++++++++++++++++++++++++-----
 t/t5003-archive-zip.sh        |  8 ++++++++
 3 files changed, 46 insertions(+), 9 deletions(-)

Comments

Elijah Newren May 7, 2022, 2:06 a.m. UTC | #1
On Wed, May 4, 2022 at 8:25 AM Johannes Schindelin via GitGitGadget
<gitgitgadget@gmail.com> wrote:
>
> From: Johannes Schindelin <johannes.schindelin@gmx.de>
>
> By allowing the path to be enclosed in double-quotes, we can avoid
> the limitation that paths cannot contain colons.
>
> Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
> ---
>  Documentation/git-archive.txt | 13 +++++++++----
>  archive.c                     | 34 +++++++++++++++++++++++++++++-----
>  t/t5003-archive-zip.sh        |  8 ++++++++
>  3 files changed, 46 insertions(+), 9 deletions(-)
>
> diff --git a/Documentation/git-archive.txt b/Documentation/git-archive.txt
> index a0edc9167b2..1789ce4c232 100644
> --- a/Documentation/git-archive.txt
> +++ b/Documentation/git-archive.txt
> @@ -67,10 +67,15 @@ OPTIONS
>         by concatenating the value for `--prefix` (if any) and the
>         basename of <file>.
>  +
> -The `<path>` cannot contain any colon, the file mode is limited to
> -a regular file, and the option may be subject to platform-dependent
> -command-line limits. For non-trivial cases, write an untracked file
> -and use `--add-file` instead.
> +The `<path>` argument can start and end with a literal double-quote
> +character. In this case, the backslash is interpreted as escape
> +character. The path must be quoted if it contains a colon, to avoid
> +the colon from being misinterpreted as the separator between the
> +path and the contents.

The path must also be quoted if it begins or ends with a double-quote, right?

Also, would people want to be able to pass a pathname from the output
of e.g. `git ls-files -o`, which may quote additional characters?

> ++
> +The file mode is limited to a regular file, and the option may be
> +subject to platform-dependent command-line limits. For non-trivial
> +cases, write an untracked file and use `--add-file` instead.
>
>  --worktree-attributes::
>         Look for attributes in .gitattributes files in the working tree
> diff --git a/archive.c b/archive.c
> index d798624cd5f..3b751027143 100644
> --- a/archive.c
> +++ b/archive.c
> @@ -533,13 +533,37 @@ static int add_file_cb(const struct option *opt, const char *arg, int unset)
>                         die(_("Not a regular file: %s"), path);
>                 info->content = NULL; /* read the file later */
>         } else {
> -               const char *colon = strchr(arg, ':');
>                 char *p;
>
> -               if (!colon)
> -                       die(_("missing colon: '%s'"), arg);
> +               if (*arg != '"') {
> +                       const char *colon = strchr(arg, ':');
> +
> +                       if (!colon)
> +                               die(_("missing colon: '%s'"), arg);
> +                       p = xstrndup(arg, colon - arg);
> +                       arg = colon + 1;
> +               } else {
> +                       struct strbuf buf = STRBUF_INIT;
> +                       const char *orig = arg;
> +
> +                       for (;;) {
> +                               if (!*(++arg))
> +                                       die(_("unclosed quote: '%s'"), orig);
> +                               if (*arg == '"')
> +                                       break;
> +                               if (*arg == '\\' && *(++arg) == '\0')
> +                                       die(_("trailing backslash: '%s"), orig);
> +                               else
> +                                       strbuf_addch(&buf, *arg);
> +                       }
> +
> +                       if (*(++arg) != ':')
> +                               die(_("missing colon: '%s'"), orig);
> +
> +                       p = strbuf_detach(&buf, NULL);
> +                       arg++;
> +               }

Should we use unquote_c_style() here instead of rolling another parser
to do unquoting?  That would have the added benefit of allowing people
to use filenames from the output of various git commands that do
special quoting -- such as octal sequences for non-ascii characters.

>
> -               p = xstrndup(arg, colon - arg);
>                 if (!args->prefix)
>                         path = p;
>                 else {
> @@ -548,7 +572,7 @@ static int add_file_cb(const struct option *opt, const char *arg, int unset)
>                 }
>                 memset(&info->stat, 0, sizeof(info->stat));
>                 info->stat.st_mode = S_IFREG | 0644;
> -               info->content = xstrdup(colon + 1);
> +               info->content = xstrdup(arg);
>                 info->stat.st_size = strlen(info->content);
>         }
>         item = string_list_append_nodup(&args->extra_files, path);
> diff --git a/t/t5003-archive-zip.sh b/t/t5003-archive-zip.sh
> index 8ff1257f1a0..5b8bbfc2692 100755
> --- a/t/t5003-archive-zip.sh
> +++ b/t/t5003-archive-zip.sh
> @@ -207,13 +207,21 @@ check_zip with_untracked
>  check_added with_untracked untracked untracked
>
>  test_expect_success UNZIP 'git archive --format=zip --add-file-with-content' '
> +       if test_have_prereq FUNNYNAMES
> +       then
> +               QUOTED=quoted:colon
> +       else
> +               QUOTED=quoted
> +       fi &&
>         git archive --format=zip >with_file_with_content.zip \
> +               --add-file-with-content=\"$QUOTED\": \
>                 --add-file-with-content=hello:world $EMPTY_TREE &&
>         test_when_finished "rm -rf tmp-unpack" &&
>         mkdir tmp-unpack && (
>                 cd tmp-unpack &&
>                 "$GIT_UNZIP" ../with_file_with_content.zip &&
>                 test_path_is_file hello &&
> +               test_path_is_file $QUOTED &&
>                 test world = $(cat hello)
>         )
>  '
> --
> gitgitgadget
Johannes Schindelin May 9, 2022, 9:04 p.m. UTC | #2
Hi Elijah,

On Fri, 6 May 2022, Elijah Newren wrote:

> On Wed, May 4, 2022 at 8:25 AM Johannes Schindelin via GitGitGadget
> <gitgitgadget@gmail.com> wrote:
> >
> > From: Johannes Schindelin <johannes.schindelin@gmx.de>
> >
> > By allowing the path to be enclosed in double-quotes, we can avoid
> > the limitation that paths cannot contain colons.
> >
> > Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
> > ---
> >  Documentation/git-archive.txt | 13 +++++++++----
> >  archive.c                     | 34 +++++++++++++++++++++++++++++-----
> >  t/t5003-archive-zip.sh        |  8 ++++++++
> >  3 files changed, 46 insertions(+), 9 deletions(-)
> >
> > diff --git a/Documentation/git-archive.txt b/Documentation/git-archive.txt
> > index a0edc9167b2..1789ce4c232 100644
> > --- a/Documentation/git-archive.txt
> > +++ b/Documentation/git-archive.txt
> > @@ -67,10 +67,15 @@ OPTIONS
> >         by concatenating the value for `--prefix` (if any) and the
> >         basename of <file>.
> >  +
> > -The `<path>` cannot contain any colon, the file mode is limited to
> > -a regular file, and the option may be subject to platform-dependent
> > -command-line limits. For non-trivial cases, write an untracked file
> > -and use `--add-file` instead.
> > +The `<path>` argument can start and end with a literal double-quote
> > +character. In this case, the backslash is interpreted as escape
> > +character. The path must be quoted if it contains a colon, to avoid
> > +the colon from being misinterpreted as the separator between the
> > +path and the contents.
>
> The path must also be quoted if it begins or ends with a double-quote, right?

True.

> Also, would people want to be able to pass a pathname from the output
> of e.g. `git ls-files -o`, which may quote additional characters?

Also true.

> > ++
> > +The file mode is limited to a regular file, and the option may be
> > +subject to platform-dependent command-line limits. For non-trivial
> > +cases, write an untracked file and use `--add-file` instead.
> >
> >  --worktree-attributes::
> >         Look for attributes in .gitattributes files in the working tree
> > diff --git a/archive.c b/archive.c
> > index d798624cd5f..3b751027143 100644
> > --- a/archive.c
> > +++ b/archive.c
> > @@ -533,13 +533,37 @@ static int add_file_cb(const struct option *opt, const char *arg, int unset)
> >                         die(_("Not a regular file: %s"), path);
> >                 info->content = NULL; /* read the file later */
> >         } else {
> > -               const char *colon = strchr(arg, ':');
> >                 char *p;
> >
> > -               if (!colon)
> > -                       die(_("missing colon: '%s'"), arg);
> > +               if (*arg != '"') {
> > +                       const char *colon = strchr(arg, ':');
> > +
> > +                       if (!colon)
> > +                               die(_("missing colon: '%s'"), arg);
> > +                       p = xstrndup(arg, colon - arg);
> > +                       arg = colon + 1;
> > +               } else {
> > +                       struct strbuf buf = STRBUF_INIT;
> > +                       const char *orig = arg;
> > +
> > +                       for (;;) {
> > +                               if (!*(++arg))
> > +                                       die(_("unclosed quote: '%s'"), orig);
> > +                               if (*arg == '"')
> > +                                       break;
> > +                               if (*arg == '\\' && *(++arg) == '\0')
> > +                                       die(_("trailing backslash: '%s"), orig);
> > +                               else
> > +                                       strbuf_addch(&buf, *arg);
> > +                       }
> > +
> > +                       if (*(++arg) != ':')
> > +                               die(_("missing colon: '%s'"), orig);
> > +
> > +                       p = strbuf_detach(&buf, NULL);
> > +                       arg++;
> > +               }
>
> Should we use unquote_c_style() here instead of rolling another parser
> to do unquoting?  That would have the added benefit of allowing people
> to use filenames from the output of various git commands that do
> special quoting -- such as octal sequences for non-ascii characters.

Yep, let's do that. I somehow missed that function while glimpsing at
`quote.h`.

Thank you for your review!
Dscho

> >
> > -               p = xstrndup(arg, colon - arg);
> >                 if (!args->prefix)
> >                         path = p;
> >                 else {
> > @@ -548,7 +572,7 @@ static int add_file_cb(const struct option *opt, const char *arg, int unset)
> >                 }
> >                 memset(&info->stat, 0, sizeof(info->stat));
> >                 info->stat.st_mode = S_IFREG | 0644;
> > -               info->content = xstrdup(colon + 1);
> > +               info->content = xstrdup(arg);
> >                 info->stat.st_size = strlen(info->content);
> >         }
> >         item = string_list_append_nodup(&args->extra_files, path);
> > diff --git a/t/t5003-archive-zip.sh b/t/t5003-archive-zip.sh
> > index 8ff1257f1a0..5b8bbfc2692 100755
> > --- a/t/t5003-archive-zip.sh
> > +++ b/t/t5003-archive-zip.sh
> > @@ -207,13 +207,21 @@ check_zip with_untracked
> >  check_added with_untracked untracked untracked
> >
> >  test_expect_success UNZIP 'git archive --format=zip --add-file-with-content' '
> > +       if test_have_prereq FUNNYNAMES
> > +       then
> > +               QUOTED=quoted:colon
> > +       else
> > +               QUOTED=quoted
> > +       fi &&
> >         git archive --format=zip >with_file_with_content.zip \
> > +               --add-file-with-content=\"$QUOTED\": \
> >                 --add-file-with-content=hello:world $EMPTY_TREE &&
> >         test_when_finished "rm -rf tmp-unpack" &&
> >         mkdir tmp-unpack && (
> >                 cd tmp-unpack &&
> >                 "$GIT_UNZIP" ../with_file_with_content.zip &&
> >                 test_path_is_file hello &&
> > +               test_path_is_file $QUOTED &&
> >                 test world = $(cat hello)
> >         )
> >  '
> > --
> > gitgitgadget
>
>
diff mbox series

Patch

diff --git a/Documentation/git-archive.txt b/Documentation/git-archive.txt
index a0edc9167b2..1789ce4c232 100644
--- a/Documentation/git-archive.txt
+++ b/Documentation/git-archive.txt
@@ -67,10 +67,15 @@  OPTIONS
 	by concatenating the value for `--prefix` (if any) and the
 	basename of <file>.
 +
-The `<path>` cannot contain any colon, the file mode is limited to
-a regular file, and the option may be subject to platform-dependent
-command-line limits. For non-trivial cases, write an untracked file
-and use `--add-file` instead.
+The `<path>` argument can start and end with a literal double-quote
+character. In this case, the backslash is interpreted as escape
+character. The path must be quoted if it contains a colon, to avoid
+the colon from being misinterpreted as the separator between the
+path and the contents.
++
+The file mode is limited to a regular file, and the option may be
+subject to platform-dependent command-line limits. For non-trivial
+cases, write an untracked file and use `--add-file` instead.
 
 --worktree-attributes::
 	Look for attributes in .gitattributes files in the working tree
diff --git a/archive.c b/archive.c
index d798624cd5f..3b751027143 100644
--- a/archive.c
+++ b/archive.c
@@ -533,13 +533,37 @@  static int add_file_cb(const struct option *opt, const char *arg, int unset)
 			die(_("Not a regular file: %s"), path);
 		info->content = NULL; /* read the file later */
 	} else {
-		const char *colon = strchr(arg, ':');
 		char *p;
 
-		if (!colon)
-			die(_("missing colon: '%s'"), arg);
+		if (*arg != '"') {
+			const char *colon = strchr(arg, ':');
+
+			if (!colon)
+				die(_("missing colon: '%s'"), arg);
+			p = xstrndup(arg, colon - arg);
+			arg = colon + 1;
+		} else {
+			struct strbuf buf = STRBUF_INIT;
+			const char *orig = arg;
+
+			for (;;) {
+				if (!*(++arg))
+					die(_("unclosed quote: '%s'"), orig);
+				if (*arg == '"')
+					break;
+				if (*arg == '\\' && *(++arg) == '\0')
+					die(_("trailing backslash: '%s"), orig);
+				else
+					strbuf_addch(&buf, *arg);
+			}
+
+			if (*(++arg) != ':')
+				die(_("missing colon: '%s'"), orig);
+
+			p = strbuf_detach(&buf, NULL);
+			arg++;
+		}
 
-		p = xstrndup(arg, colon - arg);
 		if (!args->prefix)
 			path = p;
 		else {
@@ -548,7 +572,7 @@  static int add_file_cb(const struct option *opt, const char *arg, int unset)
 		}
 		memset(&info->stat, 0, sizeof(info->stat));
 		info->stat.st_mode = S_IFREG | 0644;
-		info->content = xstrdup(colon + 1);
+		info->content = xstrdup(arg);
 		info->stat.st_size = strlen(info->content);
 	}
 	item = string_list_append_nodup(&args->extra_files, path);
diff --git a/t/t5003-archive-zip.sh b/t/t5003-archive-zip.sh
index 8ff1257f1a0..5b8bbfc2692 100755
--- a/t/t5003-archive-zip.sh
+++ b/t/t5003-archive-zip.sh
@@ -207,13 +207,21 @@  check_zip with_untracked
 check_added with_untracked untracked untracked
 
 test_expect_success UNZIP 'git archive --format=zip --add-file-with-content' '
+	if test_have_prereq FUNNYNAMES
+	then
+		QUOTED=quoted:colon
+	else
+		QUOTED=quoted
+	fi &&
 	git archive --format=zip >with_file_with_content.zip \
+		--add-file-with-content=\"$QUOTED\": \
 		--add-file-with-content=hello:world $EMPTY_TREE &&
 	test_when_finished "rm -rf tmp-unpack" &&
 	mkdir tmp-unpack && (
 		cd tmp-unpack &&
 		"$GIT_UNZIP" ../with_file_with_content.zip &&
 		test_path_is_file hello &&
+		test_path_is_file $QUOTED &&
 		test world = $(cat hello)
 	)
 '