diff mbox series

[v3,3/5,GSOC] ref-filter: --format=%(raw) re-support --perl

Message ID ac5d98647dad31ebe165daa7cbf84f2b7e5fbe80.1627136062.git.gitgitgadget@gmail.com (mailing list archive)
State New, archived
Headers show
Series ref-filter: add %(raw) and %(rest) atoms | expand

Commit Message

ZheNing Hu July 24, 2021, 2:14 p.m. UTC
From: ZheNing Hu <adlternative@gmail.com>

Because the perl language can handle binary data correctly,
add the function perl_quote_buf_with_len(), which can specify
the length of the data and prevent the data from being truncated
at '\0' to help `--format="%(raw)"` re-support `--perl`.

Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
---
 Documentation/git-for-each-ref.txt |  4 ++--
 quote.c                            | 17 +++++++++++++++++
 quote.h                            |  1 +
 ref-filter.c                       | 15 +++++++++++----
 t/t6300-for-each-ref.sh            | 19 +++++++++++++++++--
 5 files changed, 48 insertions(+), 8 deletions(-)

Comments

Jacob Keller July 25, 2021, 8:27 a.m. UTC | #1
On Sat, Jul 24, 2021 at 7:14 AM ZheNing Hu via GitGitGadget
<gitgitgadget@gmail.com> wrote:
>
> From: ZheNing Hu <adlternative@gmail.com>
>
> Because the perl language can handle binary data correctly,
> add the function perl_quote_buf_with_len(), which can specify
> the length of the data and prevent the data from being truncated
> at '\0' to help `--format="%(raw)"` re-support `--perl`.
>
> Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
> Signed-off-by: ZheNing Hu <adlternative@gmail.com>
> ---

I presume that we used to support raw with perl before in cat-file? It
did read a bit weird to see "Re-support" in the title, because the
previous patch which introduced the raw atom when in ref-filter code
never supported perl. Still, I think it's fairly clear either way and
that change wouldn't deserve a reroll on its own.

Makes sense. Reviewed-by: Jacob Keller <jacob.keller@gmail.com>

>  Documentation/git-for-each-ref.txt |  4 ++--
>  quote.c                            | 17 +++++++++++++++++
>  quote.h                            |  1 +
>  ref-filter.c                       | 15 +++++++++++----
>  t/t6300-for-each-ref.sh            | 19 +++++++++++++++++--
>  5 files changed, 48 insertions(+), 8 deletions(-)
>
> diff --git a/Documentation/git-for-each-ref.txt b/Documentation/git-for-each-ref.txt
> index cbb6f87d13f..6da899c6296 100644
> --- a/Documentation/git-for-each-ref.txt
> +++ b/Documentation/git-for-each-ref.txt
> @@ -241,8 +241,8 @@ raw:size::
>         The raw data size of the object.
>
>  Note that `--format=%(raw)` can not be used with `--python`, `--shell`, `--tcl`,
> -`--perl` because such language may not support arbitrary binary data in their
> -string variable type.
> +because such language may not support arbitrary binary data in their string
> +variable type.
>
>  The message in a commit or a tag object is `contents`, from which
>  `contents:<part>` can be used to extract various parts out of:
> diff --git a/quote.c b/quote.c
> index 8a3a5e39eb1..26719d21d1e 100644
> --- a/quote.c
> +++ b/quote.c
> @@ -471,6 +471,23 @@ void perl_quote_buf(struct strbuf *sb, const char *src)
>         strbuf_addch(sb, sq);
>  }
>
> +void perl_quote_buf_with_len(struct strbuf *sb, const char *src, size_t len)
> +{
> +       const char sq = '\'';
> +       const char bq = '\\';
> +       const char *c = src;
> +       const char *end = src + len;
> +
> +       strbuf_addch(sb, sq);
> +       while (c != end) {
> +               if (*c == sq || *c == bq)
> +                       strbuf_addch(sb, bq);
> +               strbuf_addch(sb, *c);
> +               c++;
> +       }
> +       strbuf_addch(sb, sq);
> +}
> +
>  void python_quote_buf(struct strbuf *sb, const char *src)
>  {
>         const char sq = '\'';
> diff --git a/quote.h b/quote.h
> index 768cc6338e2..0fe69e264b0 100644
> --- a/quote.h
> +++ b/quote.h
> @@ -94,6 +94,7 @@ char *quote_path(const char *in, const char *prefix, struct strbuf *out, unsigne
>
>  /* quoting as a string literal for other languages */
>  void perl_quote_buf(struct strbuf *sb, const char *src);
> +void perl_quote_buf_with_len(struct strbuf *sb, const char *src, size_t len);
>  void python_quote_buf(struct strbuf *sb, const char *src);
>  void tcl_quote_buf(struct strbuf *sb, const char *src);
>  void basic_regex_quote_buf(struct strbuf *sb, const char *src);
> diff --git a/ref-filter.c b/ref-filter.c
> index 01545618101..597354c941d 100644
> --- a/ref-filter.c
> +++ b/ref-filter.c
> @@ -746,7 +746,10 @@ static void quote_formatting(struct strbuf *s, const char *str, size_t len, int
>                 sq_quote_buf(s, str);
>                 break;
>         case QUOTE_PERL:
> -               perl_quote_buf(s, str);
> +               if (len != -1)
> +                       perl_quote_buf_with_len(s, str, len);
> +               else
> +                       perl_quote_buf(s, str);
>                 break;
>         case QUOTE_PYTHON:
>                 python_quote_buf(s, str);
> @@ -1010,10 +1013,14 @@ int verify_ref_format(struct ref_format *format)
>                 at = parse_ref_filter_atom(format, sp + 2, ep, &err);
>                 if (at < 0)
>                         die("%s", err.buf);
> -               if (format->quote_style && used_atom[at].atom_type == ATOM_RAW &&
> -                   used_atom[at].u.raw_data.option == RAW_BARE)
> +
> +               if ((format->quote_style == QUOTE_PYTHON ||
> +                    format->quote_style == QUOTE_SHELL ||
> +                    format->quote_style == QUOTE_TCL) &&
> +                    used_atom[at].atom_type == ATOM_RAW &&
> +                    used_atom[at].u.raw_data.option == RAW_BARE)
>                         die(_("--format=%.*s cannot be used with"
> -                             "--python, --shell, --tcl, --perl"), (int)(ep - sp - 2), sp + 2);
> +                             "--python, --shell, --tcl"), (int)(ep - sp - 2), sp + 2);
>                 cp = ep + 1;
>

Right, so now we have to check all of the format quote styles instead
of just checking that its set to any of them, since we want to
continue in the case of perl.

Ok.

>                 if (skip_prefix(used_atom[at].name, "color:", &color))
> diff --git a/t/t6300-for-each-ref.sh b/t/t6300-for-each-ref.sh
> index 18554f62d94..3d15d0a5360 100755
> --- a/t/t6300-for-each-ref.sh
> +++ b/t/t6300-for-each-ref.sh
> @@ -915,8 +915,23 @@ test_expect_success '%(raw) with --tcl must fail' '
>         test_must_fail git for-each-ref --format="%(raw)" --tcl
>  '
>
> -test_expect_success '%(raw) with --perl must fail' '
> -       test_must_fail git for-each-ref --format="%(raw)" --perl
> +test_expect_success '%(raw) with --perl' '
> +       git for-each-ref --format="\$name= %(raw);
> +print \"\$name\"" refs/myblobs/blob1 --perl | perl >actual &&
> +       cmp blob1 actual &&
> +       git for-each-ref --format="\$name= %(raw);
> +print \"\$name\"" refs/myblobs/blob3 --perl | perl >actual &&
> +       cmp blob3 actual &&
> +       git for-each-ref --format="\$name= %(raw);
> +print \"\$name\"" refs/myblobs/blob8 --perl | perl >actual &&
> +       cmp blob8 actual &&
> +       git for-each-ref --format="\$name= %(raw);
> +print \"\$name\"" refs/myblobs/first --perl | perl >actual &&
> +       cmp one actual &&
> +       git cat-file tree refs/mytrees/first > expected &&
> +       git for-each-ref --format="\$name= %(raw);
> +print \"\$name\"" refs/mytrees/first --perl | perl >actual &&
> +       cmp expected actual
>  '
>
>  test_expect_success '%(raw) with --shell must fail' '
> --
> gitgitgadget
>
ZheNing Hu July 25, 2021, 1:18 p.m. UTC | #2
Jacob Keller <jacob.keller@gmail.com> 于2021年7月25日周日 下午4:27写道:
>
> On Sat, Jul 24, 2021 at 7:14 AM ZheNing Hu via GitGitGadget
> <gitgitgadget@gmail.com> wrote:
> >
> > From: ZheNing Hu <adlternative@gmail.com>
> >
> > Because the perl language can handle binary data correctly,
> > add the function perl_quote_buf_with_len(), which can specify
> > the length of the data and prevent the data from being truncated
> > at '\0' to help `--format="%(raw)"` re-support `--perl`.
> >
> > Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
> > Signed-off-by: ZheNing Hu <adlternative@gmail.com>
> > ---
>
> I presume that we used to support raw with perl before in cat-file? It
> did read a bit weird to see "Re-support" in the title, because the
> previous patch which introduced the raw atom when in ref-filter code
> never supported perl. Still, I think it's fairly clear either way and
> that change wouldn't deserve a reroll on its own.
>

No, we haven't support it. It can be said that this commit is only split from
the previous commit. Therefore, "re-support" is indeed wrong. I will modify it.

> Makes sense. Reviewed-by: Jacob Keller <jacob.keller@gmail.com>
>
> > @@ -1010,10 +1013,14 @@ int verify_ref_format(struct ref_format *format)
> >                 at = parse_ref_filter_atom(format, sp + 2, ep, &err);
> >                 if (at < 0)
> >                         die("%s", err.buf);
> > -               if (format->quote_style && used_atom[at].atom_type == ATOM_RAW &&
> > -                   used_atom[at].u.raw_data.option == RAW_BARE)
> > +
> > +               if ((format->quote_style == QUOTE_PYTHON ||
> > +                    format->quote_style == QUOTE_SHELL ||
> > +                    format->quote_style == QUOTE_TCL) &&
> > +                    used_atom[at].atom_type == ATOM_RAW &&
> > +                    used_atom[at].u.raw_data.option == RAW_BARE)
> >                         die(_("--format=%.*s cannot be used with"
> > -                             "--python, --shell, --tcl, --perl"), (int)(ep - sp - 2), sp + 2);
> > +                             "--python, --shell, --tcl"), (int)(ep - sp - 2), sp + 2);
> >                 cp = ep + 1;
> >
>
> Right, so now we have to check all of the format quote styles instead
> of just checking that its set to any of them, since we want to
> continue in the case of perl.
>
> Ok.
>

Thanks.
--
ZheNing Hu
diff mbox series

Patch

diff --git a/Documentation/git-for-each-ref.txt b/Documentation/git-for-each-ref.txt
index cbb6f87d13f..6da899c6296 100644
--- a/Documentation/git-for-each-ref.txt
+++ b/Documentation/git-for-each-ref.txt
@@ -241,8 +241,8 @@  raw:size::
 	The raw data size of the object.
 
 Note that `--format=%(raw)` can not be used with `--python`, `--shell`, `--tcl`,
-`--perl` because such language may not support arbitrary binary data in their
-string variable type.
+because such language may not support arbitrary binary data in their string
+variable type.
 
 The message in a commit or a tag object is `contents`, from which
 `contents:<part>` can be used to extract various parts out of:
diff --git a/quote.c b/quote.c
index 8a3a5e39eb1..26719d21d1e 100644
--- a/quote.c
+++ b/quote.c
@@ -471,6 +471,23 @@  void perl_quote_buf(struct strbuf *sb, const char *src)
 	strbuf_addch(sb, sq);
 }
 
+void perl_quote_buf_with_len(struct strbuf *sb, const char *src, size_t len)
+{
+	const char sq = '\'';
+	const char bq = '\\';
+	const char *c = src;
+	const char *end = src + len;
+
+	strbuf_addch(sb, sq);
+	while (c != end) {
+		if (*c == sq || *c == bq)
+			strbuf_addch(sb, bq);
+		strbuf_addch(sb, *c);
+		c++;
+	}
+	strbuf_addch(sb, sq);
+}
+
 void python_quote_buf(struct strbuf *sb, const char *src)
 {
 	const char sq = '\'';
diff --git a/quote.h b/quote.h
index 768cc6338e2..0fe69e264b0 100644
--- a/quote.h
+++ b/quote.h
@@ -94,6 +94,7 @@  char *quote_path(const char *in, const char *prefix, struct strbuf *out, unsigne
 
 /* quoting as a string literal for other languages */
 void perl_quote_buf(struct strbuf *sb, const char *src);
+void perl_quote_buf_with_len(struct strbuf *sb, const char *src, size_t len);
 void python_quote_buf(struct strbuf *sb, const char *src);
 void tcl_quote_buf(struct strbuf *sb, const char *src);
 void basic_regex_quote_buf(struct strbuf *sb, const char *src);
diff --git a/ref-filter.c b/ref-filter.c
index 01545618101..597354c941d 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -746,7 +746,10 @@  static void quote_formatting(struct strbuf *s, const char *str, size_t len, int
 		sq_quote_buf(s, str);
 		break;
 	case QUOTE_PERL:
-		perl_quote_buf(s, str);
+		if (len != -1)
+			perl_quote_buf_with_len(s, str, len);
+		else
+			perl_quote_buf(s, str);
 		break;
 	case QUOTE_PYTHON:
 		python_quote_buf(s, str);
@@ -1010,10 +1013,14 @@  int verify_ref_format(struct ref_format *format)
 		at = parse_ref_filter_atom(format, sp + 2, ep, &err);
 		if (at < 0)
 			die("%s", err.buf);
-		if (format->quote_style && used_atom[at].atom_type == ATOM_RAW &&
-		    used_atom[at].u.raw_data.option == RAW_BARE)
+
+		if ((format->quote_style == QUOTE_PYTHON ||
+		     format->quote_style == QUOTE_SHELL ||
+		     format->quote_style == QUOTE_TCL) &&
+		     used_atom[at].atom_type == ATOM_RAW &&
+		     used_atom[at].u.raw_data.option == RAW_BARE)
 			die(_("--format=%.*s cannot be used with"
-			      "--python, --shell, --tcl, --perl"), (int)(ep - sp - 2), sp + 2);
+			      "--python, --shell, --tcl"), (int)(ep - sp - 2), sp + 2);
 		cp = ep + 1;
 
 		if (skip_prefix(used_atom[at].name, "color:", &color))
diff --git a/t/t6300-for-each-ref.sh b/t/t6300-for-each-ref.sh
index 18554f62d94..3d15d0a5360 100755
--- a/t/t6300-for-each-ref.sh
+++ b/t/t6300-for-each-ref.sh
@@ -915,8 +915,23 @@  test_expect_success '%(raw) with --tcl must fail' '
 	test_must_fail git for-each-ref --format="%(raw)" --tcl
 '
 
-test_expect_success '%(raw) with --perl must fail' '
-	test_must_fail git for-each-ref --format="%(raw)" --perl
+test_expect_success '%(raw) with --perl' '
+	git for-each-ref --format="\$name= %(raw);
+print \"\$name\"" refs/myblobs/blob1 --perl | perl >actual &&
+	cmp blob1 actual &&
+	git for-each-ref --format="\$name= %(raw);
+print \"\$name\"" refs/myblobs/blob3 --perl | perl >actual &&
+	cmp blob3 actual &&
+	git for-each-ref --format="\$name= %(raw);
+print \"\$name\"" refs/myblobs/blob8 --perl | perl >actual &&
+	cmp blob8 actual &&
+	git for-each-ref --format="\$name= %(raw);
+print \"\$name\"" refs/myblobs/first --perl | perl >actual &&
+	cmp one actual &&
+	git cat-file tree refs/mytrees/first > expected &&
+	git for-each-ref --format="\$name= %(raw);
+print \"\$name\"" refs/mytrees/first --perl | perl >actual &&
+	cmp expected actual
 '
 
 test_expect_success '%(raw) with --shell must fail' '