diff mbox series

[v5,3/3] ref-filter: add support for %(contents:size)

Message ID 20200716121940.21041-4-chriscool@tuxfamily.org (mailing list archive)
State New, archived
Headers show
Series Add support for %(contents:size) in ref-filter | expand

Commit Message

Christian Couder July 16, 2020, 12:19 p.m. UTC
It's useful and efficient to be able to get the size of the
contents directly without having to pipe through `wc -c`.

Also the result of the following:

`git for-each-ref --format='%(contents)' refs/heads/my-branch | wc -c`

is off by one as `git for-each-ref` appends a newline character
after the contents, which can be seen by comparing its output
with the output from `git cat-file`.

As with %(contents), %(contents:size) is silently ignored if a
ref points to something other than a commit or a tag:

```
$ git update-ref refs/mytrees/first HEAD^{tree}
$ git for-each-ref --format='%(contents)' refs/mytrees/first

$ git for-each-ref --format='%(contents:size)' refs/mytrees/first

```

Signed-off-by: Christian Couder <chriscool@tuxfamily.org>
---
 Documentation/git-for-each-ref.txt |  3 +++
 ref-filter.c                       |  7 ++++++-
 t/t6300-for-each-ref.sh            | 19 +++++++++++++++++++
 3 files changed, 28 insertions(+), 1 deletion(-)

Comments

Alban Gruin July 31, 2020, 5:37 p.m. UTC | #1
Hi Christian,

Le 16/07/2020 à 14:19, Christian Couder a écrit :
> It's useful and efficient to be able to get the size of the
> contents directly without having to pipe through `wc -c`.
> 
> Also the result of the following:
> 
> `git for-each-ref --format='%(contents)' refs/heads/my-branch | wc -c`
> 
> is off by one as `git for-each-ref` appends a newline character
> after the contents, which can be seen by comparing its output
> with the output from `git cat-file`.
> 
> As with %(contents), %(contents:size) is silently ignored, if a
> ref points to something other than a commit or a tag:
> 
> ```
> $ git update-ref refs/mytrees/first HEAD^{tree}
> $ git for-each-ref --format='%(contents)' refs/mytrees/first
> 
> $ git for-each-ref --format='%(contents:size)' refs/mytrees/first
> 
> ```
> 
> Signed-off-by: Christian Couder <chriscool@tuxfamily.org>
> ---
>  Documentation/git-for-each-ref.txt |  3 +++
>  ref-filter.c                       |  7 ++++++-
>  t/t6300-for-each-ref.sh            | 19 +++++++++++++++++++
>  3 files changed, 28 insertions(+), 1 deletion(-)
> 
> diff --git a/Documentation/git-for-each-ref.txt b/Documentation/git-for-each-ref.txt
> index b739412c30..2ea71c5f6c 100644
> --- a/Documentation/git-for-each-ref.txt
> +++ b/Documentation/git-for-each-ref.txt
> @@ -235,6 +235,9 @@ and `date` to extract the named component.
>  The message in a commit or a tag object is `contents`, from which
>  `contents:<part>` can be used to extract various parts out of:
>  
> +contents:size::
> +	The size in bytes of the commit or tag message.
> +
>  contents:subject::
>  	The first paragraph of the message, which typically is a
>  	single line, is taken as the "subject" of the commit or the
> diff --git a/ref-filter.c b/ref-filter.c
> index 8447cb09be..73d8bfa86d 100644
> --- a/ref-filter.c
> +++ b/ref-filter.c
> @@ -127,7 +127,8 @@ static struct used_atom {
>  			unsigned int nobracket : 1, push : 1, push_remote : 1;
>  		} remote_ref;
>  		struct {
> -			enum { C_BARE, C_BODY, C_BODY_DEP, C_LINES, C_SIG, C_SUB, C_TRAILERS } option;
> +			enum { C_BARE, C_BODY, C_BODY_DEP, C_LENGTH,
> +			       C_LINES, C_SIG, C_SUB, C_TRAILERS } option;
>  			struct process_trailer_options trailer_opts;
>  			unsigned int nlines;
>  		} contents;
> @@ -338,6 +339,8 @@ static int contents_atom_parser(const struct ref_format *format, struct used_ato
>  		atom->u.contents.option = C_BARE;
>  	else if (!strcmp(arg, "body"))
>  		atom->u.contents.option = C_BODY;
> +	else if (!strcmp(arg, "size"))
> +		atom->u.contents.option = C_LENGTH;
>  	else if (!strcmp(arg, "signature"))
>  		atom->u.contents.option = C_SIG;
>  	else if (!strcmp(arg, "subject"))
> @@ -1253,6 +1256,8 @@ static void grab_sub_body_contents(struct atom_value *val, int deref, void *buf)
>  			v->s = copy_subject(subpos, sublen);
>  		else if (atom->u.contents.option == C_BODY_DEP)
>  			v->s = xmemdupz(bodypos, bodylen);
> +		else if (atom->u.contents.option == C_LENGTH)
> +			v->s = xstrfmt("%"PRIuMAX, (uintmax_t)strlen(subpos));
>  		else if (atom->u.contents.option == C_BODY)
>  			v->s = xmemdupz(bodypos, nonsiglen);
>  		else if (atom->u.contents.option == C_SIG)
> diff --git a/t/t6300-for-each-ref.sh b/t/t6300-for-each-ref.sh
> index e9f468d360..ea9bb6dade 100755
> --- a/t/t6300-for-each-ref.sh
> +++ b/t/t6300-for-each-ref.sh
> @@ -52,6 +52,25 @@ test_atom() {
>  		sanitize_pgp <actual >actual.clean &&
>  		test_cmp expected actual.clean
>  	"
> +	# Automatically test "contents:size" atom after testing "contents"
> +	if test "$2" = "contents"
> +	then
> +		case $(git cat-file -t "$ref") in
> +		tag)
> +			# We cannot use $3 as it expects sanitize_pgp to run
> +			expect=$(git cat-file tag $ref | tail -n +6 | wc -c) ;;
> +		tree | blob)
> +			expect='' ;;
> +		commit)
> +			expect=$(printf '%s' "$3" | wc -c) ;;
> +		esac
> +		# Leave $expect unquoted to lose possible leading whitespaces
> +		echo $expect >expected
> +		test_expect_${4:-sucess} $PREREQ "basic atom: $1 contents:size" '

There is a typo here, and $expect is written to `expected', but
`test_cmp' wants `expect'.  Fixing those mistakes does not reveal any
broken tests.

> +			git for-each-ref --format="%(contents:size)" "$ref" >actual &&
> +			test_cmp expect actual
> +		'
> +	fi
>  }
>  
>  hexlen=$(test_oid hexsz)
> 

Alban
Jeff King July 31, 2020, 5:45 p.m. UTC | #2
On Fri, Jul 31, 2020 at 07:37:22PM +0200, Alban Gruin wrote:

> > +		# Leave $expect unquoted to lose possible leading whitespaces
> > +		echo $expect >expected
> > +		test_expect_${4:-sucess} $PREREQ "basic atom: $1 contents:size" '
> 
> There is a typo here, and $expect is written to `expected', but
> `test_cmp' wants `expect'.  Fixing those mistakes does not reveal any
> broken tests.

I thought at first you meant that the typo was s/expected/expect/, and
wondered how this could possibly have passed. But the typo is
s/sucess/success/, so we were in fact not running the test at all (and
were generating "test_expect_sucess: not found" messages to stderr, but
outside of any test block). Yikes.

Thanks for spotting.

-Peff
Christian Couder July 31, 2020, 8:12 p.m. UTC | #3
Hi Alban and Peff,

On Fri, Jul 31, 2020 at 7:45 PM Jeff King <peff@peff.net> wrote:
>
> On Fri, Jul 31, 2020 at 07:37:22PM +0200, Alban Gruin wrote:
>
> > > +           # Leave $expect unquoted to lose possible leading whitespaces
> > > +           echo $expect >expected
> > > +           test_expect_${4:-sucess} $PREREQ "basic atom: $1 contents:size" '
> >
> > There is a typo here, and $expect is written to `expected', but
> > `test_cmp' wants `expect'.  Fixing those mistakes does not reveal any
> > broken tests.
>
> I thought at first you meant that the typo was s/expected/expect, and
> wondered how this could possibly have passed. But the typo is
> s/sucess/success/, so we were in fact not running the test at all (and
> were generating "test_expect_sucess: not found" messages to stderr, but
> outside of any test block. Yikes.
>
> Thanks for spotting.

Yeah, I copied a suggestion from Junio in the last iteration without
properly checking it. Sorry about that and thanks for spotting and
fixing it.
Junio C Hamano July 31, 2020, 8:30 p.m. UTC | #4
Christian Couder <christian.couder@gmail.com> writes:

> Hi Alban and Peff,
>
> On Fri, Jul 31, 2020 at 7:45 PM Jeff King <peff@peff.net> wrote:
>>
>> On Fri, Jul 31, 2020 at 07:37:22PM +0200, Alban Gruin wrote:
>>
>> > > +           # Leave $expect unquoted to lose possible leading whitespaces
>> > > +           echo $expect >expected
>> > > +           test_expect_${4:-sucess} $PREREQ "basic atom: $1 contents:size" '
>> >
>> > There is a typo here, and $expect is written to `expected', but
>> > `test_cmp' wants `expect'.  Fixing those mistakes does not reveal any
>> > broken tests.
>>
>> I thought at first you meant that the typo was s/expected/expect, and
>> wondered how this could possibly have passed. But the typo is
>> s/sucess/success/, so we were in fact not running the test at all (and
>> were generating "test_expect_sucess: not found" messages to stderr, but
>> outside of any test block. Yikes.
>>
>> Thanks for spotting.
>
> Yeah, I copied a suggestion from Junio in the last iteration without
> properly checking it. Sorry about that and thanks for spotting and
> fixing it.

I probably should stop giving "perhaps along the lines of this"
suggestions too lightly and/or when I do not have enough time to
apply and test them myself.  Sorry for the gotcha.
Jeff King July 31, 2020, 8:40 p.m. UTC | #5
On Fri, Jul 31, 2020 at 01:30:19PM -0700, Junio C Hamano wrote:

> > Yeah, I copied a suggestion from Junio in the last iteration without
> > properly checking it. Sorry about that and thanks for spotting and
> > fixing it.
> 
> I probably should stop giving "perhaps along the lines of this"
> suggestion too lightly and/or when I do not have enough time to
> apply and test myself.  Sorry for the gotcha.

I dunno. I appreciate getting them, especially in patch form. It's often
a more precise description than hand-wavy English, and being a patch
makes it easy to apply into my tree as a starting point. The real trick
is that the receiver needs to know enough to distrust the suggestion and
take ownership of it. Maybe you just need a bigger disclaimer. ;)

(Only half-joking; I do try to say "not tested" or "not even compiled"
when that is the case in stuff I send out, but I'm sure I'm not
consistent).

-Peff
diff mbox series

Patch

diff --git a/Documentation/git-for-each-ref.txt b/Documentation/git-for-each-ref.txt
index b739412c30..2ea71c5f6c 100644
--- a/Documentation/git-for-each-ref.txt
+++ b/Documentation/git-for-each-ref.txt
@@ -235,6 +235,9 @@  and `date` to extract the named component.
 The message in a commit or a tag object is `contents`, from which
 `contents:<part>` can be used to extract various parts out of:
 
+contents:size::
+	The size in bytes of the commit or tag message.
+
 contents:subject::
 	The first paragraph of the message, which typically is a
 	single line, is taken as the "subject" of the commit or the
diff --git a/ref-filter.c b/ref-filter.c
index 8447cb09be..73d8bfa86d 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -127,7 +127,8 @@  static struct used_atom {
 			unsigned int nobracket : 1, push : 1, push_remote : 1;
 		} remote_ref;
 		struct {
-			enum { C_BARE, C_BODY, C_BODY_DEP, C_LINES, C_SIG, C_SUB, C_TRAILERS } option;
+			enum { C_BARE, C_BODY, C_BODY_DEP, C_LENGTH,
+			       C_LINES, C_SIG, C_SUB, C_TRAILERS } option;
 			struct process_trailer_options trailer_opts;
 			unsigned int nlines;
 		} contents;
@@ -338,6 +339,8 @@  static int contents_atom_parser(const struct ref_format *format, struct used_ato
 		atom->u.contents.option = C_BARE;
 	else if (!strcmp(arg, "body"))
 		atom->u.contents.option = C_BODY;
+	else if (!strcmp(arg, "size"))
+		atom->u.contents.option = C_LENGTH;
 	else if (!strcmp(arg, "signature"))
 		atom->u.contents.option = C_SIG;
 	else if (!strcmp(arg, "subject"))
@@ -1253,6 +1256,8 @@  static void grab_sub_body_contents(struct atom_value *val, int deref, void *buf)
 			v->s = copy_subject(subpos, sublen);
 		else if (atom->u.contents.option == C_BODY_DEP)
 			v->s = xmemdupz(bodypos, bodylen);
+		else if (atom->u.contents.option == C_LENGTH)
+			v->s = xstrfmt("%"PRIuMAX, (uintmax_t)strlen(subpos));
 		else if (atom->u.contents.option == C_BODY)
 			v->s = xmemdupz(bodypos, nonsiglen);
 		else if (atom->u.contents.option == C_SIG)
diff --git a/t/t6300-for-each-ref.sh b/t/t6300-for-each-ref.sh
index e9f468d360..ea9bb6dade 100755
--- a/t/t6300-for-each-ref.sh
+++ b/t/t6300-for-each-ref.sh
@@ -52,6 +52,25 @@  test_atom() {
 		sanitize_pgp <actual >actual.clean &&
 		test_cmp expected actual.clean
 	"
+	# Automatically test "contents:size" atom after testing "contents"
+	if test "$2" = "contents"
+	then
+		case $(git cat-file -t "$ref") in
+		tag)
+			# We cannot use $3 as it expects sanitize_pgp to run
+			expect=$(git cat-file tag $ref | tail -n +6 | wc -c) ;;
+		tree | blob)
+			expect='' ;;
+		commit)
+			expect=$(printf '%s' "$3" | wc -c) ;;
+		esac
+		# Leave $expect unquoted to lose possible leading whitespaces
+		echo $expect >expected
+		test_expect_${4:-sucess} $PREREQ "basic atom: $1 contents:size" '
+			git for-each-ref --format="%(contents:size)" "$ref" >actual &&
+			test_cmp expect actual
+		'
+	fi
 }
 
 hexlen=$(test_oid hexsz)