diff mbox series

[v6,1/3] test: add helper functions for git-bundle

Message ID 20210112022703.1884-2-worldhello.net@gmail.com (mailing list archive)
State Accepted
Commit 9901164d81dfc2e050860f7dd60e5590c3cfaa50
Headers show
Series improvements for git-bundle | expand

Commit Message

Jiang Xin Jan. 12, 2021, 2:27 a.m. UTC
From: Jiang Xin <zhiyou.jx@alibaba-inc.com>

Move git-bundle related functions from t5510 to a library, and this lib
will be shared with a new testcase t6020 which finds a known breakage of
"git-bundle".

Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
---
 t/t5510-fetch.sh           |  26 +--
 t/t6020-bundle-misc.sh     | 394 +++++++++++++++++++++++++++++++++++++
 t/test-bundle-functions.sh |  42 ++++
 3 files changed, 440 insertions(+), 22 deletions(-)
 create mode 100755 t/t6020-bundle-misc.sh
 create mode 100644 t/test-bundle-functions.sh

Comments

Ævar Arnfjörð Bjarmason May 26, 2021, 6:49 p.m. UTC | #1
On Mon, Jan 11 2021, Jiang Xin wrote:

> From: Jiang Xin <zhiyou.jx@alibaba-inc.com>
>
> Move git-bundle related functions from t5510 to a library, and this lib
> will be shared with a new testcase t6020 which finds a known breakage of
> "git-bundle".
> [...]
> +
> +# Format the output of git commands to make a user-friendly and stable
> +# text.  We can easily prepare the expect text without having to worry
> +# about future changes of the commit ID and spaces of the output.
> +make_user_friendly_and_stable_output () {
> +	sed \
> +		-e "s/${A%${A#???????}}[0-9a-f]*/<COMMIT-A>/g" \
> +		-e "s/${B%${B#???????}}[0-9a-f]*/<COMMIT-B>/g" \
> +		-e "s/${C%${C#???????}}[0-9a-f]*/<COMMIT-C>/g" \
> +		-e "s/${D%${D#???????}}[0-9a-f]*/<COMMIT-D>/g" \
> +		-e "s/${E%${E#???????}}[0-9a-f]*/<COMMIT-E>/g" \
> +		-e "s/${F%${F#???????}}[0-9a-f]*/<COMMIT-F>/g" \
> +		-e "s/${G%${G#???????}}[0-9a-f]*/<COMMIT-G>/g" \
> +		-e "s/${H%${H#???????}}[0-9a-f]*/<COMMIT-H>/g" \
> +		-e "s/${I%${I#???????}}[0-9a-f]*/<COMMIT-I>/g" \
> +		-e "s/${J%${J#???????}}[0-9a-f]*/<COMMIT-J>/g" \
> +		-e "s/${K%${K#???????}}[0-9a-f]*/<COMMIT-K>/g" \
> +		-e "s/${L%${L#???????}}[0-9a-f]*/<COMMIT-L>/g" \
> +		-e "s/${M%${M#???????}}[0-9a-f]*/<COMMIT-M>/g" \
> +		-e "s/${N%${N#???????}}[0-9a-f]*/<COMMIT-N>/g" \
> +		-e "s/${O%${O#???????}}[0-9a-f]*/<COMMIT-O>/g" \
> +		-e "s/${P%${P#???????}}[0-9a-f]*/<COMMIT-P>/g" \
> +		-e "s/${TAG1%${TAG1#???????}}[0-9a-f]*/<TAG-1>/g" \
> +		-e "s/${TAG2%${TAG2#???????}}[0-9a-f]*/<TAG-2>/g" \
> +		-e "s/${TAG3%${TAG3#???????}}[0-9a-f]*/<TAG-3>/g" \
> +		-e "s/ *\$//"
> +}

On one of the gcc farm boxes, a i386 box (gcc45) this fails because sed
gets killed after >500MB of memory use (I was just eyeballing it in
htop) on the "reate bundle from special rev: main^!" test. This with GNU
sed 4.2.2.

I suspect this regex pattern creates some runaway behavior in sed that's
since been fixed (or maybe it's the glibc regex engine?). The glibc is
2.19-18+deb8u10:
    
    + git bundle list-heads special-rev.bdl
    + make_user_friendly_and_stable_output
    + sed -e s/[0-9a-f]*/<COMMIT-A>/g -e s/[0-9a-f]*/<COMMIT-B>/g -e s/[0-9a-f]*/<COMMIT-C>/g -e s/[0-9a-f]*/<COMMIT-D>/g -e s/[0-9a-f]*/<COMMIT-E>/g -e s/[0-9a-f]*/<COMMIT-F>/g -e s/[0-9a-f]*/<COMMIT-G>/g -e s/[0-9a-f]*/<COMMIT-H>/g -e s/[0-9a-f]*/<COMMIT-I>/g -e s/[0-9a-f]*/<COMMIT-J>/g -e s/[0-9a-f]*/<COMMIT-K>/g -e s/[0-9a-f]*/<COMMIT-L>/g -e s/[0-9a-f]*/<COMMIT-M>/g -e s/[0-9a-f]*/<COMMIT-N>/g -e s/[0-9a-f]*/<COMMIT-O>/g -e s/[0-9a-f]*/<COMMIT-P>/g -e s/[0-9a-f]*/<TAG-1>/g -e s/[0-9a-f]*/<TAG-2>/g -e s/[0-9a-f]*/<TAG-3>/g -e s/ *$//
    sed: couldn't re-allocate memory
Jiang Xin May 27, 2021, 11:52 a.m. UTC | #2
Ævar Arnfjörð Bjarmason <avarab@gmail.com> 于2021年5月27日周四
上午2:51写道:
>
>
> On Mon, Jan 11 2021, Jiang Xin wrote:
>
> > From: Jiang Xin <zhiyou.jx@alibaba-inc.com>
> >
> > Move git-bundle related functions from t5510 to a library, and this
> > lib
> > will be shared with a new testcase t6020 which finds a known
> > breakage of
> > "git-bundle".
> > [...]
> > +
> > +# Format the output of git commands to make a user-friendly and
> > stable
> > +# text.  We can easily prepare the expect text without having to
> > worry
> > +# about future changes of the commit ID and spaces of the output.
> > +make_user_friendly_and_stable_output () {
> > +     sed \
> > +             -e "s/${A%${A#???????}}[0-9a-f]*/<COMMIT-A>/g" \
> > +             -e "s/${B%${B#???????}}[0-9a-f]*/<COMMIT-B>/g" \
> > +             -e "s/${C%${C#???????}}[0-9a-f]*/<COMMIT-C>/g" \
> > +             -e "s/${D%${D#???????}}[0-9a-f]*/<COMMIT-D>/g" \
> > +             -e "s/${E%${E#???????}}[0-9a-f]*/<COMMIT-E>/g" \
> > +             -e "s/${F%${F#???????}}[0-9a-f]*/<COMMIT-F>/g" \
> > +             -e "s/${G%${G#???????}}[0-9a-f]*/<COMMIT-G>/g" \
> > +             -e "s/${H%${H#???????}}[0-9a-f]*/<COMMIT-H>/g" \
> > +             -e "s/${I%${I#???????}}[0-9a-f]*/<COMMIT-I>/g" \
> > +             -e "s/${J%${J#???????}}[0-9a-f]*/<COMMIT-J>/g" \
> > +             -e "s/${K%${K#???????}}[0-9a-f]*/<COMMIT-K>/g" \
> > +             -e "s/${L%${L#???????}}[0-9a-f]*/<COMMIT-L>/g" \
> > +             -e "s/${M%${M#???????}}[0-9a-f]*/<COMMIT-M>/g" \
> > +             -e "s/${N%${N#???????}}[0-9a-f]*/<COMMIT-N>/g" \
> > +             -e "s/${O%${O#???????}}[0-9a-f]*/<COMMIT-O>/g" \
> > +             -e "s/${P%${P#???????}}[0-9a-f]*/<COMMIT-P>/g" \
> > +             -e "s/${TAG1%${TAG1#???????}}[0-9a-f]*/<TAG-1>/g" \
> > +             -e "s/${TAG2%${TAG2#???????}}[0-9a-f]*/<TAG-2>/g" \
> > +             -e "s/${TAG3%${TAG3#???????}}[0-9a-f]*/<TAG-3>/g" \
> > +             -e "s/ *\$//"
> > +}
>
> On one of the gcc farm boxes, a i386 box (gcc45) this fails because
> sed
> gets killed after >500MB of memory use (I was just eyeballing it in
> htop) on the "reate bundle from special rev: main^!" test. This with
> GNU
> sed 4.2.2.
>
> I suspect this regex pattern creates some runaway behavior in sed
> that's
> since been fixed (or maybe it's the glibc regex engine?). The glibc is
> 2.19-18+deb8u10:
>
>     + git bundle list-heads special-rev.bdl
>     + make_user_friendly_and_stable_output
>     + sed -e s/[0-9a-f]*/<COMMIT-A>/g -e s/[0-9a-f]*/<COMMIT-B>/g -e
> s/[0-9a-f]*/<COMMIT-C>/g -e s/[0-9a-f]*/<COMMIT-D>/g -e
> s/[0-9a-f]*/<COMMIT-E>/g -e s/[0-9a-f]*/<COMMIT-F>/g -e
> s/[0-9a-f]*/<COMMIT-G>/g -e s/[0-9a-f]*/<COMMIT-H>/g -e
> s/[0-9a-f]*/<COMMIT-I>/g -e s/[0-9a-f]*/<COMMIT-J>/g -e
> s/[0-9a-f]*/<COMMIT-K>/g -e s/[0-9a-f]*/<COMMIT-L>/g -e
> s/[0-9a-f]*/<COMMIT-M>/g -e s/[0-9a-f]*/<COMMIT-N>/g -e
> s/[0-9a-f]*/<COMMIT-O>/g -e s/[0-9a-f]*/<COMMIT-P>/g -e
> s/[0-9a-f]*/<TAG-1>/g -e s/[0-9a-f]*/<TAG-2>/g -e
> s/[0-9a-f]*/<TAG-3>/g -e s/ *$//
>     sed: couldn't re-allocate memory

I wrote a program on macOS to check memory footprint for sed and perl.
See:

    https://github.com/jiangxin/compare-sed-perl

Test result:

    $ go build && ./compare-sed-perl
    Command: sed  ..., MaxRSS: 901120
    Command: gsed ..., MaxRSS: 2056192
    Command: perl ..., MaxRSS: 2269184

It seems that sed (both the builtin version on macOS and GNU sed v4.8)
has less memory consumed than perl.

Can you run this program on the i386 box (gcc45) to check memory consumed
by sed and perl?

If this issue can be resolved by replacing sed with perl, the following
patch may help:

--- >8 ---
From: Jiang Xin <zhiyou.jx@alibaba-inc.com>
Date: Thu, 27 May 2021 14:31:49 +0800
Subject: [PATCH] test: use perl for complex text replacement

Ævar reported that the function `make_user_friendly_and_stable_output()`
failed on a i386 box (gcc45) in the gcc farm boxes with error:

    sed: couldn't re-allocate memory

It seems that sed (GNU sed 4.2.2) gets killed after >500MB of memory
use on the "create bundle from special rev: main^!" test.

Call perl instead of sed for complex text replacement.

Reported-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
---
 t/t5411/common-functions.sh | 27 ++++++++++++------------
 t/t5548-push-porcelain.sh   | 20 +++++++++---------
 t/t6020-bundle-misc.sh      | 42 ++++++++++++++++++-------------------
 3 files changed, 45 insertions(+), 44 deletions(-)

diff --git a/t/t5411/common-functions.sh b/t/t5411/common-functions.sh
index 6694858e18..b6d33bdfdc 100644
--- a/t/t5411/common-functions.sh
+++ b/t/t5411/common-functions.sh
@@ -39,19 +39,20 @@ create_commits_in () {
 # remove some locale error messages. The emitted human-readable errors are
 # redundant to the more machine-readable output the tests already assert.
 make_user_friendly_and_stable_output () {
-	sed \
-		-e "s/  *\$//" \
-		-e "s/  */ /g" \
-		-e "s/'/\"/g" \
-		-e "s/	/    /g" \
-		-e "s/$A/<COMMIT-A>/g" \
-		-e "s/$B/<COMMIT-B>/g" \
-		-e "s/$TAG/<TAG-v123>/g" \
-		-e "s/$ZERO_OID/<ZERO-OID>/g" \
-		-e "s/$(echo $A | cut -c1-7)[0-9a-f]*/<OID-A>/g" \
-		-e "s/$(echo $B | cut -c1-7)[0-9a-f]*/<OID-B>/g" \
-		-e "s#To $URL_PREFIX/upstream.git#To <URL/of/upstream.git>#" \
-		-e "/^error: / d"
+	perl -ne "
+		s/  *\$//;
+		s/  */ /g;
+		s/'/\"/g;
+		s/	/    /g;
+		s/$A/<COMMIT-A>/g;
+		s/$B/<COMMIT-B>/g;
+		s/$TAG/<TAG-v123>/g;
+		s/$ZERO_OID/<ZERO-OID>/g;
+		s/$(echo $A | cut -c1-7)[0-9a-f]*/<OID-A>/g;
+		s/$(echo $B | cut -c1-7)[0-9a-f]*/<OID-B>/g;
+		s#To $URL_PREFIX/upstream.git#To <URL/of/upstream.git>#;
+		next if /^error: .*$/;
+		print"
 }
 
 filter_out_user_friendly_and_stable_output () {
diff --git a/t/t5548-push-porcelain.sh b/t/t5548-push-porcelain.sh
index 5a761f3642..95e216973d 100755
--- a/t/t5548-push-porcelain.sh
+++ b/t/t5548-push-porcelain.sh
@@ -44,16 +44,16 @@ create_commits_in () {
 # without having to worry about future changes of the commit ID and spaces
 # of the output.
 make_user_friendly_and_stable_output () {
-	sed \
-		-e "s/  *\$//" \
-		-e "s/   */ /g" \
-		-e "s/	/    /g" \
-		-e "s/$A/<COMMIT-A>/g" \
-		-e "s/$B/<COMMIT-B>/g" \
-		-e "s/$ZERO_OID/<ZERO-OID>/g" \
-		-e "s/$(echo $A | cut -c1-7)[0-9a-f]*/<OID-A>/g" \
-		-e "s/$(echo $B | cut -c1-7)[0-9a-f]*/<OID-B>/g" \
-		-e "s#To $URL_PREFIX/upstream.git#To <URL/of/upstream.git>#"
+	perl -pe "
+		s/  *\$//;
+		s/   */ /g;
+		s/	/    /g;
+		s/$A/<COMMIT-A>/g;
+		s/$B/<COMMIT-B>/g;
+		s/$ZERO_OID/<ZERO-OID>/g;
+		s/$(echo $A | cut -c1-7)[0-9a-f]*/<OID-A>/g;
+		s/$(echo $B | cut -c1-7)[0-9a-f]*/<OID-B>/g;
+		s#To $URL_PREFIX/upstream.git#To <URL/of/upstream.git>#"
 }
 
 setup_upstream_and_workbench () {
diff --git a/t/t6020-bundle-misc.sh b/t/t6020-bundle-misc.sh
index 881f72fd44..f284be820f 100755
--- a/t/t6020-bundle-misc.sh
+++ b/t/t6020-bundle-misc.sh
@@ -84,27 +84,27 @@ test_commit_setvar () {
 # text.  We can easily prepare the expect text without having to worry
 # about future changes of the commit ID and spaces of the output.
 make_user_friendly_and_stable_output () {
-	sed \
-		-e "s/${A%${A#???????}}[0-9a-f]*/<COMMIT-A>/g" \
-		-e "s/${B%${B#???????}}[0-9a-f]*/<COMMIT-B>/g" \
-		-e "s/${C%${C#???????}}[0-9a-f]*/<COMMIT-C>/g" \
-		-e "s/${D%${D#???????}}[0-9a-f]*/<COMMIT-D>/g" \
-		-e "s/${E%${E#???????}}[0-9a-f]*/<COMMIT-E>/g" \
-		-e "s/${F%${F#???????}}[0-9a-f]*/<COMMIT-F>/g" \
-		-e "s/${G%${G#???????}}[0-9a-f]*/<COMMIT-G>/g" \
-		-e "s/${H%${H#???????}}[0-9a-f]*/<COMMIT-H>/g" \
-		-e "s/${I%${I#???????}}[0-9a-f]*/<COMMIT-I>/g" \
-		-e "s/${J%${J#???????}}[0-9a-f]*/<COMMIT-J>/g" \
-		-e "s/${K%${K#???????}}[0-9a-f]*/<COMMIT-K>/g" \
-		-e "s/${L%${L#???????}}[0-9a-f]*/<COMMIT-L>/g" \
-		-e "s/${M%${M#???????}}[0-9a-f]*/<COMMIT-M>/g" \
-		-e "s/${N%${N#???????}}[0-9a-f]*/<COMMIT-N>/g" \
-		-e "s/${O%${O#???????}}[0-9a-f]*/<COMMIT-O>/g" \
-		-e "s/${P%${P#???????}}[0-9a-f]*/<COMMIT-P>/g" \
-		-e "s/${TAG1%${TAG1#???????}}[0-9a-f]*/<TAG-1>/g" \
-		-e "s/${TAG2%${TAG2#???????}}[0-9a-f]*/<TAG-2>/g" \
-		-e "s/${TAG3%${TAG3#???????}}[0-9a-f]*/<TAG-3>/g" \
-		-e "s/ *\$//"
+	perl -pe "
+		s/${A%${A#???????}}[0-9a-f]*/<COMMIT-A>/g;
+		s/${B%${B#???????}}[0-9a-f]*/<COMMIT-B>/g;
+		s/${C%${C#???????}}[0-9a-f]*/<COMMIT-C>/g;
+		s/${D%${D#???????}}[0-9a-f]*/<COMMIT-D>/g;
+		s/${E%${E#???????}}[0-9a-f]*/<COMMIT-E>/g;
+		s/${F%${F#???????}}[0-9a-f]*/<COMMIT-F>/g;
+		s/${G%${G#???????}}[0-9a-f]*/<COMMIT-G>/g;
+		s/${H%${H#???????}}[0-9a-f]*/<COMMIT-H>/g;
+		s/${I%${I#???????}}[0-9a-f]*/<COMMIT-I>/g;
+		s/${J%${J#???????}}[0-9a-f]*/<COMMIT-J>/g;
+		s/${K%${K#???????}}[0-9a-f]*/<COMMIT-K>/g;
+		s/${L%${L#???????}}[0-9a-f]*/<COMMIT-L>/g;
+		s/${M%${M#???????}}[0-9a-f]*/<COMMIT-M>/g;
+		s/${N%${N#???????}}[0-9a-f]*/<COMMIT-N>/g;
+		s/${O%${O#???????}}[0-9a-f]*/<COMMIT-O>/g;
+		s/${P%${P#???????}}[0-9a-f]*/<COMMIT-P>/g;
+		s/${TAG1%${TAG1#???????}}[0-9a-f]*/<TAG-1>/g;
+		s/${TAG2%${TAG2#???????}}[0-9a-f]*/<TAG-2>/g;
+		s/${TAG3%${TAG3#???????}}[0-9a-f]*/<TAG-3>/g;
+		s/ *\$//"
 }
 
 #            (C)   (D, pull/1/head, topic/1)
Ævar Arnfjörð Bjarmason May 27, 2021, 12:19 p.m. UTC | #3
On Thu, May 27 2021, Jiang Xin wrote:

> Ævar Arnfjörð Bjarmason <avarab@gmail.com> 于2021年5月27日周四
> 上午2:51写道:
>>
>>
>> On Mon, Jan 11 2021, Jiang Xin wrote:
>>
>> > From: Jiang Xin <zhiyou.jx@alibaba-inc.com>
>> >
>> > Move git-bundle related functions from t5510 to a library, and this
>> > lib
>> > will be shared with a new testcase t6020 which finds a known
>> > breakage of
>> > "git-bundle".
>> > [...]
>> > +
>> > +# Format the output of git commands to make a user-friendly and
>> > stable
>> > +# text.  We can easily prepare the expect text without having to
>> > worry
>> > +# about future changes of the commit ID and spaces of the output.
>> > +make_user_friendly_and_stable_output () {
>> > +     sed \
>> > +             -e "s/${A%${A#???????}}[0-9a-f]*/<COMMIT-A>/g" \
>> > +             -e "s/${B%${B#???????}}[0-9a-f]*/<COMMIT-B>/g" \
>> > +             -e "s/${C%${C#???????}}[0-9a-f]*/<COMMIT-C>/g" \
>> > +             -e "s/${D%${D#???????}}[0-9a-f]*/<COMMIT-D>/g" \
>> > +             -e "s/${E%${E#???????}}[0-9a-f]*/<COMMIT-E>/g" \
>> > +             -e "s/${F%${F#???????}}[0-9a-f]*/<COMMIT-F>/g" \
>> > +             -e "s/${G%${G#???????}}[0-9a-f]*/<COMMIT-G>/g" \
>> > +             -e "s/${H%${H#???????}}[0-9a-f]*/<COMMIT-H>/g" \
>> > +             -e "s/${I%${I#???????}}[0-9a-f]*/<COMMIT-I>/g" \
>> > +             -e "s/${J%${J#???????}}[0-9a-f]*/<COMMIT-J>/g" \
>> > +             -e "s/${K%${K#???????}}[0-9a-f]*/<COMMIT-K>/g" \
>> > +             -e "s/${L%${L#???????}}[0-9a-f]*/<COMMIT-L>/g" \
>> > +             -e "s/${M%${M#???????}}[0-9a-f]*/<COMMIT-M>/g" \
>> > +             -e "s/${N%${N#???????}}[0-9a-f]*/<COMMIT-N>/g" \
>> > +             -e "s/${O%${O#???????}}[0-9a-f]*/<COMMIT-O>/g" \
>> > +             -e "s/${P%${P#???????}}[0-9a-f]*/<COMMIT-P>/g" \
>> > +             -e "s/${TAG1%${TAG1#???????}}[0-9a-f]*/<TAG-1>/g" \
>> > +             -e "s/${TAG2%${TAG2#???????}}[0-9a-f]*/<TAG-2>/g" \
>> > +             -e "s/${TAG3%${TAG3#???????}}[0-9a-f]*/<TAG-3>/g" \
>> > +             -e "s/ *\$//"
>> > +}
>>
>> On one of the gcc farm boxes, a i386 box (gcc45) this fails because
>> sed
>> gets killed after >500MB of memory use (I was just eyeballing it in
>> htop) on the "reate bundle from special rev: main^!" test. This with
>> GNU
>> sed 4.2.2.
>>
>> I suspect this regex pattern creates some runaway behavior in sed
>> that's
>> since been fixed (or maybe it's the glibc regex engine?). The glibc is
>> 2.19-18+deb8u10:
>>
>>     + git bundle list-heads special-rev.bdl
>>     + make_user_friendly_and_stable_output
>>     + sed -e s/[0-9a-f]*/<COMMIT-A>/g -e s/[0-9a-f]*/<COMMIT-B>/g -e
>> s/[0-9a-f]*/<COMMIT-C>/g -e s/[0-9a-f]*/<COMMIT-D>/g -e
>> s/[0-9a-f]*/<COMMIT-E>/g -e s/[0-9a-f]*/<COMMIT-F>/g -e
>> s/[0-9a-f]*/<COMMIT-G>/g -e s/[0-9a-f]*/<COMMIT-H>/g -e
>> s/[0-9a-f]*/<COMMIT-I>/g -e s/[0-9a-f]*/<COMMIT-J>/g -e
>> s/[0-9a-f]*/<COMMIT-K>/g -e s/[0-9a-f]*/<COMMIT-L>/g -e
>> s/[0-9a-f]*/<COMMIT-M>/g -e s/[0-9a-f]*/<COMMIT-N>/g -e
>> s/[0-9a-f]*/<COMMIT-O>/g -e s/[0-9a-f]*/<COMMIT-P>/g -e
>> s/[0-9a-f]*/<TAG-1>/g -e s/[0-9a-f]*/<TAG-2>/g -e
>> s/[0-9a-f]*/<TAG-3>/g -e s/ *$//
>>     sed: couldn't re-allocate memory
>
> I wrote a program on macOS to check memory footprint for sed and perl.
> See:
>
>     https://github.com/jiangxin/compare-sed-perl

Interesting use of Go for as a /usr/bin/time -v replacement :)

After changing your int64 to int32 and digging up how to cross-compile
Go I get similar results, it's because your test has actual short SHA-1s
in the "-e 's///g'"'s, but notice how in the trace I have it's
e.g. "s/[0-9a-f]*/<COMMIT-A>/g".

That's the problem, so that Go command won't reproduce it. Anyway,
changing the test to emit to "input" first and running this shows it:
    
    avar@gcc45:/run/user/1632/git/t/trash directory.t6020-bundle-misc$ /usr/bin/time -v sed -e 's/[0-9a-f]*/<COMMIT-A>/g' -e 's/[0-9a-f]*/<COMMIT-B>/g' -e 's/[0-9a-f]*/<COMMIT-C>/g' -e 's/[0-9a-f]*/<COMMIT-D>/g' -e 's/[0-9a-f]*/<COMMIT-E>/g' -e 's/[0-9a-f]*/<COMMIT-F>/g' -e 's/[0-9a-f]*/<COMMIT-G>/g' -e 's/[0-9a-f]*/<COMMIT-H>/g' -e 's/[0-9a-f]*/<COMMIT-I>/g' -e 's/[0-9a-f]*/<COMMIT-J>/g' -e 's/[0-9a-f]*/<COMMIT-K>/g' -e 's/[0-9a-f]*/<COMMIT-L>/g' -e 's/[0-9a-f]*/<COMMIT-M>/g' -e 's/[0-9a-f]*/<COMMIT-N>/g' -e 's/[0-9a-f]*/<COMMIT-O>/g' -e 's/[0-9a-f]*/<COMMIT-P>/g' -e 's/[0-9a-f]*/<TAG-1>/g' -e 's/[0-9a-f]*/<TAG-2>/g' -e 's/[0-9a-f]*/<TAG-3>/g' -e 's/ *$//' <input
    sed: couldn't re-allocate memory
    Command exited with non-zero status 4
            Command being timed: "sed -e s/[0-9a-f]*/<COMMIT-A>/g -e s/[0-9a-f]*/<COMMIT-B>/g -e s/[0-9a-f]*/<COMMIT-C>/g -e s/[0-9a-f]*/<COMMIT-D>/g -e s/[0-9a-f]*/<COMMIT-E>/g -e s/[0-9a-f]*/<COMMIT-F>/g -e s/[0-9a-f]*/<COMMIT-G>/g -e s/[0-9a-f]*/<COMMIT-H>/g -e s/[0-9a-f]*/<COMMIT-I>/g -e s/[0-9a-f]*/<COMMIT-J>/g -e s/[0-9a-f]*/<COMMIT-K>/g -e s/[0-9a-f]*/<COMMIT-L>/g -e s/[0-9a-f]*/<COMMIT-M>/g -e s/[0-9a-f]*/<COMMIT-N>/g -e s/[0-9a-f]*/<COMMIT-O>/g -e s/[0-9a-f]*/<COMMIT-P>/g -e s/[0-9a-f]*/<TAG-1>/g -e s/[0-9a-f]*/<TAG-2>/g -e s/[0-9a-f]*/<TAG-3>/g -e s/ *$//"
            User time (seconds): 130.00
            System time (seconds): 2.42
            Percent of CPU this job got: 100%
            Elapsed (wall clock) time (h:mm:ss or m:ss): 2:12.41
            Average shared text size (kbytes): 0
            Average unshared data size (kbytes): 0
            Average stack size (kbytes): 0
            Average total size (kbytes): 0
            Maximum resident set size (kbytes): 1030968
            Average resident set size (kbytes): 0
            Major (requiring I/O) page faults: 0
            Minor (reclaiming a frame) page faults: 257333
            Voluntary context switches: 1
            Involuntary context switches: 12578
            Swaps: 0
            File system inputs: 0
            File system outputs: 0
            Socket messages sent: 0
            Socket messages received: 0
            Signals delivered: 0
            Page size (bytes): 4096
            Exit status: 4

But no, the issue as it turns out is not Perl v.s. Sed, it's that
there's some bug in the shellscript / tooling version (happens with both
dash 0.5.7-4 and bash 4.3-11+deb8u2 on that box) where those expansions
like ${A%${A#??????0?}} resolve to nothing.

So if we make that:

        cat >input &&
        cat input >&2 &&
        sed -e "s/${A%${A#??????0?}}[0-9a-f]*/<COMMIT-A>/g" <input >input.tmp && mv input.tmp input &&
        cat input >&2 &&
        sed -e "s/${B%${B#???????}}[0-9a-f]*/<COMMIT-B>/g" <input >input.tmp && mv input.tmp input &&
        cat input >&2 &&

We get things like:
    
    + sed -e s/[0-9a-f]*/<COMMIT-A>/g
    + mv input.tmp input
    + cat input
    <COMMIT-A> <COMMIT-A>r<COMMIT-A>s<COMMIT-A>/<COMMIT-A>h<COMMIT-A>s<COMMIT-A>/<COMMIT-A>m<COMMIT-A>i<COMMIT-A>n<COMMIT-A>
    + sed -e s/[0-9a-f]*/<COMMIT-B>/g
    + mv input.tmp input
    + cat input
    <COMMIT-B><<COMMIT-B>C<COMMIT-B>O<COMMIT-B>M<COMMIT-B>M<COMMIT-B>I<COMMIT-B>T<COMMIT-B>-<COMMIT-B>A<COMMIT-B>><COMMIT-B> <COMMIT-B><<COMMIT-B>C<COMMIT-B>O<COMMIT-B>M<COMMIT-B>M<COMMIT-B>I<COMMIT-B>T<COMMIT-B>-<COMMIT-B>A<COMMIT-B>><COMMIT-B>r<COMMIT-B><<COMMIT-B>C<COMMIT-B>O<COMMIT-B>M<COMMIT-B>M<COMMIT-B>I<COMMIT-B>T<COMMIT-B>-<COMMIT-B>A<COMMIT-B>><COMMIT-B>s<COMMIT-B><<COMMIT-B>C<COMMIT-B>O<COMMIT-B>M<COMMIT-B>M<COMMIT-B>I<COMMIT-B>T<COMMIT-B>-<COMMIT-B>A<COMMIT-B>><COMMIT-B>/<COMMIT-B><<COMMIT-B>C<COMMIT-B>O<COMMIT-B>M<COMMIT-B>M<COMMIT-B>I<COMMIT-B>T<COMMIT-B>-<COMMIT-B>A<COMMIT-B>><COMMIT-B>h<COMMIT-B><<COMMIT-B>C<COMMIT-B>O<COMMIT-B>M<COMMIT-B>M<COMMIT-B>I<COMMIT-B>T<COMMIT-B>-<COMMIT-B>A<COMMIT-B>><COMMIT-B>s<COMMIT-B><<COMMIT-B>C<COMMIT-B>O<COMMIT-B>M<COMMIT-B>M<COMMIT-B>I<COMMIT-B>T<COMMIT-B>-<COMMIT-B>A<COMMIT-B>><COMMIT-B>/<COMMIT-B><<COMMIT-B>C<COMMIT-B>O<COMMIT-B>M<COMMIT-B>M<COMMIT-B>I<COMMIT-B>T<COMMIT-B>-<COMMIT-B>A<COMMIT-B>><COMMIT-B>m<COMMIT-B><<COMMIT-B>C<COMMIT-B>O<COMMIT-B>M<COMMIT-B>M<COMMIT-B>I<COMMIT-B>T<COMMIT-B>-<COMMIT-B>A<COMMIT-B>><COMMIT-B>i<COMMIT-B><<COMMIT-B>C<COMMIT-B>O<COMMIT-B>M<COMMIT-B>M<COMMIT-B>I<COMMIT-B>T<COMMIT-B>-<COMMIT-B>A<COMMIT-B>><COMMIT-B>n<COMMIT-B><<COMMIT-B>C<COMMIT-B>O<COMMIT-B>M<COMMIT-B>M<COMMIT-B>I<COMMIT-B>T<COMMIT-B>-<COMMIT-B>A<COMMIT-B>><COMMIT-B>
    [...]

etc. I.e. it's the sed expression itself that's the issue. I.e. you
should be able to reproduce this locally with something like:

    echo 0 | sed -e 's/[0-9]*/<BEGIN>0<END>/g' -e 's/[0-9]*/<BEGIN>0<END>/g' -e 's/[0-9]*/<BEGIN>0<END>/g' -e 's/[0-9]*/<BEGIN>0<END>/g' -e 's/[0-9]*/<BEGIN>0<END>/g' -e 's/[0-9]*/<BEGIN>0<END>/g' -e 's/[0-9]*/<BEGIN>0<END>/g' -e 's/[0-9]*/<BEGIN>0<END>/g'

If not just copy the -e a few more times.

Anyway, looking at this whole test file with fresh eyes this pattern
seems very strange. You duplicated most of test_commit with this
test_commit_setvar. It's a bit more verbosity but why not just use:

    test_commit ...
    A=$(git rev-parse HEAD)

Or teach test_commit a --rev-parse option or something and:

    A=$(test_commit ...)

This make_user_friendly_and_stable_output then actually loses
information, e.g. sometimes the bundle output you're testing emits
trailing spaces, but the normalization function overzelously trims that.

I think this whole thing would be much simpler with the above and then
something like:
    
    @@ -146,7 +126,8 @@ test_expect_success 'setup' '
     
            # branch main: merge commit I & J
            git checkout main &&
    -       test_commit_setvar --merge I topic/1 "Merge commit I" &&
    +       git merge --no-edit --no-ff -m"Merge commit I" topic/1 &&
    +       I=$(git rev-parse HEAD) &&
            test_commit_setvar --merge J refs/pull/2/head "Merge commit J" &&
     
            # branch main: commit K
    @@ -172,18 +153,18 @@ test_expect_success 'create bundle from special rev: main^!' '
     
            git bundle list-heads special-rev.bdl |
                    make_user_friendly_and_stable_output >actual &&
    -       cat >expect <<-\EOF &&
    -       <COMMIT-P> refs/heads/main
    +       cat >expect <<-EOF &&
    +       $P refs/heads/main
            EOF
            test_cmp expect actual &&

Or just add a --merge option to test_commit itself.
Jeff King May 27, 2021, 1:48 p.m. UTC | #4
On Thu, May 27, 2021 at 02:19:04PM +0200, Ævar Arnfjörð Bjarmason wrote:

> Anyway, looking at this whole test file with fresh eyes this pattern
> seems very strange. You duplicated most of test_commit with this
> test_commit_setvar. It's a bit more verbosity but why not just use:
> 
>     test_commit ...
>     A=$(git rev-parse HEAD)
> 
> Or teach test_commit a --rev-parse option or something and:
> 
>     A=$(test_commit ...)

The latter would be nice (and maybe even could be the default, as the
stdout is usually just going to --verbose output anyway). But I don't
think it works, because the $() is a subshell, so we lose the side
effect of test_tick incrementing the timestamp counter.

I wasn't following this thread carefully, but your suggestion to just
do "A=$(git rev-parse HEAD)" seems quite readable to me.

-Peff
Felipe Contreras May 27, 2021, 7:19 p.m. UTC | #5
Ævar Arnfjörð Bjarmason wrote:
> 
> On Thu, May 27 2021, Jiang Xin wrote:
> 
> > Ævar Arnfjörð Bjarmason <avarab@gmail.com> 于2021年5月27日周四
> > 上午2:51写道:
> >>
> >>
> >> On Mon, Jan 11 2021, Jiang Xin wrote:
> >>
> >> > From: Jiang Xin <zhiyou.jx@alibaba-inc.com>
> >> >
> >> > Move git-bundle related functions from t5510 to a library, and this
> >> > lib
> >> > will be shared with a new testcase t6020 which finds a known
> >> > breakage of
> >> > "git-bundle".
> >> > [...]
> >> > +
> >> > +# Format the output of git commands to make a user-friendly and
> >> > stable
> >> > +# text.  We can easily prepare the expect text without having to
> >> > worry
> >> > +# about future changes of the commit ID and spaces of the output.
> >> > +make_user_friendly_and_stable_output () {
> >> > +     sed \
> >> > +             -e "s/${A%${A#???????}}[0-9a-f]*/<COMMIT-A>/g" \
> >> > +             -e "s/${B%${B#???????}}[0-9a-f]*/<COMMIT-B>/g" \
> >> > +             -e "s/${C%${C#???????}}[0-9a-f]*/<COMMIT-C>/g" \
> >> > +             -e "s/${D%${D#???????}}[0-9a-f]*/<COMMIT-D>/g" \
> >> > +             -e "s/${E%${E#???????}}[0-9a-f]*/<COMMIT-E>/g" \
> >> > +             -e "s/${F%${F#???????}}[0-9a-f]*/<COMMIT-F>/g" \
> >> > +             -e "s/${G%${G#???????}}[0-9a-f]*/<COMMIT-G>/g" \
> >> > +             -e "s/${H%${H#???????}}[0-9a-f]*/<COMMIT-H>/g" \
> >> > +             -e "s/${I%${I#???????}}[0-9a-f]*/<COMMIT-I>/g" \
> >> > +             -e "s/${J%${J#???????}}[0-9a-f]*/<COMMIT-J>/g" \
> >> > +             -e "s/${K%${K#???????}}[0-9a-f]*/<COMMIT-K>/g" \
> >> > +             -e "s/${L%${L#???????}}[0-9a-f]*/<COMMIT-L>/g" \
> >> > +             -e "s/${M%${M#???????}}[0-9a-f]*/<COMMIT-M>/g" \
> >> > +             -e "s/${N%${N#???????}}[0-9a-f]*/<COMMIT-N>/g" \
> >> > +             -e "s/${O%${O#???????}}[0-9a-f]*/<COMMIT-O>/g" \
> >> > +             -e "s/${P%${P#???????}}[0-9a-f]*/<COMMIT-P>/g" \
> >> > +             -e "s/${TAG1%${TAG1#???????}}[0-9a-f]*/<TAG-1>/g" \
> >> > +             -e "s/${TAG2%${TAG2#???????}}[0-9a-f]*/<TAG-2>/g" \
> >> > +             -e "s/${TAG3%${TAG3#???????}}[0-9a-f]*/<TAG-3>/g" \
> >> > +             -e "s/ *\$//"
> >> > +}
> >>
> >> On one of the gcc farm boxes, a i386 box (gcc45) this fails because
> >> sed
> >> gets killed after >500MB of memory use (I was just eyeballing it in
> >> htop) on the "reate bundle from special rev: main^!" test. This with
> >> GNU
> >> sed 4.2.2.
> >>
> >> I suspect this regex pattern creates some runaway behavior in sed
> >> that's
> >> since been fixed (or maybe it's the glibc regex engine?). The glibc is
> >> 2.19-18+deb8u10:
> >>
> >>     + git bundle list-heads special-rev.bdl
> >>     + make_user_friendly_and_stable_output
> >>     + sed -e s/[0-9a-f]*/<COMMIT-A>/g -e s/[0-9a-f]*/<COMMIT-B>/g -e
> >> s/[0-9a-f]*/<COMMIT-C>/g -e s/[0-9a-f]*/<COMMIT-D>/g -e
> >> s/[0-9a-f]*/<COMMIT-E>/g -e s/[0-9a-f]*/<COMMIT-F>/g -e
> >> s/[0-9a-f]*/<COMMIT-G>/g -e s/[0-9a-f]*/<COMMIT-H>/g -e
> >> s/[0-9a-f]*/<COMMIT-I>/g -e s/[0-9a-f]*/<COMMIT-J>/g -e
> >> s/[0-9a-f]*/<COMMIT-K>/g -e s/[0-9a-f]*/<COMMIT-L>/g -e
> >> s/[0-9a-f]*/<COMMIT-M>/g -e s/[0-9a-f]*/<COMMIT-N>/g -e
> >> s/[0-9a-f]*/<COMMIT-O>/g -e s/[0-9a-f]*/<COMMIT-P>/g -e
> >> s/[0-9a-f]*/<TAG-1>/g -e s/[0-9a-f]*/<TAG-2>/g -e
> >> s/[0-9a-f]*/<TAG-3>/g -e s/ *$//
> >>     sed: couldn't re-allocate memory
> >
> > I wrote a program on macOS to check memory footprint for sed and perl.
> > See:
> >
> >     https://github.com/jiangxin/compare-sed-perl
> 
> Interesting use of Go for as a /usr/bin/time -v replacement :)

Here's a Ruby version:
https://dpaste.com/FYT2QKHJE

I'm not sure if will be useful in this particular case, but Ruby code
always ends up simpler ;)
Jiang Xin June 1, 2021, 9:42 a.m. UTC | #6
Ævar Arnfjörð Bjarmason <avarab@gmail.com> 于2021年5月27日周四 下午8:49写道:
>
> But no, the issue as it turns out is not Perl v.s. Sed, it's that
> there's some bug in the shellscript / tooling version (happens with both
> dash 0.5.7-4 and bash 4.3-11+deb8u2 on that box) where those expansions
> like ${A%${A#??????0?}} resolve to nothing.

That's the root cause.  It can be reproduced by running the following
test script:

```
#!/bin/sh
# test script: test.sh

test_commit_setvar () {
        var=$1 &&
        oid=1234567890123456789012345678901234567890 &&
        eval $var=$oid
}

test_commit_setvar A
echo "A: $A"
echo "Abbrev of A: ${A%${A#???????}}"
```

By running different version of dash, we can see that dash 0.5.7 fail the test:

```
$ /opt/dash/0.5.11/bin/dash test.sh
A: 1234567890123456789012345678901234567890
Abbrev of A: 1234567

$ /opt/dash/0.5.7/bin/dash test.sh
A: 1234567890123456789012345678901234567890
Abbrev of A:
```

This issue can be fixed using the following example:

```
#!/bin/sh

test_commit_setvar () {
        var=$1 &&
        oid=1234567890123456789012345678901234567890 &&
        suffix=${oid#???????} &&
        oid=${oid%$suffix} &&
        eval $var=$oid
}

test_commit_setvar A
echo "Abbrev of A: $A"
```

> Anyway, looking at this whole test file with fresh eyes this pattern
> seems very strange. You duplicated most of test_commit with this
> test_commit_setvar. It's a bit more verbosity but why not just use:
>
>     test_commit ...
>     A=$(git rev-parse HEAD)

The function "test_commit()" in "test-lib-function.sh" always creates
tags and it cannot make merge commit. So I rewrite a new function
which reuse the scaffold of "test_commit".

BTW, sorry for the late reply, will send patch later.

--
Jiang Xin
Jiang Xin June 1, 2021, 9:45 a.m. UTC | #7
Felipe Contreras <felipe.contreras@gmail.com> 于2021年5月28日周五 上午3:19写道:
>
> Ævar Arnfjörð Bjarmason wrote:
> >
> > On Thu, May 27 2021, Jiang Xin wrote:
> > > I wrote a program on macOS to check memory footprint for sed and perl.
> > > See:
> > >
> > >     https://github.com/jiangxin/compare-sed-perl
> >
> > Interesting use of Go for as a /usr/bin/time -v replacement :)
>
> Here's a Ruby version:
> https://dpaste.com/FYT2QKHJE

Nice, it's much simpler.

> I'm not sure if will be useful in this particular case, but Ruby code
> always ends up simpler ;)
>
> --
> Felipe Contreras
Ævar Arnfjörð Bjarmason June 1, 2021, 11:50 a.m. UTC | #8
On Tue, Jun 01 2021, Jiang Xin wrote:

> Ævar Arnfjörð Bjarmason <avarab@gmail.com> 于2021年5月27日周四 下午8:49写道:
>>
>> But no, the issue as it turns out is not Perl v.s. Sed, it's that
>> there's some bug in the shellscript / tooling version (happens with both
>> dash 0.5.7-4 and bash 4.3-11+deb8u2 on that box) where those expansions
>> like ${A%${A#??????0?}} resolve to nothing.
>
> That's the root cause.  It can be reproduced by running the following
> test script:
>
> ```
> #!/bin/sh
> # test script: test.sh
>
> test_commit_setvar () {
>         var=$1 &&
>         oid=1234567890123456789012345678901234567890 &&
>         eval $var=$oid
> }
>
> test_commit_setvar A
> echo "A: $A"
> echo "Abbrev of A: ${A%${A#???????}}"
> ```
>
> By running different version of dash, we can see that dash 0.5.7 fail the test:
>
> ```
> $ /opt/dash/0.5.11/bin/dash test.sh
> A: 1234567890123456789012345678901234567890
> Abbrev of A: 1234567
>
> $ /opt/dash/0.5.7/bin/dash test.sh
> A: 1234567890123456789012345678901234567890
> Abbrev of A:
> ```
>
> This issue can be fixed using the following example:
>
> ```
> #!/bin/sh
>
> test_commit_setvar () {
>         var=$1 &&
>         oid=1234567890123456789012345678901234567890 &&
>         suffix=${oid#???????} &&
>         oid=${oid%$suffix} &&
>         eval $var=$oid
> }
>
> test_commit_setvar A
> echo "Abbrev of A: $A"
> ```

*nod*

>> Anyway, looking at this whole test file with fresh eyes this pattern
>> seems very strange. You duplicated most of test_commit with this
>> test_commit_setvar. It's a bit more verbosity but why not just use:
>>
>>     test_commit ...
>>     A=$(git rev-parse HEAD)
>
> The function "test_commit()" in "test-lib-function.sh" always creates
> tags and it cannot make merge commit. So I rewrite a new function
> which reuse the scaffold of "test_commit".

It's had a --no-tag since 3803a3a099 (t: add --no-tag option to
test_commit, 2021-02-09). I also have patches in "next" to add more
options, you can just add more, having a --merge and maybe a way to tell
it to eval the rev-parse into a given variable seem like sensible
additions.

> BTW, sorry for the late reply, will send patch later.

My main point was that looking at this I think it's very much over the
complexity v.s. benefit line on the "complexity" side.

Even if there wasn't a --no-tag just using "test_commit" with a "git tag
-d" and "commit_X=$(git rev-parse HEAD)" is less magical and more
readable.

I.e. the mostly copy/pasted from test-lib-functions.sh function is ~70
lines, the whole setup function is 50 lines.

And as I noted with the whitespace getting lost in the munging the end
result is actually less reliable than just doing a test_cmp with $(git
rev-parse ...) instead of <COMMIT-XYZ>.

If you were trying to avoid the whitespace warnings then see the
"'s/Z$//'" pattern in t0000-basic.sh for how we've usually tested that,
i.e. had a "Z" at the end mark intentional whitespace for
test_cmp-alike.

There's a big value in the test suite being mostly consistent (which it
somewhat isn't, but we're hopefully getting there). I.e. the goal isn't
to optimize each test file to be as small as possible, but to e.g. have
the next person maintaining it not wondering where <COMMIT-P> comes
from, understanding some test_commit-alike that eval's variables into
existence, how it's subtly different (if at all) from test_commit etc.
Jiang Xin June 1, 2021, 1:20 p.m. UTC | #9
Ævar Arnfjörð Bjarmason <avarab@gmail.com> 于2021年6月1日周二 下午8:04写道:
>
>
> On Tue, Jun 01 2021, Jiang Xin wrote:
>
> > Ævar Arnfjörð Bjarmason <avarab@gmail.com> 于2021年5月27日周四 下午8:49写道:
> >>
> >> But no, the issue as it turns out is not Perl v.s. Sed, it's that
> >> there's some bug in the shellscript / tooling version (happens with both
> >> dash 0.5.7-4 and bash 4.3-11+deb8u2 on that box) where those expansions
> >> like ${A%${A#??????0?}} resolve to nothing.
> >
> > That's the root cause.  It can be reproduced by running the following
> > test script:
> >
> > ```
> > #!/bin/sh
> > # test script: test.sh
> >
> > test_commit_setvar () {
> >         var=$1 &&
> >         oid=1234567890123456789012345678901234567890 &&
> >         eval $var=$oid
> > }
> >
> > test_commit_setvar A
> > echo "A: $A"
> > echo "Abbrev of A: ${A%${A#???????}}"
> > ```
> >
> > By running different version of dash, we can see that dash 0.5.7 fail the test:
> >
> > ```
> > $ /opt/dash/0.5.11/bin/dash test.sh
> > A: 1234567890123456789012345678901234567890
> > Abbrev of A: 1234567
> >
> > $ /opt/dash/0.5.7/bin/dash test.sh
> > A: 1234567890123456789012345678901234567890
> > Abbrev of A:
> > ```
> >
> > This issue can be fixed using the following example:
> >
> > ```
> > #!/bin/sh
> >
> > test_commit_setvar () {
> >         var=$1 &&
> >         oid=1234567890123456789012345678901234567890 &&
> >         suffix=${oid#???????} &&
> >         oid=${oid%$suffix} &&
> >         eval $var=$oid
> > }
> >
> > test_commit_setvar A
> > echo "Abbrev of A: $A"
> > ```
>
> *nod*
>
> >> Anyway, looking at this whole test file with fresh eyes this pattern
> >> seems very strange. You duplicated most of test_commit with this
> >> test_commit_setvar. It's a bit more verbosity but why not just use:
> >>
> >>     test_commit ...
> >>     A=$(git rev-parse HEAD)
> >
> > The function "test_commit()" in "test-lib-function.sh" always creates
> > tags and it cannot make merge commit. So I rewrite a new function
> > which reuse the scaffold of "test_commit".
>
> It's had a --no-tag since 3803a3a099 (t: add --no-tag option to
> test_commit, 2021-02-09). I also have patches in "next" to add more
> options, you can just add more, having a --merge and maybe a way to tell
> it to eval the rev-parse into a given variable seem like sensible
> additions.
>
> > BTW, sorry for the late reply, will send patch later.
>
> My main point was that looking at this I think it's very much over the
> complexity v.s. benefit line on the "complexity" side.
>
> Even if there wasn't a --no-tag just using "test_commit" with a "git tag
> -d" and "commit_X=$(git rev-parse HEAD)" is less magical and more
> readable.
>
> I.e. the mostly copy/pasted from test-lib-functions.sh function is ~70
> lines, the whole setup function is 50 lines.
>
> And as I noted with the whitespace getting lost in the munging the end
> result is actually less reliable than just doing a test_cmp with $(git
> rev-parse ...) instead of <COMMIT-XYZ>.
>
> If you were trying to avoid the whitespace warnings then see the
> "'s/Z$//'" pattern in t0000-basic.sh for how we've usually tested that,
> i.e. had a "Z" at the end mark intentional whitespace for
> test_cmp-alike.
>
> There's a big value in the test suite being mostly consistent (which it
> somewhat isn't, but we're hopefully getting there). I.e. the goal isn't
> to optimize each test file to be as small as possible, but to e.g. have
> the next person maintaining it not wondering where <COMMIT-P> comes
> from, understanding some test_commit-alike that eval's variables into
> existence, how it's subtly different (if at all) from test_commit etc.

Will send a patch for quick fix for t6020 which is broken on older
version of bash.

After changes on "test_commit()" of "test-lib-function.sh" has been
merge to master branch, I will try to refactor t6020 again to remove
`test_commit_setvar()` and reuse `test_commit()`.
diff mbox series

Patch

diff --git a/t/t5510-fetch.sh b/t/t5510-fetch.sh
index 2013051a64..1e398380eb 100755
--- a/t/t5510-fetch.sh
+++ b/t/t5510-fetch.sh
@@ -6,22 +6,10 @@  test_description='Per branch config variables affects "git fetch".
 '
 
 . ./test-lib.sh
+. "$TEST_DIRECTORY"/test-bundle-functions.sh
 
 D=$(pwd)
 
-test_bundle_object_count () {
-	git verify-pack -v "$1" >verify.out &&
-	test "$2" = $(grep "^$OID_REGEX " verify.out | wc -l)
-}
-
-convert_bundle_to_pack () {
-	while read x && test -n "$x"
-	do
-		:;
-	done
-	cat
-}
-
 test_expect_success setup '
 	echo >file original &&
 	git add file &&
@@ -312,9 +300,7 @@  test_expect_success 'unbundle 1' '
 
 test_expect_success 'bundle 1 has only 3 files ' '
 	cd "$D" &&
-	convert_bundle_to_pack <bundle1 >bundle.pack &&
-	git index-pack bundle.pack &&
-	test_bundle_object_count bundle.pack 3
+	test_bundle_object_count bundle1 3
 '
 
 test_expect_success 'unbundle 2' '
@@ -329,9 +315,7 @@  test_expect_success 'bundle does not prerequisite objects' '
 	git add file2 &&
 	git commit -m add.file2 file2 &&
 	git bundle create bundle3 -1 HEAD &&
-	convert_bundle_to_pack <bundle3 >bundle.pack &&
-	git index-pack bundle.pack &&
-	test_bundle_object_count bundle.pack 3
+	test_bundle_object_count bundle3 3
 '
 
 test_expect_success 'bundle should be able to create a full history' '
@@ -884,9 +868,7 @@  test_expect_success 'all boundary commits are excluded' '
 	git merge otherside &&
 	ad=$(git log --no-walk --format=%ad HEAD) &&
 	git bundle create twoside-boundary.bdl main --since="$ad" &&
-	convert_bundle_to_pack <twoside-boundary.bdl >twoside-boundary.pack &&
-	pack=$(git index-pack --fix-thin --stdin <twoside-boundary.pack) &&
-	test_bundle_object_count .git/objects/pack/pack-${pack##pack	}.pack 3
+	test_bundle_object_count --thin twoside-boundary.bdl 3
 '
 
 test_expect_success 'fetch --prune prints the remotes url' '
diff --git a/t/t6020-bundle-misc.sh b/t/t6020-bundle-misc.sh
new file mode 100755
index 0000000000..201f34b5c3
--- /dev/null
+++ b/t/t6020-bundle-misc.sh
@@ -0,0 +1,394 @@ 
+#!/bin/sh
+#
+# Copyright (c) 2021 Jiang Xin
+#
+
+test_description='Test git-bundle'
+
+GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main
+export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
+
+. ./test-lib.sh
+. "$TEST_DIRECTORY"/test-bundle-functions.sh
+
+# Create a commit or tag and set the variable with the object ID.
+test_commit_setvar () {
+	notick=
+	signoff=
+	indir=
+	merge=
+	tag=
+	var=
+
+	while test $# != 0
+	do
+		case "$1" in
+		--merge)
+			merge=t
+			;;
+		--tag)
+			tag=t
+			;;
+		--notick)
+			notick=t
+			;;
+		--signoff)
+			signoff="$1"
+			;;
+		-C)
+			shift
+			indir="$1"
+			;;
+		-*)
+			echo >&2 "error: unknown option $1"
+			return 1
+			;;
+		*)
+			break
+			;;
+		esac
+		shift
+	done
+	if test $# -lt 2
+	then
+		echo >&2 "error: test_commit_setvar must have at least 2 arguments"
+		return 1
+	fi
+	var=$1
+	shift
+	indir=${indir:+"$indir"/}
+	if test -z "$notick"
+	then
+		test_tick
+	fi &&
+	if test -n "$merge"
+	then
+		git ${indir:+ -C "$indir"} merge --no-edit --no-ff \
+			${2:+-m "$2"} "$1" &&
+		oid=$(git ${indir:+ -C "$indir"} rev-parse HEAD)
+	elif test -n "$tag"
+	then
+		git ${indir:+ -C "$indir"} tag -m "$1" "$1" "${2:-HEAD}" &&
+		oid=$(git ${indir:+ -C "$indir"} rev-parse "$1")
+	else
+		file=${2:-"$1.t"} &&
+		echo "${3-$1}" >"$indir$file" &&
+		git ${indir:+ -C "$indir"} add "$file" &&
+		git ${indir:+ -C "$indir"} commit $signoff -m "$1" &&
+		oid=$(git ${indir:+ -C "$indir"} rev-parse HEAD)
+	fi &&
+	eval $var=$oid
+}
+
+# Format the output of git commands to make a user-friendly and stable
+# text.  We can easily prepare the expect text without having to worry
+# about future changes of the commit ID and spaces of the output.
+make_user_friendly_and_stable_output () {
+	sed \
+		-e "s/${A%${A#???????}}[0-9a-f]*/<COMMIT-A>/g" \
+		-e "s/${B%${B#???????}}[0-9a-f]*/<COMMIT-B>/g" \
+		-e "s/${C%${C#???????}}[0-9a-f]*/<COMMIT-C>/g" \
+		-e "s/${D%${D#???????}}[0-9a-f]*/<COMMIT-D>/g" \
+		-e "s/${E%${E#???????}}[0-9a-f]*/<COMMIT-E>/g" \
+		-e "s/${F%${F#???????}}[0-9a-f]*/<COMMIT-F>/g" \
+		-e "s/${G%${G#???????}}[0-9a-f]*/<COMMIT-G>/g" \
+		-e "s/${H%${H#???????}}[0-9a-f]*/<COMMIT-H>/g" \
+		-e "s/${I%${I#???????}}[0-9a-f]*/<COMMIT-I>/g" \
+		-e "s/${J%${J#???????}}[0-9a-f]*/<COMMIT-J>/g" \
+		-e "s/${K%${K#???????}}[0-9a-f]*/<COMMIT-K>/g" \
+		-e "s/${L%${L#???????}}[0-9a-f]*/<COMMIT-L>/g" \
+		-e "s/${M%${M#???????}}[0-9a-f]*/<COMMIT-M>/g" \
+		-e "s/${N%${N#???????}}[0-9a-f]*/<COMMIT-N>/g" \
+		-e "s/${O%${O#???????}}[0-9a-f]*/<COMMIT-O>/g" \
+		-e "s/${P%${P#???????}}[0-9a-f]*/<COMMIT-P>/g" \
+		-e "s/${TAG1%${TAG1#???????}}[0-9a-f]*/<TAG-1>/g" \
+		-e "s/${TAG2%${TAG2#???????}}[0-9a-f]*/<TAG-2>/g" \
+		-e "s/${TAG3%${TAG3#???????}}[0-9a-f]*/<TAG-3>/g" \
+		-e "s/ *\$//"
+}
+
+#            (C)   (D, pull/1/head, topic/1)
+#             o --- o
+#            /       \                              (L)
+#           /         \        o (H, topic/2)             (M, tag:v2)
+#          /    (F)    \      /                                 (N, tag:v3)
+#         /      o --------- o (G, pull/2/head)      o --- o --- o (release)
+#        /      /        \    \                      /       \
+#  o --- o --- o -------- o -- o ------------------ o ------- o --- o (main)
+# (A)   (B)  (E, tag:v1) (I)  (J)                  (K)       (O)   (P)
+#
+test_expect_success 'setup' '
+	# Try to make a stable fixed width for abbreviated commit ID,
+	# this fixed-width oid will be replaced with "<OID>".
+	git config core.abbrev 7 &&
+
+	# branch main: commit A & B
+	test_commit_setvar A "Commit A" main.txt &&
+	test_commit_setvar B "Commit B" main.txt &&
+
+	# branch topic/1: commit C & D, refs/pull/1/head
+	git checkout -b topic/1 &&
+	test_commit_setvar C "Commit C" topic-1.txt &&
+	test_commit_setvar D "Commit D" topic-1.txt &&
+	git update-ref refs/pull/1/head HEAD &&
+
+	# branch topic/1: commit E, tag v1
+	git checkout main &&
+	test_commit_setvar E "Commit E" main.txt &&
+	test_commit_setvar --tag TAG1 v1 &&
+
+	# branch topic/2: commit F & G, refs/pull/2/head
+	git checkout -b topic/2 &&
+	test_commit_setvar F "Commit F" topic-2.txt &&
+	test_commit_setvar G "Commit G" topic-2.txt &&
+	git update-ref refs/pull/2/head HEAD &&
+	test_commit_setvar H "Commit H" topic-2.txt &&
+
+	# branch main: merge commit I & J
+	git checkout main &&
+	test_commit_setvar --merge I topic/1 "Merge commit I" &&
+	test_commit_setvar --merge J refs/pull/2/head "Merge commit J" &&
+
+	# branch main: commit K
+	git checkout main &&
+	test_commit_setvar K "Commit K" main.txt &&
+
+	# branch release:
+	git checkout -b release &&
+	test_commit_setvar L "Commit L" release.txt &&
+	test_commit_setvar M "Commit M" release.txt &&
+	test_commit_setvar --tag TAG2 v2 &&
+	test_commit_setvar N "Commit N" release.txt &&
+	test_commit_setvar --tag TAG3 v3 &&
+
+	# branch main: merge commit O, commit P
+	git checkout main &&
+	test_commit_setvar --merge O tags/v2 "Merge commit O" &&
+	test_commit_setvar P "Commit P" main.txt
+'
+
+test_expect_failure 'create bundle from special rev: main^!' '
+	git bundle create special-rev.bdl "main^!" &&
+
+	git bundle list-heads special-rev.bdl |
+		make_user_friendly_and_stable_output >actual &&
+	cat >expect <<-\EOF &&
+	<COMMIT-P> refs/heads/main
+	EOF
+	test_i18ncmp expect actual &&
+
+	git bundle verify special-rev.bdl |
+		make_user_friendly_and_stable_output >actual &&
+	cat >expect <<-\EOF &&
+	The bundle contains this ref:
+	<COMMIT-P> refs/heads/main
+	The bundle requires this ref:
+	<COMMIT-O>
+	EOF
+	test_i18ncmp expect actual &&
+
+	test_bundle_object_count special-rev.bdl 3
+'
+
+test_expect_success 'create bundle with --max-count option' '
+	git bundle create max-count.bdl --max-count 1 \
+		main \
+		"^release" \
+		refs/tags/v1 \
+		refs/pull/1/head \
+		refs/pull/2/head &&
+
+	git bundle verify max-count.bdl |
+		make_user_friendly_and_stable_output >actual &&
+	cat >expect <<-\EOF &&
+	The bundle contains these 2 refs:
+	<COMMIT-P> refs/heads/main
+	<TAG-1> refs/tags/v1
+	The bundle requires this ref:
+	<COMMIT-O>
+	EOF
+	test_i18ncmp expect actual &&
+
+	test_bundle_object_count max-count.bdl 4
+'
+
+test_expect_success 'create bundle with --since option' '
+	git log -1 --pretty="%ad" $M >actual &&
+	cat >expect <<-\EOF &&
+	Thu Apr 7 15:26:13 2005 -0700
+	EOF
+	test_cmp expect actual &&
+
+	git bundle create since.bdl \
+		--since "Thu Apr 7 15:27:00 2005 -0700" \
+		--all &&
+
+	git bundle verify since.bdl |
+		make_user_friendly_and_stable_output >actual &&
+	cat >expect <<-\EOF &&
+	The bundle contains these 5 refs:
+	<COMMIT-P> refs/heads/main
+	<COMMIT-N> refs/heads/release
+	<TAG-2> refs/tags/v2
+	<TAG-3> refs/tags/v3
+	<COMMIT-P> HEAD
+	The bundle requires these 2 refs:
+	<COMMIT-M>
+	<COMMIT-K>
+	EOF
+	test_i18ncmp expect actual &&
+
+	test_bundle_object_count --thin since.bdl 13
+'
+
+test_expect_success 'create bundle 1 - no prerequisites' '
+	git bundle create 1.bdl topic/1 topic/2 &&
+
+	cat >expect <<-\EOF &&
+	The bundle contains these 2 refs:
+	<COMMIT-D> refs/heads/topic/1
+	<COMMIT-H> refs/heads/topic/2
+	The bundle records a complete history.
+	EOF
+
+	# verify bundle, which has no prerequisites
+	git bundle verify 1.bdl |
+		make_user_friendly_and_stable_output >actual &&
+	test_i18ncmp expect actual &&
+
+	test_bundle_object_count 1.bdl 24
+'
+
+test_expect_success 'create bundle 2 - has prerequisites' '
+	git bundle create 2.bdl \
+		--ignore-missing \
+		^topic/deleted \
+		^$D \
+		^topic/2 \
+		release &&
+
+	cat >expect <<-\EOF &&
+	The bundle contains this ref:
+	<COMMIT-N> refs/heads/release
+	The bundle requires these 3 refs:
+	<COMMIT-D>
+	<COMMIT-E>
+	<COMMIT-G>
+	EOF
+
+	git bundle verify 2.bdl |
+		make_user_friendly_and_stable_output >actual &&
+	test_i18ncmp expect actual &&
+
+	test_bundle_object_count 2.bdl 16
+'
+
+test_expect_success 'fail to verify bundle without prerequisites' '
+	git init --bare test1.git &&
+
+	cat >expect <<-\EOF &&
+	error: Repository lacks these prerequisite commits:
+	error: <COMMIT-D>
+	error: <COMMIT-E>
+	error: <COMMIT-G>
+	EOF
+
+	test_must_fail git -C test1.git bundle verify ../2.bdl 2>&1 |
+		make_user_friendly_and_stable_output >actual &&
+	test_i18ncmp expect actual
+'
+
+test_expect_success 'create bundle 3 - two refs, same object' '
+	git bundle create --version=3 3.bdl \
+		^release \
+		^topic/1 \
+		^topic/2 \
+		main \
+		HEAD &&
+
+	cat >expect <<-\EOF &&
+	The bundle contains these 2 refs:
+	<COMMIT-P> refs/heads/main
+	<COMMIT-P> HEAD
+	The bundle requires these 2 refs:
+	<COMMIT-M>
+	<COMMIT-K>
+	EOF
+
+	git bundle verify 3.bdl |
+		make_user_friendly_and_stable_output >actual &&
+	test_i18ncmp expect actual &&
+
+	test_bundle_object_count 3.bdl 4
+'
+
+test_expect_success 'create bundle 4 - with tags' '
+	git bundle create 4.bdl \
+		^main \
+		^release \
+		^topic/1 \
+		^topic/2 \
+		--all &&
+
+	cat >expect <<-\EOF &&
+	The bundle contains these 3 refs:
+	<TAG-1> refs/tags/v1
+	<TAG-2> refs/tags/v2
+	<TAG-3> refs/tags/v3
+	The bundle records a complete history.
+	EOF
+
+	git bundle verify 4.bdl |
+		make_user_friendly_and_stable_output >actual &&
+	test_i18ncmp expect actual &&
+
+	test_bundle_object_count 4.bdl 3
+'
+
+test_expect_success 'clone from bundle' '
+	git clone --mirror 1.bdl mirror.git &&
+	git -C mirror.git show-ref |
+		make_user_friendly_and_stable_output >actual &&
+	cat >expect <<-\EOF &&
+	<COMMIT-D> refs/heads/topic/1
+	<COMMIT-H> refs/heads/topic/2
+	EOF
+	test_cmp expect actual &&
+
+	git -C mirror.git fetch ../2.bdl "+refs/*:refs/*" &&
+	git -C mirror.git show-ref |
+		make_user_friendly_and_stable_output >actual &&
+	cat >expect <<-\EOF &&
+	<COMMIT-N> refs/heads/release
+	<COMMIT-D> refs/heads/topic/1
+	<COMMIT-H> refs/heads/topic/2
+	EOF
+	test_cmp expect actual &&
+
+	git -C mirror.git fetch ../3.bdl "+refs/*:refs/*" &&
+	git -C mirror.git show-ref |
+		make_user_friendly_and_stable_output >actual &&
+	cat >expect <<-\EOF &&
+	<COMMIT-P> refs/heads/main
+	<COMMIT-N> refs/heads/release
+	<COMMIT-D> refs/heads/topic/1
+	<COMMIT-H> refs/heads/topic/2
+	EOF
+	test_cmp expect actual &&
+
+	git -C mirror.git fetch ../4.bdl "+refs/*:refs/*" &&
+	git -C mirror.git show-ref |
+		make_user_friendly_and_stable_output >actual &&
+	cat >expect <<-\EOF &&
+	<COMMIT-P> refs/heads/main
+	<COMMIT-N> refs/heads/release
+	<COMMIT-D> refs/heads/topic/1
+	<COMMIT-H> refs/heads/topic/2
+	<TAG-1> refs/tags/v1
+	<TAG-2> refs/tags/v2
+	<TAG-3> refs/tags/v3
+	EOF
+	test_cmp expect actual
+'
+
+test_done
diff --git a/t/test-bundle-functions.sh b/t/test-bundle-functions.sh
new file mode 100644
index 0000000000..cf7ed818b2
--- /dev/null
+++ b/t/test-bundle-functions.sh
@@ -0,0 +1,42 @@ 
+# Library of git-bundle related functions.
+
+# Display the pack data contained in the bundle file, bypassing the
+# header that contains the signature, prerequisites and references.
+convert_bundle_to_pack () {
+	while read x && test -n "$x"
+	do
+		:;
+	done
+	cat
+}
+
+# Check count of objects in a bundle file.
+# We can use "--thin" opiton to check thin pack, which must be fixed by
+# command `git-index-pack --fix-thin --stdin`.
+test_bundle_object_count () {
+	thin=
+	if test "$1" = "--thin"
+	then
+		thin=t
+		shift
+	fi
+	if test $# -ne 2
+	then
+		echo >&2 "args should be: <bundle> <count>"
+		return 1
+	fi
+	bundle=$1
+	pack=$bundle.pack
+	convert_bundle_to_pack <"$bundle" >"$pack" &&
+	if test -n "$thin"
+	then
+		mv "$pack" "$bundle.thin.pack" &&
+		git index-pack --stdin --fix-thin "$pack" <"$bundle.thin.pack"
+	else
+		git index-pack "$pack"
+	fi || return 1
+	count=$(git show-index <"${pack%pack}idx" | wc -l) &&
+	test $2 = $count && return 0
+	echo >&2 "error: object count for $bundle is $count, not $2"
+	return 1
+}