diff mbox series

[v3,1/3] t5323: test cases for git-pack-redundant

Message ID 20190102043456.15652-2-worldhello.net@gmail.com (mailing list archive)
State New, archived
Headers show
Series [v3,1/3] t5323: test cases for git-pack-redundant | expand

Commit Message

Jiang Xin Jan. 2, 2019, 4:34 a.m. UTC
From: Jiang Xin <zhiyou.jx@alibaba-inc.com>

Add test cases for git pack-redundant to validate new algorithm for git
pack-redundant.

Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
---
 t/t5323-pack-redundant.sh | 84 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 84 insertions(+)
 create mode 100755 t/t5323-pack-redundant.sh

Comments

SZEDER Gábor Jan. 9, 2019, 12:56 p.m. UTC | #1
On Wed, Jan 02, 2019 at 12:34:54PM +0800, Jiang Xin wrote:
> From: Jiang Xin <zhiyou.jx@alibaba-inc.com>
> 
> Add test cases for git pack-redundant to validate new algorithm for git
> pack-redundant.
> 
> Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
> ---
>  t/t5323-pack-redundant.sh | 84 +++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 84 insertions(+)
>  create mode 100755 t/t5323-pack-redundant.sh
> 
> diff --git a/t/t5323-pack-redundant.sh b/t/t5323-pack-redundant.sh
> new file mode 100755
> index 0000000000..ef6076f065
> --- /dev/null
> +++ b/t/t5323-pack-redundant.sh
> @@ -0,0 +1,84 @@
> +#!/bin/sh
> +#
> +# Copyright (c) 2018 Jiang Xin
> +#
> +
> +test_description='git redundant test'

s/redundant/pack-redundant/ ?

> +
> +. ./test-lib.sh
> +
> +create_commits()
> +{
> +	set -e
> +	parent=
> +	for name in A B C D E F G H I J K L M
> +	do
> +		test_tick
> +		T=$(git write-tree)
> +		if test -z "$parent"
> +		then
> +			sha1=$(echo $name | git commit-tree $T)

There is a considerable effort going on to switch from SHA-1 to a
different hash function, so please don't add any new $sha1 variable;
call it $oid or $commit instead.

> +		else
> +			sha1=$(echo $name | git commit-tree -p $parent $T)
> +		fi
> +		eval $name=$sha1
> +		parent=$sha1
> +	done
> +	git update-ref refs/heads/master $M
> +}
> +
> +create_redundant_packs()
> +{
> +	set -e
> +	cd .git/objects/pack
> +	P1=$(printf "$T\n$A\n" | git pack-objects pack 2>/dev/null)
> +	P2=$(printf "$T\n$A\n$B\n$C\n$D\n$E\n" | git pack-objects pack 2>/dev/null)
> +	P3=$(printf "$C\n$D\n$F\n$G\n$I\n$J\n" | git pack-objects pack 2>/dev/null)
> +	P4=$(printf "$D\n$E\n$G\n$H\n$J\n$K\n" | git pack-objects pack 2>/dev/null)
> +	P5=$(printf "$F\n$G\n$H\n" | git pack-objects pack 2>/dev/null)
> +	P6=$(printf "$F\n$I\n$L\n" | git pack-objects pack 2>/dev/null)
> +	P7=$(printf "$H\n$K\n$M\n" | git pack-objects pack 2>/dev/null)
> +	P8=$(printf "$L\n$M\n" | git pack-objects pack 2>/dev/null)
> +	cd -
> +	eval P$P1=P1:$P1
> +	eval P$P2=P2:$P2
> +	eval P$P3=P3:$P3
> +	eval P$P4=P4:$P4
> +	eval P$P5=P5:$P5
> +	eval P$P6=P6:$P6
> +	eval P$P7=P7:$P7
> +	eval P$P8=P8:$P8
> +}
> +
> +# Create commits and packs
> +create_commits
> +create_redundant_packs

Please perform all setup tasks in a test_expect_success block, so we
get verbose and trace output about what's going on.

Don't use 'set -e', use an &&-chain instead.  To fail the test if a
command in the for loop were to fail you could do something like this:

  for ....
  do
    do-this &&
    do-that ||
    return 1
  done

> +
> +test_expect_success 'clear loose objects' '
> +	git prune-packed &&
> +	test $(find .git/objects -type f | grep -v pack | wc -l) -eq 0

Use something like

  find .git/objects -type f | grep -v pack >out &&
  test_must_be_empty out

instead, so we get an informative error message on failure.

> +'
> +
> +cat >expected <<EOF
> +P1:$P1
> +P4:$P4
> +P5:$P5
> +P6:$P6
> +EOF
> +
> +test_expect_success 'git pack-redundant --all' '
> +	git pack-redundant --all | \

Don't run a git command (especially the particular command the test
script focuses on) upstream of a pipe, because it hides the command's
exit code.  Use an intermediate file instead.

> +		sed -e "s#^.*/pack-\(.*\)\.\(idx\|pack\)#\1#g" | \

This sed command doesn't seem to work on macOS (on Travis CI), and
causes the test to fail with:

  ++git pack-redundant --all
  ++sed -e 's#^.*/pack-\(.*\)\.\(idx\|pack\)#\1#g'
  ++sort -u
  ++read p
  ++sort
  ++eval echo '${P.git/objects/pack/pack-0cf5cb6afaa1bae36b8e61ca398dbe29a15bc74e.idx}'
  ./test-lib.sh: line 697: ${P.git/objects/pack/pack-0cf5cb6afaa1bae36b8e61ca398dbe29a15bc74e.idx}: bad substitution
  ++test_cmp expected actual
  ++diff -u expected actual
  --- expected    2019-01-09 01:53:45.000000000 +0000
  +++ actual      2019-01-09 01:53:45.000000000 +0000
  @@ -1,4 +0,0 @@
  -P1:24ee080366509364d04a138cd4e168dc4ff33354
  -P4:139d8b0cfe7e8970a8f3533835f90278d88de474
  -P5:23e0f02d822fa4bfe5ee63337ba5632cd7be208e
  -P6:deeb289f1749972f1cd57c3b9f359ece2361f60a
  error: last command exited with $?=1
  not ok 2 - git pack-redundant --all

I'm not sure what's wrong with it, though.

Minor nit: 'git pack-redundant' prints one filename per line, so the
'g' at the end of the 's###g' is not necessary.

> +		sort -u | \
> +		while read p; do eval echo "\${P$p}"; done | \
> +		sort > actual && \

Style nit: no space between redirection operator and filename

> +	test_cmp expected actual
> +'
> +
> +test_expect_success 'remove redundant packs' '
> +	git pack-redundant --all | xargs rm &&
> +	git fsck &&
> +	test $(git pack-redundant --all | wc -l) -eq 0
> +'
> +
> +test_done
> -- 
> 2.14.5.agit.2
>
SZEDER Gábor Jan. 9, 2019, 4:47 p.m. UTC | #2
On Wed, Jan 09, 2019 at 01:56:28PM +0100, SZEDER Gábor wrote:
> On Wed, Jan 02, 2019 at 12:34:54PM +0800, Jiang Xin wrote:
> > +cat >expected <<EOF
> > +P1:$P1
> > +P4:$P4
> > +P5:$P5
> > +P6:$P6
> > +EOF

Creating the expected results could be moved into the
test_expect_success block as well.

> > +
> > +test_expect_success 'git pack-redundant --all' '
> > +	git pack-redundant --all | \
> 
> Don't run a git command (especially the particular command the test
> script focuses on) upstream of a pipe, because it hides the command's
> exit code.  Use an intermediate file instead.
> 
> > +		sed -e "s#^.*/pack-\(.*\)\.\(idx\|pack\)#\1#g" | \
> 
> This sed command doesn't seem to work on macOS (on Travis CI), and
> causes the test to fail with:
> 
>   ++git pack-redundant --all
>   ++sed -e 's#^.*/pack-\(.*\)\.\(idx\|pack\)#\1#g'
>   ++sort -u
>   ++read p
>   ++sort
>   ++eval echo '${P.git/objects/pack/pack-0cf5cb6afaa1bae36b8e61ca398dbe29a15bc74e.idx}'
>   ./test-lib.sh: line 697: ${P.git/objects/pack/pack-0cf5cb6afaa1bae36b8e61ca398dbe29a15bc74e.idx}: bad substitution
>   ++test_cmp expected actual
>   ++diff -u expected actual
>   --- expected    2019-01-09 01:53:45.000000000 +0000
>   +++ actual      2019-01-09 01:53:45.000000000 +0000
>   @@ -1,4 +0,0 @@
>   -P1:24ee080366509364d04a138cd4e168dc4ff33354
>   -P4:139d8b0cfe7e8970a8f3533835f90278d88de474
>   -P5:23e0f02d822fa4bfe5ee63337ba5632cd7be208e
>   -P6:deeb289f1749972f1cd57c3b9f359ece2361f60a
>   error: last command exited with $?=1
>   not ok 2 - git pack-redundant --all
> 
> I'm not sure what's wrong with it, though.

So, it appears that 'sed' in macOS doesn't understand the
'\(idx\|pack\)' part of that regex.  Turning that command into

  sed -e "s#^.git/objects/pack/pack-\($OID_REGEX\)\..*#\1#" out | \

makes it work even on macOS, but note that those 40 hexdigits are not
actual OIDs but file content checksums, so using $OID_REGEX is not the
right thing to do here (though I'm not sure what is supposed to be
used instead, as $_x40 hardcodes the number of hexdigits).

Alas, the test as a whole still fails with the following on macOS:

  ++diff -u expected actual
  --- expected    2019-01-09 15:54:49.000000000 +0000
  +++ actual      2019-01-09 15:54:49.000000000 +0000
  @@ -1,4 +1,4 @@
   P1:24ee080366509364d04a138cd4e168dc4ff33354
  -P4:139d8b0cfe7e8970a8f3533835f90278d88de474
  +P3:0cf5cb6afaa1bae36b8e61ca398dbe29a15bc74e
   P5:23e0f02d822fa4bfe5ee63337ba5632cd7be208e
  -P6:deeb289f1749972f1cd57c3b9f359ece2361f60a
  +P7:4ecc1eb138516a26654cd4e3570b322c0820f170
  error: last command exited with $?=1
Jiang Xin Jan. 10, 2019, 3:28 a.m. UTC | #3
SZEDER Gábor <szeder.dev@gmail.com> 于2019年1月9日周三 下午8:56写道:
>
> On Wed, Jan 02, 2019 at 12:34:54PM +0800, Jiang Xin wrote:
> > From: Jiang Xin <zhiyou.jx@alibaba-inc.com>
> > +test_description='git redundant test'
>
> s/redundant/pack-redundant/ ?

Yes, will correct it.

> > +
> > +. ./test-lib.sh
> > +
> > +create_commits()
> > +{
> > +     set -e
> > +     parent=
> > +     for name in A B C D E F G H I J K L M
> > +     do
> > +             test_tick
> > +             T=$(git write-tree)
> > +             if test -z "$parent"
> > +             then
> > +                     sha1=$(echo $name | git commit-tree $T)
>
> There is a considerable effort going on to switch from SHA-1 to a
> different hash function, so please don't add any new $sha1 variable;
> call it $oid or $commit instead.
>

Will do.

> > +
> > +# Create commits and packs
> > +create_commits
> > +create_redundant_packs
>
> Please perform all setup tasks in a test_expect_success block, so we
> get verbose and trace output about what's going on.

Will do like this:

    test_expect_success 'setup' '
            create_commits  &&
            create_redundant_packs
    '

> Don't use 'set -e', use an &&-chain instead.  To fail the test if a
> command in the for loop were to fail you could do something like this:
>
>   for ....
>   do
>     do-this &&
>     do-that ||
>     return 1
>   done

Will do.

> > +
> > +test_expect_success 'clear loose objects' '
> > +     git prune-packed &&
> > +     test $(find .git/objects -type f | grep -v pack | wc -l) -eq 0
>
> Use something like
>
>   find .git/objects -type f | grep -v pack >out &&
>   test_must_be_empty out
>
> instead, so we get an informative error message on failure.

if `grep -v pack` return empty output, it will return error, so
I will use `sed -e "/objects\/pack\//d" >out` instead.

>
> > +'
> > +
> > +cat >expected <<EOF
> > +P1:$P1
> > +P4:$P4
> > +P5:$P5
> > +P6:$P6
> > +EOF
> > +
> > +test_expect_success 'git pack-redundant --all' '
> > +     git pack-redundant --all | \
>
> Don't run a git command (especially the particular command the test
> script focuses on) upstream of a pipe, because it hides the command's
> exit code.  Use an intermediate file instead.
>
> > +             sed -e "s#^.*/pack-\(.*\)\.\(idx\|pack\)#\1#g" | \
>
> This sed command doesn't seem to work on macOS (on Travis CI), and
> causes the test to fail with:
>

It works if rewrite as follows:

    git pack-redundant --all >out &&
    sed -E -e "s#.*/pack-(.*)\.(idx|pack)#\1#" out | \

Without `-E`, MasOS has to write two seperate sed commands, such as:

    git pack-redundant --all >out &&
    sed -e "s#.*/pack-\(.*\)\.idx#\1#" out | \
    sed -e "s#.*/pack-\(.*\)\.pack#\1#"

Option '-E' is an alias for -r in GNU sed 4.2  (added in 4.2, not documented
unti 4.3), released on May 11 2009.  I prefer the `-E` version.

>
> Minor nit: 'git pack-redundant' prints one filename per line, so the
> 'g' at the end of the 's###g' is not necessary.
>
> > +             sort -u | \
> > +             while read p; do eval echo "\${P$p}"; done | \
> > +             sort > actual && \
>
> Style nit: no space between redirection operator and filename

Will do.
Johannes Sixt Jan. 10, 2019, 7:11 a.m. UTC | #4
Am 10.01.19 um 04:28 schrieb Jiang Xin:
> SZEDER Gábor <szeder.dev@gmail.com> 于2019年1月9日周三 下午8:56写道:
>> Use something like
>>
>>    find .git/objects -type f | grep -v pack >out &&
>>    test_must_be_empty out
>>
>> instead, so we get an informative error message on failure.
> 
> if `grep -v pack` return empty output, it will return error, so
> I will use `sed -e "/objects\/pack\//d" >out` instead.

So, you could even write this as

	find .git/objects -type f >out &&
	! grep -v pack out	# must be empty
or
	! find .git/objects -type f | grep -v pack

if you want to be terse.

-- Hannes
SZEDER Gábor Jan. 10, 2019, 11:57 a.m. UTC | #5
On Thu, Jan 10, 2019 at 11:28:34AM +0800, Jiang Xin wrote:
> SZEDER Gábor <szeder.dev@gmail.com> 于2019年1月9日周三 下午8:56写道:
> > > +             sed -e "s#^.*/pack-\(.*\)\.\(idx\|pack\)#\1#g" | \
> >
> > This sed command doesn't seem to work on macOS (on Travis CI), and
> > causes the test to fail with:
> >
> 
> It works if rewrite as follows:
> 
>     git pack-redundant --all >out &&
>     sed -E -e "s#.*/pack-(.*)\.(idx|pack)#\1#" out | \
> 
> Without `-E`, MasOS has to write two seperate sed commands, such as:
> 
>     git pack-redundant --all >out &&
>     sed -e "s#.*/pack-\(.*\)\.idx#\1#" out | \
>     sed -e "s#.*/pack-\(.*\)\.pack#\1#"
> 
> Option '-E' is an alias for -r in GNU sed 4.2  (added in 4.2, not documented
> unti 4.3), released on May 11 2009.  I prefer the `-E` version.

Is 'sed -E' portable enough, e.g. to the various BSDs, Solaris, and
whatnot?  I don't know, but POSIX doesn't mention it, there is not a
single instance of it in our current codebase, and it appears that
we've never used it before, either.  OTOH,
't/check-non-portable-shell.pl' doesn't catch it as non-portable
construct...
Torsten Bögershausen Jan. 10, 2019, 12:25 p.m. UTC | #6
On 10.01.19 12:57, SZEDER Gábor wrote:
> On Thu, Jan 10, 2019 at 11:28:34AM +0800, Jiang Xin wrote:
>> SZEDER Gábor <szeder.dev@gmail.com> 于2019年1月9日周三 下午8:56写道:
>>>> +             sed -e "s#^.*/pack-\(.*\)\.\(idx\|pack\)#\1#g" | \
>>>
>>> This sed command doesn't seem to work on macOS (on Travis CI), and
>>> causes the test to fail with:
>>>
>>
>> It works if rewrite as follows:
>>
>>     git pack-redundant --all >out &&
>>     sed -E -e "s#.*/pack-(.*)\.(idx|pack)#\1#" out | \
>>
>> Without `-E`, MasOS has to write two seperate sed commands, such as:
>>
>>     git pack-redundant --all >out &&
>>     sed -e "s#.*/pack-\(.*\)\.idx#\1#" out | \
>>     sed -e "s#.*/pack-\(.*\)\.pack#\1#"
>>
>> Option '-E' is an alias for -r in GNU sed 4.2  (added in 4.2, not documented
>> unti 4.3), released on May 11 2009.  I prefer the `-E` version.
> 
> Is 'sed -E' portable enough, e.g. to the various BSDs, Solaris, and
> whatnot?  I don't know, but POSIX doesn't mention it, there is not a
> single instance of it in our current codebase, and it appears that
> we've never used it before, either.  OTOH,

If we can use "two seperate sed commands" i would (really) prefer to so,
to avoid "sed -E".
My conclusion is that it is not portable enough.
> 't/check-non-portable-shell.pl' doesn't catch it as non-portable
> construct...

Good point.
Actually that script only checks "known non-portable" options.
Every time somebody finds a non-portable option, we update it.
A growing blacklist, so to say.
May be we should have a white list instead.
Junio C Hamano Jan. 10, 2019, 5:36 p.m. UTC | #7
SZEDER Gábor <szeder.dev@gmail.com> writes:

>> Without `-E`, MasOS has to write two seperate sed commands, such as:
>> 
>>     git pack-redundant --all >out &&
>>     sed -e "s#.*/pack-\(.*\)\.idx#\1#" out | \
>>     sed -e "s#.*/pack-\(.*\)\.pack#\1#"

Two commands, perhaps, but does it have to be two separate sed
processes piped together?  Why won't something like this work?

	sed -e 's|.*/pack-\([0-9a-f]*\)\.idx$|\1' \
	-e 's|.*/pack-\([0-9a-f]*\)\.pack$|\1'
diff mbox series

Patch

diff --git a/t/t5323-pack-redundant.sh b/t/t5323-pack-redundant.sh
new file mode 100755
index 0000000000..ef6076f065
--- /dev/null
+++ b/t/t5323-pack-redundant.sh
@@ -0,0 +1,84 @@ 
+#!/bin/sh
+#
+# Copyright (c) 2018 Jiang Xin
+#
+
+test_description='git redundant test'
+
+. ./test-lib.sh
+
+create_commits()
+{
+	set -e
+	parent=
+	for name in A B C D E F G H I J K L M
+	do
+		test_tick
+		T=$(git write-tree)
+		if test -z "$parent"
+		then
+			sha1=$(echo $name | git commit-tree $T)
+		else
+			sha1=$(echo $name | git commit-tree -p $parent $T)
+		fi
+		eval $name=$sha1
+		parent=$sha1
+	done
+	git update-ref refs/heads/master $M
+}
+
+create_redundant_packs()
+{
+	set -e
+	cd .git/objects/pack
+	P1=$(printf "$T\n$A\n" | git pack-objects pack 2>/dev/null)
+	P2=$(printf "$T\n$A\n$B\n$C\n$D\n$E\n" | git pack-objects pack 2>/dev/null)
+	P3=$(printf "$C\n$D\n$F\n$G\n$I\n$J\n" | git pack-objects pack 2>/dev/null)
+	P4=$(printf "$D\n$E\n$G\n$H\n$J\n$K\n" | git pack-objects pack 2>/dev/null)
+	P5=$(printf "$F\n$G\n$H\n" | git pack-objects pack 2>/dev/null)
+	P6=$(printf "$F\n$I\n$L\n" | git pack-objects pack 2>/dev/null)
+	P7=$(printf "$H\n$K\n$M\n" | git pack-objects pack 2>/dev/null)
+	P8=$(printf "$L\n$M\n" | git pack-objects pack 2>/dev/null)
+	cd -
+	eval P$P1=P1:$P1
+	eval P$P2=P2:$P2
+	eval P$P3=P3:$P3
+	eval P$P4=P4:$P4
+	eval P$P5=P5:$P5
+	eval P$P6=P6:$P6
+	eval P$P7=P7:$P7
+	eval P$P8=P8:$P8
+}
+
+# Create commits and packs
+create_commits
+create_redundant_packs
+
+test_expect_success 'clear loose objects' '
+	git prune-packed &&
+	test $(find .git/objects -type f | grep -v pack | wc -l) -eq 0
+'
+
+cat >expected <<EOF
+P1:$P1
+P4:$P4
+P5:$P5
+P6:$P6
+EOF
+
+test_expect_success 'git pack-redundant --all' '
+	git pack-redundant --all | \
+		sed -e "s#^.*/pack-\(.*\)\.\(idx\|pack\)#\1#g" | \
+		sort -u | \
+		while read p; do eval echo "\${P$p}"; done | \
+		sort > actual && \
+	test_cmp expected actual
+'
+
+test_expect_success 'remove redundant packs' '
+	git pack-redundant --all | xargs rm &&
+	git fsck &&
+	test $(git pack-redundant --all | wc -l) -eq 0
+'
+
+test_done