diff mbox series

[2/4] t: remove \{m,n\} from BRE grep usage

Message ID 752b12ef1e27d3b69d6aa3734309895082be7886.1663688697.git.congdanhqx@gmail.com (mailing list archive)
State New, archived
Headers show
Series allow grep -E, and remove egrep | expand

Commit Message

Đoàn Trần Công Danh Sept. 20, 2022, 3:49 p.m. UTC
\{m,n\} is a GNU extension to BRE, and it's forbidden by our
CodingGuidelines.

Change to fixed strings or ERE.

Signed-off-by: Đoàn Trần Công Danh <congdanhqx@gmail.com>
---
 t/t3200-branch.sh             | 6 ++++--
 t/t3305-notes-fanout.sh       | 2 +-
 t/t3404-rebase-interactive.sh | 6 +++---
 t/t5550-http-fetch-dumb.sh    | 2 +-
 t/t5702-protocol-v2.sh        | 2 +-
 5 files changed, 10 insertions(+), 8 deletions(-)

Comments

SZEDER Gábor Sept. 20, 2022, 4:43 p.m. UTC | #1
On Tue, Sep 20, 2022 at 10:49:14PM +0700, Đoàn Trần Công Danh wrote:
> \{m,n\} is a GNU extension to BRE, and it's forbidden by our
> CodingGuidelines.
> 
> Change to fixed strings or ERE.
> 
> Signed-off-by: Đoàn Trần Công Danh <congdanhqx@gmail.com>
> ---
>  t/t3200-branch.sh             | 6 ++++--
>  t/t3305-notes-fanout.sh       | 2 +-
>  t/t3404-rebase-interactive.sh | 6 +++---
>  t/t5550-http-fetch-dumb.sh    | 2 +-
>  t/t5702-protocol-v2.sh        | 2 +-
>  5 files changed, 10 insertions(+), 8 deletions(-)
> 
> diff --git a/t/t3200-branch.sh b/t/t3200-branch.sh
> index 9723c2827c..f05ac1fe0b 100755
> --- a/t/t3200-branch.sh
> +++ b/t/t3200-branch.sh
> @@ -201,8 +201,10 @@ test_expect_success 'git branch -M baz bam should succeed when baz is checked ou
>  
>  test_expect_success 'git branch -M baz bam should add entries to .git/logs/HEAD' '
>  	msg="Branch: renamed refs/heads/baz to refs/heads/bam" &&
> -	grep " 0\{40\}.*$msg$" .git/logs/HEAD &&
> -	grep "^0\{40\}.*$msg$" .git/logs/HEAD
> +	zero="00000000" &&
> +	zero="$zero$zero$zero$zero$zero" &&
> +	grep " $zero.*$msg$" .git/logs/HEAD &&
> +	grep "^$zero.*$msg$" .git/logs/HEAD

I think these could use $ZERO_OID instead.
Phillip Wood Sept. 20, 2022, 5:42 p.m. UTC | #2
Hi Đoàn

On 20/09/2022 16:49, Đoàn Trần Công Danh wrote:
> \{m,n\} is a GNU extension to BRE, and it's forbidden by our
> CodingGuidelines.

\{m,n\} is valid in a posix BRE[1]. If we're already using it without 
anyone complaining I think it would be better to update CodingGuidlines 
to allow it.

Best Wishes

Phillip

[1] 
https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html

Section 9.3.6 says
...
5. When a BRE matching a single character, a subexpression, or a 
back-reference is followed by an interval expression of the format 
"\{m\}", "\{m,\}", or "\{m,n\}", together with that interval expression 
it shall match what repeated consecutive occurrences of the BRE would 
match. The values of m and n are decimal integers in the range 0 <= m<= 
n<= {RE_DUP_MAX}, where m specifies the exact or minimum number of 
occurrences and n specifies the maximum number of occurrences. The 
expression "\{m\}" shall match exactly m occurrences of the preceding 
BRE, "\{m,\}" shall match at least m occurrences, and "\{m,n\}" shall 
match any number of occurrences between m and n, inclusive.

For example, in the string "abababccccccd" the BRE "c\{3\}" is matched 
by characters seven to nine, the BRE "\(ab\)\{4,\}" is not matched at 
all, and the BRE "c\{1,3\}d" is matched by characters ten to thirteen.

> Change to fixed strings or ERE.
> 
> Signed-off-by: Đoàn Trần Công Danh <congdanhqx@gmail.com>
> ---
>   t/t3200-branch.sh             | 6 ++++--
>   t/t3305-notes-fanout.sh       | 2 +-
>   t/t3404-rebase-interactive.sh | 6 +++---
>   t/t5550-http-fetch-dumb.sh    | 2 +-
>   t/t5702-protocol-v2.sh        | 2 +-
>   5 files changed, 10 insertions(+), 8 deletions(-)
> 
> diff --git a/t/t3200-branch.sh b/t/t3200-branch.sh
> index 9723c2827c..f05ac1fe0b 100755
> --- a/t/t3200-branch.sh
> +++ b/t/t3200-branch.sh
> @@ -201,8 +201,10 @@ test_expect_success 'git branch -M baz bam should succeed when baz is checked ou
>   
>   test_expect_success 'git branch -M baz bam should add entries to .git/logs/HEAD' '
>   	msg="Branch: renamed refs/heads/baz to refs/heads/bam" &&
> -	grep " 0\{40\}.*$msg$" .git/logs/HEAD &&
> -	grep "^0\{40\}.*$msg$" .git/logs/HEAD
> +	zero="00000000" &&
> +	zero="$zero$zero$zero$zero$zero" &&
> +	grep " $zero.*$msg$" .git/logs/HEAD &&
> +	grep "^$zero.*$msg$" .git/logs/HEAD
>   '
>   
>   test_expect_success 'git branch -M should leave orphaned HEAD alone' '
> diff --git a/t/t3305-notes-fanout.sh b/t/t3305-notes-fanout.sh
> index 22ffe5bcb9..aa3bb2e308 100755
> --- a/t/t3305-notes-fanout.sh
> +++ b/t/t3305-notes-fanout.sh
> @@ -9,7 +9,7 @@ path_has_fanout() {
>   	path=$1 &&
>   	fanout=$2 &&
>   	after_last_slash=$(($(test_oid hexsz) - $fanout * 2)) &&
> -	echo $path | grep -q "^\([0-9a-f]\{2\}/\)\{$fanout\}[0-9a-f]\{$after_last_slash\}$"
> +	echo $path | grep -q -E "^([0-9a-f][0-9a-f]/){$fanout}[0-9a-f]{$after_last_slash}$"
>   }
>   
>   touched_one_note_with_fanout() {
> diff --git a/t/t3404-rebase-interactive.sh b/t/t3404-rebase-interactive.sh
> index 688b01e3eb..4f5abb5ad2 100755
> --- a/t/t3404-rebase-interactive.sh
> +++ b/t/t3404-rebase-interactive.sh
> @@ -1244,9 +1244,9 @@ test_expect_success 'short commit ID collide' '
>   		test $colliding_id = "$(git rev-parse HEAD | cut -c 1-4)" &&
>   		grep "^pick $colliding_id " \
>   			.git/rebase-merge/git-rebase-todo.tmp &&
> -		grep "^pick [0-9a-f]\{$hexsz\}" \
> +		grep -E "^pick [0-9a-f]{$hexsz}" \
>   			.git/rebase-merge/git-rebase-todo &&
> -		grep "^pick [0-9a-f]\{$hexsz\}" \
> +		grep -E "^pick [0-9a-f]{$hexsz}" \
>   			.git/rebase-merge/git-rebase-todo.backup &&
>   		git rebase --continue
>   	) &&
> @@ -1261,7 +1261,7 @@ test_expect_success 'respect core.abbrev' '
>   		set_cat_todo_editor &&
>   		test_must_fail git rebase -i HEAD~4 >todo-list
>   	) &&
> -	test 4 = $(grep -c "pick [0-9a-f]\{12,\}" todo-list)
> +	test 4 = $(grep -c -E "pick [0-9a-f]{12,}" todo-list)
>   '
>   
>   test_expect_success 'todo count' '
> diff --git a/t/t5550-http-fetch-dumb.sh b/t/t5550-http-fetch-dumb.sh
> index d7cf85ffea..8f182a3cbf 100755
> --- a/t/t5550-http-fetch-dumb.sh
> +++ b/t/t5550-http-fetch-dumb.sh
> @@ -234,7 +234,7 @@ test_expect_success 'http-fetch --packfile' '
>   		--index-pack-arg=--keep \
>   		"$HTTPD_URL"/dumb/repo_pack.git/$p >out &&
>   
> -	grep "^keep.[0-9a-f]\{16,\}$" out &&
> +	grep -E "^keep.[0-9a-f]{16,}$" out &&
>   	cut -c6- out >packhash &&
>   
>   	# Ensure that the expected files are generated
> diff --git a/t/t5702-protocol-v2.sh b/t/t5702-protocol-v2.sh
> index 5d42a355a8..b33cd4afca 100755
> --- a/t/t5702-protocol-v2.sh
> +++ b/t/t5702-protocol-v2.sh
> @@ -1001,7 +1001,7 @@ test_expect_success 'part of packfile response provided as URI' '
>   	do
>   		git verify-pack --object-format=$(test_oid algo) --verbose $idx >out &&
>   		{
> -			grep "^[0-9a-f]\{16,\} " out || :
> +			grep -E "^[0-9a-f]{16,} " out || :
>   		} >out.objectlist &&
>   		if test_line_count = 1 out.objectlist
>   		then
Junio C Hamano Sept. 20, 2022, 5:52 p.m. UTC | #3
Đoàn Trần Công Danh  <congdanhqx@gmail.com> writes:

> \{m,n\} is a GNU extension to BRE, and it's forbidden by our
> CodingGuidelines.

Is it?

https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html#tag_09_03_06

says otherwise.  There may be some other GNU extensions to BRE that
allows you to write ERE elements with different syntax, but I doubt
this is one of them.  Perhaps you are thinking about "A\|B"
alternation?  In ERE "A|B" is alternation, and GNU BRE allows "A\|B"
but that is outside POSIX, IIUC.  "A\+" (1 or more of A) and "A\?"
(0 or 1 of A) are the same way.

We do say we don't use "\{m,n\}" in the guidelines, which was
written more than 10 years ago that codifies the habit acquired
while having to deal with regexp implementations of various UNIX
variants like early SystemV and BSD4 from more than 20 years ago.

If we are using the syntax in many of our tests that everybody runs,
that can be taken as a sign that those platforms who had problems
with the syntax have died out, or at least to them Git does not
matter.

So my prefererence is to

 - Allow \{m,n\} when it makes sense and codify it in the guidelines

 - Rewriting tests is fine if it makes the result easier to read,
   but it shouldn't be done for the sole purpose of getting rid of
   the \{m,n\} syntax.

 - As there are folks without GNU, until these GNU extensions for |,
   +, and ? are adopted widely, keep forbidding their use in BRE.

>  test_expect_success 'git branch -M baz bam should add entries to .git/logs/HEAD' '
>  	msg="Branch: renamed refs/heads/baz to refs/heads/bam" &&
> -	grep " 0\{40\}.*$msg$" .git/logs/HEAD &&
> -	grep "^0\{40\}.*$msg$" .git/logs/HEAD
> +	zero="00000000" &&
> +	zero="$zero$zero$zero$zero$zero" &&
> +	grep " $zero.*$msg$" .git/logs/HEAD &&
> +	grep "^$zero.*$msg$" .git/logs/HEAD
>  '

This is not good

>  test_expect_success 'git branch -M should leave orphaned HEAD alone' '
> diff --git a/t/t3305-notes-fanout.sh b/t/t3305-notes-fanout.sh
> index 22ffe5bcb9..aa3bb2e308 100755
> --- a/t/t3305-notes-fanout.sh
> +++ b/t/t3305-notes-fanout.sh
> @@ -9,7 +9,7 @@ path_has_fanout() {
>  	path=$1 &&
>  	fanout=$2 &&
>  	after_last_slash=$(($(test_oid hexsz) - $fanout * 2)) &&
> -	echo $path | grep -q "^\([0-9a-f]\{2\}/\)\{$fanout\}[0-9a-f]\{$after_last_slash\}$"
> +	echo $path | grep -q -E "^([0-9a-f][0-9a-f]/){$fanout}[0-9a-f]{$after_last_slash}$"

The use of -E makes it more readable and is good.  The innermost "a
pair of hexdigits" that would repeat $fanout times may be easier to
read if you keep the {2}, though.
diff mbox series

Patch

diff --git a/t/t3200-branch.sh b/t/t3200-branch.sh
index 9723c2827c..f05ac1fe0b 100755
--- a/t/t3200-branch.sh
+++ b/t/t3200-branch.sh
@@ -201,8 +201,10 @@  test_expect_success 'git branch -M baz bam should succeed when baz is checked ou
 
 test_expect_success 'git branch -M baz bam should add entries to .git/logs/HEAD' '
 	msg="Branch: renamed refs/heads/baz to refs/heads/bam" &&
-	grep " 0\{40\}.*$msg$" .git/logs/HEAD &&
-	grep "^0\{40\}.*$msg$" .git/logs/HEAD
+	zero="00000000" &&
+	zero="$zero$zero$zero$zero$zero" &&
+	grep " $zero.*$msg$" .git/logs/HEAD &&
+	grep "^$zero.*$msg$" .git/logs/HEAD
 '
 
 test_expect_success 'git branch -M should leave orphaned HEAD alone' '
diff --git a/t/t3305-notes-fanout.sh b/t/t3305-notes-fanout.sh
index 22ffe5bcb9..aa3bb2e308 100755
--- a/t/t3305-notes-fanout.sh
+++ b/t/t3305-notes-fanout.sh
@@ -9,7 +9,7 @@  path_has_fanout() {
 	path=$1 &&
 	fanout=$2 &&
 	after_last_slash=$(($(test_oid hexsz) - $fanout * 2)) &&
-	echo $path | grep -q "^\([0-9a-f]\{2\}/\)\{$fanout\}[0-9a-f]\{$after_last_slash\}$"
+	echo $path | grep -q -E "^([0-9a-f][0-9a-f]/){$fanout}[0-9a-f]{$after_last_slash}$"
 }
 
 touched_one_note_with_fanout() {
diff --git a/t/t3404-rebase-interactive.sh b/t/t3404-rebase-interactive.sh
index 688b01e3eb..4f5abb5ad2 100755
--- a/t/t3404-rebase-interactive.sh
+++ b/t/t3404-rebase-interactive.sh
@@ -1244,9 +1244,9 @@  test_expect_success 'short commit ID collide' '
 		test $colliding_id = "$(git rev-parse HEAD | cut -c 1-4)" &&
 		grep "^pick $colliding_id " \
 			.git/rebase-merge/git-rebase-todo.tmp &&
-		grep "^pick [0-9a-f]\{$hexsz\}" \
+		grep -E "^pick [0-9a-f]{$hexsz}" \
 			.git/rebase-merge/git-rebase-todo &&
-		grep "^pick [0-9a-f]\{$hexsz\}" \
+		grep -E "^pick [0-9a-f]{$hexsz}" \
 			.git/rebase-merge/git-rebase-todo.backup &&
 		git rebase --continue
 	) &&
@@ -1261,7 +1261,7 @@  test_expect_success 'respect core.abbrev' '
 		set_cat_todo_editor &&
 		test_must_fail git rebase -i HEAD~4 >todo-list
 	) &&
-	test 4 = $(grep -c "pick [0-9a-f]\{12,\}" todo-list)
+	test 4 = $(grep -c -E "pick [0-9a-f]{12,}" todo-list)
 '
 
 test_expect_success 'todo count' '
diff --git a/t/t5550-http-fetch-dumb.sh b/t/t5550-http-fetch-dumb.sh
index d7cf85ffea..8f182a3cbf 100755
--- a/t/t5550-http-fetch-dumb.sh
+++ b/t/t5550-http-fetch-dumb.sh
@@ -234,7 +234,7 @@  test_expect_success 'http-fetch --packfile' '
 		--index-pack-arg=--keep \
 		"$HTTPD_URL"/dumb/repo_pack.git/$p >out &&
 
-	grep "^keep.[0-9a-f]\{16,\}$" out &&
+	grep -E "^keep.[0-9a-f]{16,}$" out &&
 	cut -c6- out >packhash &&
 
 	# Ensure that the expected files are generated
diff --git a/t/t5702-protocol-v2.sh b/t/t5702-protocol-v2.sh
index 5d42a355a8..b33cd4afca 100755
--- a/t/t5702-protocol-v2.sh
+++ b/t/t5702-protocol-v2.sh
@@ -1001,7 +1001,7 @@  test_expect_success 'part of packfile response provided as URI' '
 	do
 		git verify-pack --object-format=$(test_oid algo) --verbose $idx >out &&
 		{
-			grep "^[0-9a-f]\{16,\} " out || :
+			grep -E "^[0-9a-f]{16,} " out || :
 		} >out.objectlist &&
 		if test_line_count = 1 out.objectlist
 		then