diff mbox series

[v4] line-range: fix infinite loop bug with '$' regex

Message ID 20221219224850.2703967-1-lars@oddbit.com (mailing list archive)
State Accepted
Commit 4e57c88e028c25ee9428b51ff829299bbb88a80c
Headers show
Series [v4] line-range: fix infinite loop bug with '$' regex | expand

Commit Message

Lars Kellogg-Stedman Dec. 19, 2022, 10:48 p.m. UTC
When the -L argument to "git log" is passed the zero-width regular
expression "$" (as in "-L :$:line-range.c"), this results in an
infinite loop in find_funcname_matching_regexp().

Modify find_funcname_matching_regexp to correctly match the entire line
instead of the zero-width match at eol and update the loop condition to
prevent an infinite loop in the event of other undiscovered corner cases.

The primary change is that we pre-decrement the beginning-of-line marker
('bol') before comparing it to '\n'. In the case of '$', where we match the
'\n' at the end of the line and start the loop with bol == eol, this
ensures that bol will find the beginning of the line on which the match
occurred.

Originally reported in <https://stackoverflow.com/q/74690545/147356>.

Signed-off-by: Lars Kellogg-Stedman <lars@oddbit.com>
---
 line-range.c        |  7 ++++---
 t/t4211-line-log.sh | 22 ++++++++++++++++++++++
 2 files changed, 26 insertions(+), 3 deletions(-)

Comments

Ævar Arnfjörð Bjarmason Dec. 19, 2022, 10:55 p.m. UTC | #1
On Mon, Dec 19 2022, Lars Kellogg-Stedman wrote:

> When the -L argument to "git log" is passed the zero-width regular
> expression "$" (as in "-L :$:line-range.c"), this results in an
> infinite loop in find_funcname_matching_regexp().
>
> Modify find_funcname_matching_regexp to correctly match the entire line
> instead of the zero-width match at eol and update the loop condition to
> prevent an infinite loop in the event of other undiscovered corner cases.
>
> The primary change is that we pre-decrement the beginning-of-line marker
> ('bol') before comparing it to '\n'. In the case of '$', where we match the
> '\n' at the end of the line and start the loop with bol == eol, this
> ensures that bol will find the beginning of the line on which the match
> occurred.
>
> Originally reported in <https://stackoverflow.com/q/74690545/147356>.
>
> Signed-off-by: Lars Kellogg-Stedman <lars@oddbit.com>
> ---
>  line-range.c        |  7 ++++---
>  t/t4211-line-log.sh | 22 ++++++++++++++++++++++
>  2 files changed, 26 insertions(+), 3 deletions(-)
>
> diff --git a/line-range.c b/line-range.c
> index 955a8a9535..47bf0d6f1a 100644
> --- a/line-range.c
> +++ b/line-range.c
> @@ -135,7 +135,7 @@ static const char *find_funcname_matching_regexp(xdemitconf_t *xecfg, const char
>  {
>  	int reg_error;
>  	regmatch_t match[1];
> -	while (1) {
> +	while (*start) {
>  		const char *bol, *eol;
>  		reg_error = regexec(regexp, start, 1, match, 0);
>  		if (reg_error == REG_NOMATCH)
> @@ -148,8 +148,8 @@ static const char *find_funcname_matching_regexp(xdemitconf_t *xecfg, const char
>  		/* determine extent of line matched */
>  		bol = start+match[0].rm_so;
>  		eol = start+match[0].rm_eo;
> -		while (bol > start && *bol != '\n')
> -			bol--;
> +		while (bol > start && *--bol != '\n')
> +			; /* nothing */
>  		if (*bol == '\n')
>  			bol++;
>  		while (*eol && *eol != '\n')
> @@ -161,6 +161,7 @@ static const char *find_funcname_matching_regexp(xdemitconf_t *xecfg, const char
>  			return bol;
>  		start = eol;
>  	}
> +	return NULL;
>  }
>  
>  static const char *parse_range_funcname(
> diff --git a/t/t4211-line-log.sh b/t/t4211-line-log.sh
> index ac9e4d0928..c6540e822f 100755
> --- a/t/t4211-line-log.sh
> +++ b/t/t4211-line-log.sh
> @@ -315,4 +315,26 @@ test_expect_success 'line-log with --before' '
>  	test_cmp expect actual
>  '
>  
> +test_expect_success 'setup tests for zero-width regular expressions' '
> +	cat >expect <<-EOF
> +	Modify func1() in file.c
> +	Add func1() and func2() in file.c
> +	EOF
> +'
> +
> +test_expect_success 'zero-width regex $ matches any function name' '
> +	git log --format="%s" --no-patch "-L:$:file.c" >actual &&
> +	test_cmp expect actual
> +'
> +
> +test_expect_success 'zero-width regex ^ matches any function name' '
> +	git log --format="%s" --no-patch "-L:^:file.c" >actual &&
> +	test_cmp expect actual
> +'
> +
> +test_expect_success 'zero-width regex .* matches any function name' '
> +	git log --format="%s" --no-patch "-L:.*:file.c" >actual &&
> +	test_cmp expect actual
> +'
> +

Untested, but as most of this is repeated & would fail if the "setup"
test is skipped, a one-off function would be better, e.g.:
	
	test_zerowidth_regex () {
		local rx="$1" &&
		
		test_expect_success "zero-width regex '$rx' matches any function name" '
			cat >expect <<-\EOF &&
			Modify func1() in file.c
			Add func1() and func2() in file.c
			EOF
			git log --format="%s" --no-patch -L:"$rx":file.c >actual &&
			test_cmp expect actual
		'
	}
	
	test_zerowidth_regex '$'
	test_zerowidth_regex '^'
	test_zerowidth_regex '.*'

The change of direction for the fix itself looks good to me (re my
earlier feedback on a previous round), i.e. that we should fix our own
code, not forbid '$' in regexes.
Eric Sunshine Dec. 20, 2022, midnight UTC | #2
On Mon, Dec 19, 2022 at 6:00 PM Ævar Arnfjörð Bjarmason
<avarab@gmail.com> wrote:
> On Mon, Dec 19 2022, Lars Kellogg-Stedman wrote:
> > +test_expect_success 'setup tests for zero-width regular expressions' '
> > +     cat >expect <<-EOF
> > +     Modify func1() in file.c
> > +     Add func1() and func2() in file.c
> > +     EOF
> > +'
> > +
> > +test_expect_success 'zero-width regex $ matches any function name' '
> > +     git log --format="%s" --no-patch "-L:$:file.c" >actual &&
> > +     test_cmp expect actual
> > +'
>
> Untested, but as most of this is repeated & would fail if the "setup"
> test is skipped, a one-off function would be better, e.g.:
>
>         test_zerowidth_regex () {
>                 local rx="$1" &&
>                 [...]
>         }
>         test_zerowidth_regex '$'
>         test_zerowidth_regex '^'
>         test_zerowidth_regex '.*'
>
> The change of direction for the fix itself looks good to me (re my
> earlier feedback on a previous round), i.e. that we should fix our own
> code, not forbid '$' in regexes.

It's subjective, of course, but the patch seems "good enough" as-is
and the tests are easy to understand. Therefore, can you clarify for
Lars and other reviewers if you're merely presenting an alternative
approach, or if you consider your suggestion "blocking" and expect a
reroll.
Junio C Hamano Dec. 20, 2022, 1:07 a.m. UTC | #3
Eric Sunshine <sunshine@sunshineco.com> writes:

>> The change of direction for the fix itself looks good to me (re my
>> earlier feedback on a previous round), i.e. that we should fix our own
>> code, not forbid '$' in regexes.
>
> It's subjective, of course, but the patch seems "good enough" as-is
> and the tests are easy to understand. Therefore, can you clarify for
> Lars and other reviewers if you're merely presenting an alternative
> approach, or if you consider your suggestion "blocking" and expect a
> reroll.

Thanks for saying this.

I personally felt that the original was easier to follow---given
that these tests do not have too many moving parts that may need to
be changed (in which case abstracting common parts out would allow
us to maintain only one copy which is a big win), being able to read
from top to bottom without having to read a function and recall what
the parameter was etc.
diff mbox series

Patch

diff --git a/line-range.c b/line-range.c
index 955a8a9535..47bf0d6f1a 100644
--- a/line-range.c
+++ b/line-range.c
@@ -135,7 +135,7 @@  static const char *find_funcname_matching_regexp(xdemitconf_t *xecfg, const char
 {
 	int reg_error;
 	regmatch_t match[1];
-	while (1) {
+	while (*start) {
 		const char *bol, *eol;
 		reg_error = regexec(regexp, start, 1, match, 0);
 		if (reg_error == REG_NOMATCH)
@@ -148,8 +148,8 @@  static const char *find_funcname_matching_regexp(xdemitconf_t *xecfg, const char
 		/* determine extent of line matched */
 		bol = start+match[0].rm_so;
 		eol = start+match[0].rm_eo;
-		while (bol > start && *bol != '\n')
-			bol--;
+		while (bol > start && *--bol != '\n')
+			; /* nothing */
 		if (*bol == '\n')
 			bol++;
 		while (*eol && *eol != '\n')
@@ -161,6 +161,7 @@  static const char *find_funcname_matching_regexp(xdemitconf_t *xecfg, const char
 			return bol;
 		start = eol;
 	}
+	return NULL;
 }
 
 static const char *parse_range_funcname(
diff --git a/t/t4211-line-log.sh b/t/t4211-line-log.sh
index ac9e4d0928..c6540e822f 100755
--- a/t/t4211-line-log.sh
+++ b/t/t4211-line-log.sh
@@ -315,4 +315,26 @@  test_expect_success 'line-log with --before' '
 	test_cmp expect actual
 '
 
+test_expect_success 'setup tests for zero-width regular expressions' '
+	cat >expect <<-EOF
+	Modify func1() in file.c
+	Add func1() and func2() in file.c
+	EOF
+'
+
+test_expect_success 'zero-width regex $ matches any function name' '
+	git log --format="%s" --no-patch "-L:$:file.c" >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'zero-width regex ^ matches any function name' '
+	git log --format="%s" --no-patch "-L:^:file.c" >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'zero-width regex .* matches any function name' '
+	git log --format="%s" --no-patch "-L:.*:file.c" >actual &&
+	test_cmp expect actual
+'
+
 test_done