diff mbox series

[V2] patch-id: fix scan_hunk_header on diffs with 1 line of before/after

Message ID 20220129024757.24763-1-jerry@skydio.com (mailing list archive)
State New, archived
Headers show
Series [V2] patch-id: fix scan_hunk_header on diffs with 1 line of before/after | expand

Commit Message

Jerry Zhang Jan. 29, 2022, 2:47 a.m. UTC
Normally diffs will contain a hunk header of the format
"@@ -2,2 +2,15 @@ code". However in certain situations git can
omit the second comma separated value in either before or after
line counts in the case where there is only one line.

This can produce hunk headers that look like
"@@ -2 +2,18 @@ code" or "@@ -2,2 +2 @@ code".
As a result, scan_hunk_header mistakenly returns the line
number as line count, which then results in unpredictable
parsing errors with the rest of the patch, including giving
multiple lines of output for a single commit.

Fix by explicitly setting line count to 1 when there is
no comma, and add a test.

apply.c contains this same logic except it is correct. A
worthwhile future project might be to unify these two diff
parsers so they both benefit from fixes.

Signed-off-by: Jerry Zhang <jerry@skydio.com>
---
V1->V2:
- Removed references to -U1 since this isn't limited to that
- Added notes for future tasks

 builtin/patch-id.c  |  9 +++++++--
 t/t4204-patch-id.sh | 32 +++++++++++++++++++++++++++++++-
 2 files changed, 38 insertions(+), 3 deletions(-)

Comments

Junio C Hamano Jan. 31, 2022, 6:55 p.m. UTC | #1
Jerry Zhang <jerry@skydio.com> writes:

> Normally diffs will contain a hunk header of the format
> "@@ -2,2 +2,15 @@ code". However in certain situations git can
> omit the second comma separated value in either before or after
> line counts in the case where there is only one line.

No need to resend, but this is specific enough (i.e. ",1" is
omitted) and "in certain situations" makes it needlessly fuzzy,
making readers wonder if there are any other cases.  Also, this is
not "git can omit".  The "unified context" format output from any
"diff" command share this special case.

Otherwise looking very good.

Thanks for a fix.  Will queue.
Junio C Hamano Jan. 31, 2022, 10:54 p.m. UTC | #2
Jerry Zhang <jerry@skydio.com> writes:

> Normally diffs will contain a hunk header of the format
> "@@ -2,2 +2,15 @@ code". However in certain situations git can
> omit the second comma separated value in either before or after
> line counts in the case where there is only one line.
>
> This can produce hunk headers that look like
> "@@ -2 +2,18 @@ code" or "@@ -2,2 +2 @@ code".
> As a result, scan_hunk_header mistakenly returns the line
> number as line count, which then results in unpredictable
> parsing errors with the rest of the patch, including giving
> multiple lines of output for a single commit.
>
> Fix by explicitly setting line count to 1 when there is
> no comma, and add a test.
>
> apply.c contains this same logic except it is correct. A
> worthwhile future project might be to unify these two diff
> parsers so they both benefit from fixes.
>
> Signed-off-by: Jerry Zhang <jerry@skydio.com>
> ---
> V1->V2:
> - Removed references to -U1 since this isn't limited to that
> - Added notes for future tasks
>
>  builtin/patch-id.c  |  9 +++++++--
>  t/t4204-patch-id.sh | 32 +++++++++++++++++++++++++++++++-
>  2 files changed, 38 insertions(+), 3 deletions(-)
>
> diff --git a/builtin/patch-id.c b/builtin/patch-id.c
> index 822ffff51f..881fcf3273 100644
> --- a/builtin/patch-id.c
> +++ b/builtin/patch-id.c
> @@ -30,26 +30,31 @@ static int scan_hunk_header(const char *p, int *p_before, int *p_after)
>  
>  	q = p + 4;
>  	n = strspn(q, digits);
>  	if (q[n] == ',') {
>  		q += n + 1;
> +		*p_before = atoi(q);
>  		n = strspn(q, digits);
> +	} else {
> +		*p_before = 1;
>  	}
> +
>  	if (n == 0 || q[n] != ' ' || q[n+1] != '+')
>  		return 0;
>  
>  	r = q + n + 2;
>  	n = strspn(r, digits);
>  	if (r[n] == ',') {
>  		r += n + 1;
> +		*p_after = atoi(r);
>  		n = strspn(r, digits);
> +	} else {
> +		*p_after = 1;
>  	}
>  	if (n == 0)
>  		return 0;
>  
> -	*p_before = atoi(q);
> -	*p_after = atoi(r);
>  	return 1;
>  }
>  
>  static int get_one_patchid(struct object_id *next_oid, struct object_id *result,
>  			   struct strbuf *line_buf, int stable)
> diff --git a/t/t4204-patch-id.sh b/t/t4204-patch-id.sh
> index 80f4a65b28..34a386aee2 100755
> --- a/t/t4204-patch-id.sh
> +++ b/t/t4204-patch-id.sh
> @@ -36,11 +36,11 @@ test_expect_success 'patch-id output is well-formed' '
>  calc_patch_id () {
>  	patch_name="$1"
>  	shift
>  	git patch-id "$@" >patch-id.output &&
>  	sed "s/ .*//" patch-id.output >patch-id_"$patch_name" &&
> -	test_line_count -gt 0 patch-id_"$patch_name"
> +	test_line_count -eq 1 patch-id_"$patch_name"
>  }
>  
>  get_top_diff () {
>  	git log -p -1 "$@" -O bar-then-foo --
>  }
> @@ -200,6 +200,36 @@ EOF
>  test_expect_success 'patch-id handles no-nl-at-eof markers' '
>  	cat nonl | calc_patch_id nonl &&
>  	cat withnl | calc_patch_id withnl &&
>  	test_cmp patch-id_nonl patch-id_withnl
>  '
> +
> +cat >diffu1 <<\EOF
> +diff --git a/bar b/bar
> +index bdaf90f..31051f6 100644
> +--- a/bar
> ++++ b/bar
> +@@ -2 +2,2 @@
> + b
> ++c
> +diff --git a/car b/car
> +index 00750ed..2ae5e34 100644
> +--- a/car
> ++++ b/car
> +@@ -1 +1,2 @@
> + 3
> ++d
> +diff --git a/foo b/foo
> +index e439850..7146eb8 100644
> +--- a/foo
> ++++ b/foo
> +@@ -2 +2,2 @@
> + a
> ++e
> +EOF
> +
> +test_expect_success 'patch-id handles diffs with one line of before/after'

There is a opening quote for the body missing here.

> +	cat diffu1 | calc_patch_id diffu1 &&
> +	test_config patchid.stable true &&
> +	cat diffu1 | calc_patch_id diffu1stable
> +'
>  test_done
Junio C Hamano Jan. 31, 2022, 11:05 p.m. UTC | #3
Jerry Zhang <jerry@skydio.com> writes:

> diff --git a/t/t4204-patch-id.sh b/t/t4204-patch-id.sh
> index 80f4a65b28..34a386aee2 100755
> --- a/t/t4204-patch-id.sh
> +++ b/t/t4204-patch-id.sh
> @@ -200,6 +200,36 @@ EOF
>  test_expect_success 'patch-id handles no-nl-at-eof markers' '
>  	cat nonl | calc_patch_id nonl &&
>  	cat withnl | calc_patch_id withnl &&
>  	test_cmp patch-id_nonl patch-id_withnl
>  '

This is not a new issue, but this old test script needs updating.
And worse yet, your new test copies the existing badness and make it
twice as bad.

We frown upon doing anything outside test_expect_* blocks these
days, so the commands to prepare nonl and withnl files before thie
precontext of this patch needs to be folded into the test body.

Also, sending a single file using "cat" into pipeline is an
antipattern.  Just redirect into the downstream command instead,
i.e.

	test_expect_success 'patch-id handles no-nl-at-eof markers' '
		cat >nonl <<-\EOF &&
                diff --git i/a w/a
                index e69de29..2e65efe 100644
                --- i/a
                +++ w/a
                @@ -0,0 +1 @@
                +a
                \ No newline at end of file
                diff --git i/b w/b
                index e69de29..6178079 100644
                --- i/b
                +++ w/b
                @@ -0,0 +1 @@
                +b
                EOF
		cat >withnl <<-\EOF &&
		... likewise ...
		EOF
		calc_patch_id nonl <nonl &&
		calc_patch_id withnl <withnl &&
		test_cmp patch-i_ nonl patch-id-withnl
	'
	
or something like that.

I think the "handles no-nl-at-eof markers" test at the end is the
first and only serious offender, so instead of leaving it after the
series, we probalby would want to have a preliminary clean-up patch
before this patch, and fix the test this patch adds.

Thanks.
diff mbox series

Patch

diff --git a/builtin/patch-id.c b/builtin/patch-id.c
index 822ffff51f..881fcf3273 100644
--- a/builtin/patch-id.c
+++ b/builtin/patch-id.c
@@ -30,26 +30,31 @@  static int scan_hunk_header(const char *p, int *p_before, int *p_after)
 
 	q = p + 4;
 	n = strspn(q, digits);
 	if (q[n] == ',') {
 		q += n + 1;
+		*p_before = atoi(q);
 		n = strspn(q, digits);
+	} else {
+		*p_before = 1;
 	}
+
 	if (n == 0 || q[n] != ' ' || q[n+1] != '+')
 		return 0;
 
 	r = q + n + 2;
 	n = strspn(r, digits);
 	if (r[n] == ',') {
 		r += n + 1;
+		*p_after = atoi(r);
 		n = strspn(r, digits);
+	} else {
+		*p_after = 1;
 	}
 	if (n == 0)
 		return 0;
 
-	*p_before = atoi(q);
-	*p_after = atoi(r);
 	return 1;
 }
 
 static int get_one_patchid(struct object_id *next_oid, struct object_id *result,
 			   struct strbuf *line_buf, int stable)
diff --git a/t/t4204-patch-id.sh b/t/t4204-patch-id.sh
index 80f4a65b28..34a386aee2 100755
--- a/t/t4204-patch-id.sh
+++ b/t/t4204-patch-id.sh
@@ -36,11 +36,11 @@  test_expect_success 'patch-id output is well-formed' '
 calc_patch_id () {
 	patch_name="$1"
 	shift
 	git patch-id "$@" >patch-id.output &&
 	sed "s/ .*//" patch-id.output >patch-id_"$patch_name" &&
-	test_line_count -gt 0 patch-id_"$patch_name"
+	test_line_count -eq 1 patch-id_"$patch_name"
 }
 
 get_top_diff () {
 	git log -p -1 "$@" -O bar-then-foo --
 }
@@ -200,6 +200,36 @@  EOF
 test_expect_success 'patch-id handles no-nl-at-eof markers' '
 	cat nonl | calc_patch_id nonl &&
 	cat withnl | calc_patch_id withnl &&
 	test_cmp patch-id_nonl patch-id_withnl
 '
+
+cat >diffu1 <<\EOF
+diff --git a/bar b/bar
+index bdaf90f..31051f6 100644
+--- a/bar
++++ b/bar
+@@ -2 +2,2 @@
+ b
++c
+diff --git a/car b/car
+index 00750ed..2ae5e34 100644
+--- a/car
++++ b/car
+@@ -1 +1,2 @@
+ 3
++d
+diff --git a/foo b/foo
+index e439850..7146eb8 100644
+--- a/foo
++++ b/foo
+@@ -2 +2,2 @@
+ a
++e
+EOF
+
+test_expect_success 'patch-id handles diffs with one line of before/after'
+	cat diffu1 | calc_patch_id diffu1 &&
+	test_config patchid.stable true &&
+	cat diffu1 | calc_patch_id diffu1stable
+'
 test_done