diff mbox series

[v3,1/3] t: add lib-crlf-messages.sh for messages containing CRLF

Message ID f17d182c3bf5e758490441801423cdb0da17060d.1602526169.git.gitgitgadget@gmail.com (mailing list archive)
State New, archived
Headers show
Series [v3,1/3] t: add lib-crlf-messages.sh for messages containing CRLF | expand

Commit Message

Philippe Blain Oct. 12, 2020, 6:09 p.m. UTC
From: Philippe Blain <levraiphilippeblain@gmail.com>

A following commit will fix a bug in the ref-filter API that causes
commit and tag messages containing CRLF to be incorrectly parsed and
displayed.

Add a test library (t/lib-crlf-messages.sh) that creates refs with such
commit messages, so that we can easily test that this bug does not
appear in other commands in the future.

The function `test_crlf_subject_body_and_contents` can be used to test
that the `--format` option of `branch`, `tag`, `for-each-ref` and
`log` correctly displays the subject, body and raw content of commit and
tag messages.

The commits are created using `commit-tree` such that the current branch
in the test repository is not affected when `test_create_crlf_refs` is
called in a test. This is done so that the CRLF tests can be inserted
anywhere in a test script, where it makes sense to do so, without having
to potentially modify further tests that depend on output that would be
modified if the current branch gained new commits.

Signed-off-by: Philippe Blain <levraiphilippeblain@gmail.com>
---
 t/lib-crlf-messages.sh | 90 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 90 insertions(+)
 create mode 100644 t/lib-crlf-messages.sh

Comments

Junio C Hamano Oct. 12, 2020, 10:22 p.m. UTC | #1
"Philippe Blain via GitGitGadget" <gitgitgadget@gmail.com> writes:

> From: Philippe Blain <levraiphilippeblain@gmail.com>
>
> A following commit will fix a bug in the ref-filter API that causes
> commit and tag messages containing CRLF to be incorrectly parsed and
> displayed.
>
> Add a test library (t/lib-crlf-messages.sh) that creates refs with such
> commit messages, so that we can easily test that this bug does not
> appear in other commands in the future.
> ...
> The function `test_crlf_subject_body_and_contents` can be used to test
> that the `--format` option of `branch`, `tag`, `for-each-ref` and
> `log` correctly displays the subject, body and raw content of commits and
> tag messages.

I am not sure about the wisdom of this arrangement.  Surely you do
not want to write duplicated set-up for (existing) test scripts for
for-each-ref, branch and tag subcommands, assuming that these test
scripts are separated for subcommands they test.

But you can have a single test script, that is differentiated from
all other test scripts by what it tests: having to deal with commits
that use CRLF.  Then we do not have to add a dot-includable test
library that lets various tests create these same funny commits.
Instead, we can just do these as normal set-up step(s) for that
single test script, and then in that test script, verify what is
shown by various commands that share the underlying ref-filter
machinery.  No?


> diff --git a/t/lib-crlf-messages.sh b/t/lib-crlf-messages.sh
> new file mode 100644
> index 0000000000..10a2b57280
> --- /dev/null
> +++ b/t/lib-crlf-messages.sh
> @@ -0,0 +1,90 @@
> +# Setup refs with commit and tag messages containing CRLF
> +
> +LIB_CRLF_BRANCHES=""
> +
> +create_crlf_ref () {
> +	message="$1" &&
> +	subject="$2" &&
> +	body="$3" &&
> +	branch="$4" &&
> +	printf "${message}" >.crlf-message-${branch}.txt &&
> +	printf "${subject}" >.crlf-subject-${branch}.txt &&
> +	printf "${body}" >.crlf-body-${branch}.txt &&
> +	LIB_CRLF_BRANCHES="${LIB_CRLF_BRANCHES} ${branch}"
> +	test_tick &&
> +	hash=$(git commit-tree HEAD^{tree} -p HEAD -F .crlf-message-${branch}.txt) &&
> +	git branch ${branch} ${hash} &&
> +	git tag tag-${branch} ${branch} -F .crlf-message-${branch}.txt --cleanup=verbatim
> +}
> +
> +create_crlf_refs () {
> +	message="Subject first line\r\n\r\nBody first line\r\nBody second line\r\n" &&
> +	body="Body first line\r\nBody second line\r\n" &&
> +	subject="Subject first line" &&
> +	branch="crlf" &&
> +	create_crlf_ref "${message}" "${subject}" "${body}" "${branch}" &&
> +	message="Subject first line\r\n\r\n\r\nBody first line\r\nBody second line\r\n" &&
> +	branch="crlf-empty-lines-after-subject" &&
> +	create_crlf_ref "${message}" "${subject}" "${body}" "${branch}" &&
> +	message="Subject first line\r\nSubject second line\r\n\r\nBody first line\r\nBody second line\r\n" &&
> +	subject="Subject first line Subject second line" &&
> +	branch="crlf-two-line-subject" &&
> +	create_crlf_ref "${message}" "${subject}" "${body}" "${branch}" &&
> +	message="Subject first line\r\nSubject second line" &&
> +	subject="Subject first line Subject second line" &&
> +	body="" &&
> +	branch="crlf-two-line-subject-no-body" &&
> +	create_crlf_ref "${message}" "${subject}" "${body}" "${branch}" &&
> +	message="Subject first line\r\nSubject second line\r\n" &&
> +	branch="crlf-two-line-subject-no-body-trailing-newline" &&
> +	create_crlf_ref "${message}" "${subject}" "${body}" "${branch}" &&
> +	message="Subject first line\r\nSubject second line\r\n\r" &&
> +	branch="crlf-two-line-subject-no-body-trailing-newline2" &&
> +	create_crlf_ref "${message}" "${subject}" "${body}" "${branch}"
> +}
> +
> +test_create_crlf_refs () {
> +	test_expect_success 'setup refs with CRLF commit messages' '
> +		create_crlf_refs
> +	'
> +}
> +
> +cleanup_crlf_refs () {
> +	for branch in ${LIB_CRLF_BRANCHES}; do
> +		git branch -D ${branch} &&
> +		git tag -d tag-${branch} &&
> +		rm .crlf-message-${branch}.txt &&
> +		rm .crlf-subject-${branch}.txt &&
> +		rm .crlf-body-${branch}.txt
> +	done
> +}
> +
> +test_cleanup_crlf_refs () {
> +	test_expect_success 'cleanup refs with CRLF commit messages' '
> +		cleanup_crlf_refs
> +	'
> +}
> +
> +test_crlf_subject_body_and_contents() {

Being a test library does not exempt a script from being subject to
the coding guidelines.

> +	command_and_args="$@" &&
> +	command=$1 &&
> +	if [ ${command} = "branch" ] || [ ${command} = "for-each-ref" ] || [ ${command} = "tag" ]; then
> +		atoms="(contents:subject) (contents:body) (contents)"
> +	elif [ ${command} = "log" ] || [ ${command} = "show" ]; then
> +		atoms="s b B"
> +	fi &&

This is the part that made me react to the organization.  Even
though this helper "library" pretends to be generic, it needs to
actually know exactly what subcommands are going to be tested with
the helper.  It is probably easier to read and understand if these
helper functions are defined in the same script as the one that
tests these various commands but for one specific aspect of these
commands (i.e. how log messages with a funny line-ending convention
are split into subject and body).

> +	files="subject body message" &&
> +	while  [ -n "${atoms}" ]; do
> +		set ${atoms} && atom=$1 && shift && atoms="$*" &&
> +		set ${files} &&	file=$1 && shift && files="$*" &&
> +		test_expect_success "${command}: --format='%${atom}' works with CRLF input" "
> +			rm -f expect &&
> +			for ref in ${LIB_CRLF_BRANCHES}; do
> +				cat .crlf-${file}-\"\${ref}\".txt >>expect &&
> +				printf \"\n\" >>expect
> +			done &&
> +			git $command_and_args --format=\"%${atom}\" >actual &&
> +			test_cmp expect actual
> +		"
> +	done
> +}
Eric Sunshine Oct. 12, 2020, 10:47 p.m. UTC | #2
On Mon, Oct 12, 2020 at 06:09:27PM +0000, Philippe Blain via GitGitGadget wrote:
> Add a test library (t/lib-crlf-messages.sh) that creates refs with such
> commit messages, so that we can easily test that this bug does not
> appear in other commands in the future.

In addition to Junio's review comments...

> 
> Signed-off-by: Philippe Blain <levraiphilippeblain@gmail.com>
> ---
> diff --git a/t/lib-crlf-messages.sh b/t/lib-crlf-messages.sh
> @@ -0,0 +1,90 @@
> +create_crlf_ref () {
> +	message="$1" &&
> +	subject="$2" &&
> +	body="$3" &&
> +	branch="$4" &&
> +	printf "${message}" >.crlf-message-${branch}.txt &&
> +	printf "${subject}" >.crlf-subject-${branch}.txt &&
> +	printf "${body}" >.crlf-body-${branch}.txt &&
> +	LIB_CRLF_BRANCHES="${LIB_CRLF_BRANCHES} ${branch}"

Broken &&-chain.

> +	test_tick &&
> +	hash=$(git commit-tree HEAD^{tree} -p HEAD -F .crlf-message-${branch}.txt) &&
> +	git branch ${branch} ${hash} &&
> +	git tag tag-${branch} ${branch} -F .crlf-message-${branch}.txt --cleanup=verbatim
> +}
> +
> +create_crlf_refs () {
> +	message="Subject first line\r\n\r\nBody first line\r\nBody second line\r\n" &&
> +	body="Body first line\r\nBody second line\r\n" &&
> +	subject="Subject first line" &&
> +	branch="crlf" &&
> +	create_crlf_ref "${message}" "${subject}" "${body}" "${branch}" &&

This is somewhat onerous to digest and compose. Have you considered
making it more automated and easier to read? Perhaps something like
this:

    create_crlf_ref () {
        branch=$1
        cat >.crlf-message-$branch.txt &&
        sed -n "1,/^$/p" <.crlf-message-$branch.txt | sed "/^$/d" | append_cr >.crlf-subject-$branch.txt &&
        sed -n "/^$/,\$p" <.crlf-message-$branch.txt | sed "1d" | append_cr >.crlf-body-$branch.txt &&
        ...
    }

    create_crlf_refs () {
        create_crlf_ref crlf <<-\EOF
        Subject first line

        Body first line
        Body second line
        EOF
        ...
    }

> +test_create_crlf_refs () {
> +	test_expect_success 'setup refs with CRLF commit messages' '
> +		create_crlf_refs
> +	'
> +}

This almost seems like an unnecessary indirection since callers could
just as easily do this on their own, like this:

    test_expect_success 'setup refs with CRLF commit messages' '
        create_crlf_refs
    '

which isn't very burdensome. However, I suppose doing it this way
gives consistent test titles between scripts, so not necessarily a
strong objection on my part.

> +cleanup_crlf_refs () {
> +	for branch in ${LIB_CRLF_BRANCHES}; do

Our style is to place 'do' on its own line:

    for branch in $LIB_CRLF_BRANCHES
    do
        ...

This would be a syntax error if LIB_CRLF_BRANCHES is empty for some
reason, but I suppose we don't really have to worry about it here(?).

> +		git branch -D ${branch} &&
> +		git tag -d tag-${branch} &&
> +		rm .crlf-message-${branch}.txt &&
> +		rm .crlf-subject-${branch}.txt &&
> +		rm .crlf-body-${branch}.txt
> +	done
> +}
> +
> +test_cleanup_crlf_refs () {
> +	test_expect_success 'cleanup refs with CRLF commit messages' '
> +		cleanup_crlf_refs
> +	'
> +}
> +
> +test_crlf_subject_body_and_contents() {
> +	command_and_args="$@" &&
> +	command=$1 &&
> +	if [ ${command} = "branch" ] || [ ${command} = "for-each-ref" ] || [ ${command} = "tag" ]; then
> +		atoms="(contents:subject) (contents:body) (contents)"
> +	elif [ ${command} = "log" ] || [ ${command} = "show" ]; then
> +		atoms="s b B"
> +	fi &&

Style:

    if test "$command" = "branch" || test ...
    then
        ...
    elif test ...
    then
        ...
    fi &&

> +	files="subject body message" &&
> +	while  [ -n "${atoms}" ]; do

Too many spaces after 'while'.

Style:

    while test -n "..."
    do
        ...

> +		set ${atoms} && atom=$1 && shift && atoms="$*" &&
> +		set ${files} &&	file=$1 && shift && files="$*" &&
> +		test_expect_success "${command}: --format='%${atom}' works with CRLF input" "
> +			rm -f expect &&
> +			for ref in ${LIB_CRLF_BRANCHES}; do

Style.

> +				cat .crlf-${file}-\"\${ref}\".txt >>expect &&
> +				printf \"\n\" >>expect
> +			done &&
> +			git $command_and_args --format=\"%${atom}\" >actual &&
> +			test_cmp expect actual
> +		"
> +	done
> +}
Philippe Blain Oct. 14, 2020, 1:20 p.m. UTC | #3
Hi Eric,

> Le 12 oct. 2020 à 18:47, Eric Sunshine <sunshine@sunshineco.com> a écrit :
> 
> On Mon, Oct 12, 2020 at 06:09:27PM +0000, Philippe Blain via GitGitGadget wrote:
>> Add a test library (t/lib-crlf-messages.sh) that creates refs with such
>> commit messages, so that we can easily test that this bug does not
>> appear in other commands in the future.
> 
> In addition to Junio's review comments...
> 
>> 
>> Signed-off-by: Philippe Blain <levraiphilippeblain@gmail.com>
>> ---
>> diff --git a/t/lib-crlf-messages.sh b/t/lib-crlf-messages.sh
>> @@ -0,0 +1,90 @@
>> +create_crlf_ref () {
>> +	message="$1" &&
>> +	subject="$2" &&
>> +	body="$3" &&
>> +	branch="$4" &&
>> +	printf "${message}" >.crlf-message-${branch}.txt &&
>> +	printf "${subject}" >.crlf-subject-${branch}.txt &&
>> +	printf "${body}" >.crlf-body-${branch}.txt &&
>> +	LIB_CRLF_BRANCHES="${LIB_CRLF_BRANCHES} ${branch}"
> 
> Broken &&-chain.
> 
>> +	test_tick &&
>> +	hash=$(git commit-tree HEAD^{tree} -p HEAD -F .crlf-message-${branch}.txt) &&
>> +	git branch ${branch} ${hash} &&
>> +	git tag tag-${branch} ${branch} -F .crlf-message-${branch}.txt --cleanup=verbatim
>> +}
>> +
>> +create_crlf_refs () {
>> +	message="Subject first line\r\n\r\nBody first line\r\nBody second line\r\n" &&
>> +	body="Body first line\r\nBody second line\r\n" &&
>> +	subject="Subject first line" &&
>> +	branch="crlf" &&
>> +	create_crlf_ref "${message}" "${subject}" "${body}" "${branch}" &&
> 
> This is somewhat onerous to digest and compose. Have you considered
> making it more automated and easier to read? Perhaps something like
> this:
> 
>    create_crlf_ref () {
>        branch=$1
>        cat >.crlf-message-$branch.txt &&
>        sed -n "1,/^$/p" <.crlf-message-$branch.txt | sed "/^$/d" | append_cr >.crlf-subject-$branch.txt &&
>        sed -n "/^$/,\$p" <.crlf-message-$branch.txt | sed "1d" | append_cr >.crlf-body-$branch.txt &&
>        ...
>    }
> 
>    create_crlf_refs () {
>        create_crlf_ref crlf <<-\EOF
>        Subject first line
> 
>        Body first line
>        Body second line
>        EOF
>        ...
>    }

I did not try to do that because I did not think of it. 
However, I think it's clearer using printf, this way '\n' and '\r'
appear clearly on all platforms, whatever editor is in use
and whatever settings this editor is using to hide or not hide
control characters.

> 
>> +test_create_crlf_refs () {
>> +	test_expect_success 'setup refs with CRLF commit messages' '
>> +		create_crlf_refs
>> +	'
>> +}
> 
> This almost seems like an unnecessary indirection since callers could
> just as easily do this on their own, like this:
> 
>    test_expect_success 'setup refs with CRLF commit messages' '
>        create_crlf_refs
>    '
> 
> which isn't very burdensome. However, I suppose doing it this way
> gives consistent test titles between scripts, so not necessarily a
> strong objection on my part.

Yes, that was the reason. Given Junio's comments I'll surely
refactor this library into a script, so there won't be a need for
this indirection.

> 
>> +cleanup_crlf_refs () {
>> +	for branch in ${LIB_CRLF_BRANCHES}; do
> 
> Our style is to place 'do' on its own line:
> 
>    for branch in $LIB_CRLF_BRANCHES
>    do
>        ...
> 
> This would be a syntax error if LIB_CRLF_BRANCHES is empty for some
> reason, but I suppose we don't really have to worry about it here(?).
> 
>> +		git branch -D ${branch} &&
>> +		git tag -d tag-${branch} &&
>> +		rm .crlf-message-${branch}.txt &&
>> +		rm .crlf-subject-${branch}.txt &&
>> +		rm .crlf-body-${branch}.txt
>> +	done
>> +}
>> +
>> +test_cleanup_crlf_refs () {
>> +	test_expect_success 'cleanup refs with CRLF commit messages' '
>> +		cleanup_crlf_refs
>> +	'
>> +}
>> +
>> +test_crlf_subject_body_and_contents() {
>> +	command_and_args="$@" &&
>> +	command=$1 &&
>> +	if [ ${command} = "branch" ] || [ ${command} = "for-each-ref" ] || [ ${command} = "tag" ]; then
>> +		atoms="(contents:subject) (contents:body) (contents)"
>> +	elif [ ${command} = "log" ] || [ ${command} = "show" ]; then
>> +		atoms="s b B"
>> +	fi &&
> 
> Style:
> 
>    if test "$command" = "branch" || test ...
>    then
>        ...
>    elif test ...
>    then
>        ...
>    fi &&
> 
>> +	files="subject body message" &&
>> +	while  [ -n "${atoms}" ]; do
> 
> Too many spaces after 'while'.
> 
> Style:
> 
>    while test -n "..."
>    do
>        ...
> 
>> +		set ${atoms} && atom=$1 && shift && atoms="$*" &&
>> +		set ${files} &&	file=$1 && shift && files="$*" &&
>> +		test_expect_success "${command}: --format='%${atom}' works with CRLF input" "
>> +			rm -f expect &&
>> +			for ref in ${LIB_CRLF_BRANCHES}; do
> 
> Style.
> 
>> +				cat .crlf-${file}-\"\${ref}\".txt >>expect &&
>> +				printf \"\n\" >>expect
>> +			done &&
>> +			git $command_and_args --format=\"%${atom}\" >actual &&
>> +			test_cmp expect actual
>> +		"
>> +	done
>> +}

Thanks for the review! (and I'll re-read the shell coding guidelines
before sending v4).

Cheers,
Philippe.
Philippe Blain Oct. 14, 2020, 1:22 p.m. UTC | #4
Hi Junio,

> Le 12 oct. 2020 à 18:22, Junio C Hamano <gitster@pobox.com> a écrit :
> 
> "Philippe Blain via GitGitGadget" <gitgitgadget@gmail.com> writes:
> 
>> From: Philippe Blain <levraiphilippeblain@gmail.com>
>> 
>> A following commit will fix a bug in the ref-filter API that causes
>> commit and tag messages containing CRLF to be incorrectly parsed and
>> displayed.
>> 
>> Add a test library (t/lib-crlf-messages.sh) that creates refs with such
>> commit messages, so that we can easily test that this bug does not
>> appear in other commands in the future.
>> ...
>> The function `test_crlf_subject_body_and_contents` can be used to test
>> that the `--format` option of `branch`, `tag`, `for-each-ref` and
>> `log` correctly displays the subject, body and raw content of commits and
>> tag messages.
> 
> I am not sure about the wisdom of this arrangement.  Surely you do
> not want to write duplicated set-up for (existing) test scripts for
> for-each-ref, branch and tag subcommands, assuming that these test
> scripts are separated for subcommands they test.
> 
> But you can have a single test script, that is differentiated from
> all other test scripts by what it tests: having to deal with commits
> that use CRLF.  Then we do not have to add a dot-includable test
> library that lets various tests create these same funny commits.
> Instead, we can just do these as normal set-up step(s) for that
> single test script, and then in that test script, verify what is
> shown by various commands that share the underlying ref-filter
> machinery.  No?

Yes. I was thinking that it made more sense for the tests
to be in existing test scripts, but if you feel a separate test
script is warranted for these tests, I'll do that instead.

Thanks,

Philippe.
Eric Sunshine Oct. 14, 2020, 1:45 p.m. UTC | #5
On Wed, Oct 14, 2020 at 9:20 AM Philippe Blain
<levraiphilippeblain@gmail.com> wrote:
> > Le 12 oct. 2020 à 18:47, Eric Sunshine <sunshine@sunshineco.com> a écrit :
> > This is somewhat onerous to digest and compose. Have you considered
> > making it more automated and easier to read? Perhaps something like
> > this:
> >
> >    create_crlf_ref () {
> >        branch=$1
> >        cat >.crlf-message-$branch.txt &&
> >        sed -n "1,/^$/p" <.crlf-message-$branch.txt | sed "/^$/d" | append_cr >.crlf-subject-$branch.txt &&
> >        sed -n "/^$/,\$p" <.crlf-message-$branch.txt | sed "1d" | append_cr >.crlf-body-$branch.txt &&
> >        ...
> >    }
> >
> >    create_crlf_refs () {
> >        create_crlf_ref crlf <<-\EOF
> >        Subject first line
> >
> >        Body first line
> >        Body second line
> >        EOF
> >        ...
> >    }
>
> I did not try to do that because I did not think of it.
> However, I think it's clearer using printf, this way '\n' and '\r'
> appear clearly on all platforms, whatever editor is in use
> and whatever settings this editor is using to hide or not hide
> control characters.

Sorry, I'm not sure I understand what you are saying about editors and
hiding or not hiding control characters. There are no hidden control
characters in the example code I posted.

The code I proposed is very explicit about using CRLF terminators. The
here-doc fed to create_crlf_ref() contains only the normal LF, but
then create_crlf_ref() explicitly converts those to CRLF by calling
append_cr().
Philippe Blain Oct. 14, 2020, 1:52 p.m. UTC | #6
> Le 14 oct. 2020 à 09:45, Eric Sunshine <sunshine@sunshineco.com> a écrit :
> 
> On Wed, Oct 14, 2020 at 9:20 AM Philippe Blain
> <levraiphilippeblain@gmail.com> wrote:
>>> Le 12 oct. 2020 à 18:47, Eric Sunshine <sunshine@sunshineco.com> a écrit :
>>> This is somewhat onerous to digest and compose. Have you considered
>>> making it more automated and easier to read? Perhaps something like
>>> this:
>>> 
>>>   create_crlf_ref () {
>>>       branch=$1
>>>       cat >.crlf-message-$branch.txt &&
>>>       sed -n "1,/^$/p" <.crlf-message-$branch.txt | sed "/^$/d" | append_cr >.crlf-subject-$branch.txt &&
>>>       sed -n "/^$/,\$p" <.crlf-message-$branch.txt | sed "1d" | append_cr >.crlf-body-$branch.txt &&
>>>       ...
>>>   }
>>> 
>>>   create_crlf_refs () {
>>>       create_crlf_ref crlf <<-\EOF
>>>       Subject first line
>>> 
>>>       Body first line
>>>       Body second line
>>>       EOF
>>>       ...
>>>   }
>> 
>> I did not try to do that because I did not think of it.
>> However, I think it's clearer using printf, this way '\n' and '\r'
>> appear clearly on all platforms, whatever editor is in use
>> and whatever settings this editor is using to hide or not hide
>> control characters.
> 
> Sorry, I'm not sure I understand what you are saying about editors and
> hiding or not hiding control characters. There are no hidden control
> characters in the example code I posted.
> 
> The code I proposed is very explicit about using CRLF terminators. The
> here-doc fed to create_crlf_ref() contains only the normal LF, but
> then create_crlf_ref() explicitly converts those to CRLF by calling
> append_cr().

Sorry, I missed that. I'll try to see if I can make it simpler using
this approach then.
Eric Sunshine Oct. 14, 2020, 11:01 p.m. UTC | #7
On Wed, Oct 14, 2020 at 9:52 AM Philippe Blain <levraiphilippeblain@gmail.com> wrote:
> > Le 14 oct. 2020 à 09:45, Eric Sunshine <sunshine@sunshineco.com> a écrit :
> >>> Le 12 oct. 2020 à 18:47, Eric Sunshine <sunshine@sunshineco.com> a écrit :
> >>>  create_crlf_ref () {
> >>>    branch=$1
> >>>    cat >.crlf-message-$branch.txt &&
> >>>    sed -n "1,/^$/p" <.crlf-message-$branch.txt | sed "/^$/d" | append_cr >.crlf-subject-$branch.txt &&
> >>>    sed -n "/^$/,\$p" <.crlf-message-$branch.txt | sed "1d" | append_cr >.crlf-body-$branch.txt &&
> >
> > The code I proposed is very explicit about using CRLF terminators. The
> > here-doc fed to create_crlf_ref() contains only the normal LF, but
> > then create_crlf_ref() explicitly converts those to CRLF by calling
> > append_cr().
>
> Sorry, I missed that. I'll try to see if I can make it simpler using
> this approach then.

By the way, if you also need .crlf-message-$branch.txt to have CRLF
line endings, then you'll probably want to use a temporary file (for
instance, .crlf-orig-$branch.txt), perhaps like this:

    create_crlf_ref () {
        branch=$1 &&
        cat >.crlf-orig-$branch.txt &&
        append_cr <.crlf-orig-$branch.txt >.crlf-message-$branch.txt &&
        sed -n "1,/^$/p" <.crlf-orig-$branch.txt | sed "/^$/d" | append_cr >.crlf-subject-$branch.txt &&
        sed -n "/^$/,\$p" <.crlf-orig-$branch.txt | sed "1d" | append_cr >.crlf-body-$branch.txt &&
        ...
    }
Philippe Blain Oct. 22, 2020, 3:09 a.m. UTC | #8
Hi Eric,

>> +cleanup_crlf_refs () {
>> +	for branch in ${LIB_CRLF_BRANCHES}; do
> 
> Our style is to place 'do' on its own line:
> 
>    for branch in $LIB_CRLF_BRANCHES
>    do
>        ...
> 
> This would be a syntax error if LIB_CRLF_BRANCHES is empty for some
> reason, but I suppose we don't really have to worry about it here(?).

Apparently, not in my shell, as I realized I had misspelled LIB_CRLF_BRANCHES as
LIB_CLRF_BRANCHES (CLRF instead of CRLF) at another place and the test was
passing correctly (the loop was not being entered at all though):

$ /bin/sh --version
GNU bash, version 3.2.57(1)-release (x86_64-apple-darwin15)
$ for a in $b; do echo hello; done; echo $?
0

I've fixed that in v4 which I just sent. 

Cheers,
Philippe.
diff mbox series

Patch

diff --git a/t/lib-crlf-messages.sh b/t/lib-crlf-messages.sh
new file mode 100644
index 0000000000..10a2b57280
--- /dev/null
+++ b/t/lib-crlf-messages.sh
@@ -0,0 +1,90 @@ 
+# Setup refs with commit and tag messages containing CRLF
+
+LIB_CRLF_BRANCHES=""
+
+create_crlf_ref () {
+	message="$1" &&
+	subject="$2" &&
+	body="$3" &&
+	branch="$4" &&
+	printf "${message}" >.crlf-message-${branch}.txt &&
+	printf "${subject}" >.crlf-subject-${branch}.txt &&
+	printf "${body}" >.crlf-body-${branch}.txt &&
+	LIB_CRLF_BRANCHES="${LIB_CRLF_BRANCHES} ${branch}"
+	test_tick &&
+	hash=$(git commit-tree HEAD^{tree} -p HEAD -F .crlf-message-${branch}.txt) &&
+	git branch ${branch} ${hash} &&
+	git tag tag-${branch} ${branch} -F .crlf-message-${branch}.txt --cleanup=verbatim
+}
+
+create_crlf_refs () {
+	message="Subject first line\r\n\r\nBody first line\r\nBody second line\r\n" &&
+	body="Body first line\r\nBody second line\r\n" &&
+	subject="Subject first line" &&
+	branch="crlf" &&
+	create_crlf_ref "${message}" "${subject}" "${body}" "${branch}" &&
+	message="Subject first line\r\n\r\n\r\nBody first line\r\nBody second line\r\n" &&
+	branch="crlf-empty-lines-after-subject" &&
+	create_crlf_ref "${message}" "${subject}" "${body}" "${branch}" &&
+	message="Subject first line\r\nSubject second line\r\n\r\nBody first line\r\nBody second line\r\n" &&
+	subject="Subject first line Subject second line" &&
+	branch="crlf-two-line-subject" &&
+	create_crlf_ref "${message}" "${subject}" "${body}" "${branch}" &&
+	message="Subject first line\r\nSubject second line" &&
+	subject="Subject first line Subject second line" &&
+	body="" &&
+	branch="crlf-two-line-subject-no-body" &&
+	create_crlf_ref "${message}" "${subject}" "${body}" "${branch}" &&
+	message="Subject first line\r\nSubject second line\r\n" &&
+	branch="crlf-two-line-subject-no-body-trailing-newline" &&
+	create_crlf_ref "${message}" "${subject}" "${body}" "${branch}" &&
+	message="Subject first line\r\nSubject second line\r\n\r" &&
+	branch="crlf-two-line-subject-no-body-trailing-newline2" &&
+	create_crlf_ref "${message}" "${subject}" "${body}" "${branch}"
+}
+
+test_create_crlf_refs () {
+	test_expect_success 'setup refs with CRLF commit messages' '
+		create_crlf_refs
+	'
+}
+
+cleanup_crlf_refs () {
+	for branch in ${LIB_CRLF_BRANCHES}; do
+		git branch -D ${branch} &&
+		git tag -d tag-${branch} &&
+		rm .crlf-message-${branch}.txt &&
+		rm .crlf-subject-${branch}.txt &&
+		rm .crlf-body-${branch}.txt
+	done
+}
+
+test_cleanup_crlf_refs () {
+	test_expect_success 'cleanup refs with CRLF commit messages' '
+		cleanup_crlf_refs
+	'
+}
+
+test_crlf_subject_body_and_contents() {
+	command_and_args="$@" &&
+	command=$1 &&
+	if [ ${command} = "branch" ] || [ ${command} = "for-each-ref" ] || [ ${command} = "tag" ]; then
+		atoms="(contents:subject) (contents:body) (contents)"
+	elif [ ${command} = "log" ] || [ ${command} = "show" ]; then
+		atoms="s b B"
+	fi &&
+	files="subject body message" &&
+	while  [ -n "${atoms}" ]; do
+		set ${atoms} && atom=$1 && shift && atoms="$*" &&
+		set ${files} &&	file=$1 && shift && files="$*" &&
+		test_expect_success "${command}: --format='%${atom}' works with CRLF input" "
+			rm -f expect &&
+			for ref in ${LIB_CRLF_BRANCHES}; do
+				cat .crlf-${file}-\"\${ref}\".txt >>expect &&
+				printf \"\n\" >>expect
+			done &&
+			git $command_and_args --format=\"%${atom}\" >actual &&
+			test_cmp expect actual
+		"
+	done
+}