mbox series

[v5,0/9] Improve odd encoding integration

Message ID cover.1573205699.git.congdanhqx@gmail.com (mailing list archive)
Headers show
Series Improve odd encoding integration | expand

Message

Đoàn Trần Công Danh Nov. 8, 2019, 9:43 a.m. UTC
The series is shifting from fixing test that failed on musl based Linux to
correct the internal working encoding and output encoding of git-am
git-cherry-pick git-rebase and git-revert.

Change from v4:
- Update commit message per review
- Add test for last 2 patches
- Notice a breakage with git rebase --rebase-merges (see patch 7)


Doan Tran Cong Danh (9):
  t0028: eliminate non-standard usage of printf
  configure.ac: define ICONV_OMITS_BOM if necessary
  t3900: demonstrate git-rebase problem with multi encoding
  sequencer: reencode to utf-8 before arrange rebase's todo list
  sequencer: reencode revert/cherry-pick's todo list
  sequencer: reencode squashing commit's message
  sequencer: reencode old merge-commit message
  sequencer: reencode commit message for am/rebase --show-current-patch
  sequencer: fallback to sane label in making rebase todo list

 configure.ac                     |  49 ++++++++++++++++++
 sequencer.c                      |  32 ++++++++----
 t/t0028-working-tree-encoding.sh |   4 +-
 t/t3433-rebase-i18n.sh           |  84 +++++++++++++++++++++++++++++++
 t/t3433/ISO8859-1.txt            | Bin 0 -> 15 bytes
 t/t3433/eucJP.txt                | Bin 0 -> 68 bytes
 t/t3900-i18n-commit.sh           |  37 ++++++++++++++
 7 files changed, 195 insertions(+), 11 deletions(-)
 create mode 100755 t/t3433-rebase-i18n.sh
 create mode 100644 t/t3433/ISO8859-1.txt
 create mode 100644 t/t3433/eucJP.txt

Range-diff against v4:
 1:  daa0c27d28 =  1:  b3d6c4e720 t0028: eliminate non-standard usage of printf
 2:  c50964f413 !  2:  fe63a6bc44 configure.ac: define ICONV_OMITS_BOM if necessary
    @@ Commit message
     
             make ICONV_OMITS_BOM=Yes
     
    -    However, typing the flag all the time is cumbersome and error-prone.
    +    However, configure script wasn't taught to detect those systems.
     
    -    Add a check into configure script to detect this flag automatically.
    +    Teach configure to do so.
     
         Signed-off-by: Doan Tran Cong Danh <congdanhqx@gmail.com>
     
 3:  47888f236c !  3:  30f15075c4 t3900: demonstrate git-rebase problem with multi encoding
    @@ t/t3900-i18n-commit.sh: test_commit_autosquash_flags eucJP fixup
     +	new=$3
     +	msg=$4
     +	test_expect_failure "commit --$flag into $old from $new" '
    -+		git checkout -b '$flag-$old-$new' C0 &&
    -+		git config i18n.commitencoding '$old' &&
    -+		echo '$old' >>F &&
    -+		git commit -a -F "$TEST_DIRECTORY/t3900/'$msg'" &&
    ++		git checkout -b $flag-$old-$new C0 &&
    ++		git config i18n.commitencoding $old &&
    ++		echo $old >>F &&
    ++		git commit -a -F "$TEST_DIRECTORY"/t3900/$msg &&
     +		test_tick &&
     +		echo intermediate stuff >>G &&
     +		git add G &&
     +		git commit -a -m "intermediate commit" &&
     +		test_tick &&
    -+		git config i18n.commitencoding '$new' &&
    -+		echo '$new-$flag' >>F &&
    -+		git commit -a --'$flag' HEAD^ &&
    ++		git config i18n.commitencoding $new &&
    ++		echo $new-$flag >>F &&
    ++		git commit -a --$flag HEAD^ &&
     +		git rebase --autosquash -i HEAD^^^ &&
     +		git rev-list HEAD >actual &&
     +		test_line_count = 3 actual
 4:  42115f1e33 !  4:  17165b81e5 sequencer: reencode to utf-8 before arrange rebase's todo list
    @@ Commit message
         Thus, t3900::test_commit_autosquash_flags is failing on musl libc.
     
         Reencode to utf-8 before arranging rebase's todo list.
    -    By doing this, we also remove a breakage introduced in the previous
    -    commit.
    +
    +    By doing this, we also remove a breakage noticed by a test added in the
    +    previous commit.
     
         Signed-off-by: Doan Tran Cong Danh <congdanhqx@gmail.com>
     
    @@ t/t3900-i18n-commit.sh: test_commit_autosquash_multi_encoding () {
      	msg=$4
     -	test_expect_failure "commit --$flag into $old from $new" '
     +	test_expect_success "commit --$flag into $old from $new" '
    - 		git checkout -b '$flag-$old-$new' C0 &&
    - 		git config i18n.commitencoding '$old' &&
    - 		echo '$old' >>F &&
    + 		git checkout -b $flag-$old-$new C0 &&
    + 		git config i18n.commitencoding $old &&
    + 		echo $old >>F &&
 5:  5a871d7226 =  5:  40fa759492 sequencer: reencode revert/cherry-pick's todo list
 6:  1c6194a598 !  6:  ed6cfab5d2 sequencer: reencode squashing commit's message
    @@ sequencer.c: static int commit_staged_changes(struct repository *r,
     
      ## t/t3900-i18n-commit.sh ##
     @@ t/t3900-i18n-commit.sh: test_commit_autosquash_multi_encoding () {
    - 	old=$2
    - 	new=$3
    - 	msg=$4
    -+	squash_msg=
    -+	if test $flag = squash; then
    -+		squash_msg='
    -+		subject="squash! $(head -1 expect)" &&
    -+		printf "\n%s\n" "$subject" >> expect &&
    -+		'
    -+	fi
    - 	test_expect_success "commit --$flag into $old from $new" '
    - 		git checkout -b '$flag-$old-$new' C0 &&
    - 		git config i18n.commitencoding '$old' &&
    -@@ t/t3900-i18n-commit.sh: test_commit_autosquash_multi_encoding () {
    - 		git commit -a --'$flag' HEAD^ &&
    + 		git commit -a --$flag HEAD^ &&
      		git rebase --autosquash -i HEAD^^^ &&
      		git rev-list HEAD >actual &&
     -		test_line_count = 3 actual
     +		test_line_count = 3 actual &&
    -+		iconv -f '$old' -t utf-8 "$TEST_DIRECTORY/t3900/'$msg'" >expect &&
    -+		'"$squash_msg"'
    ++		iconv -f $old -t UTF-8 "$TEST_DIRECTORY"/t3900/$msg >expect &&
    ++		if test $flag = squash; then
    ++			subject="$(head -1 expect)" &&
    ++			printf "\nsquash! %s\n" "$subject" >>expect
    ++		fi &&
     +		git cat-file commit HEAD^ >raw &&
    -+		(sed "1,/^$/d" raw | iconv -f '$new' -t utf-8) >actual &&
    ++		(sed "1,/^$/d" raw | iconv -f $new -t utf-8) >actual &&
     +		test_cmp expect actual
      	'
      }
 7:  95df3cdadf <  -:  ---------- sequencer: reencode old merge-commit message
 8:  0606b2408d <  -:  ---------- sequencer: reencode commit message for am/rebase --show-current-patch
 -:  ---------- >  7:  def9adf97e sequencer: reencode old merge-commit message
 -:  ---------- >  8:  2e95ca57d2 sequencer: reencode commit message for am/rebase --show-current-patch
 -:  ---------- >  9:  860dee65f4 sequencer: fallback to sane label in making rebase todo list

Comments

Junio C Hamano Nov. 11, 2019, 1:22 a.m. UTC | #1
Doan Tran Cong Danh <congdanhqx@gmail.com> writes:

> The series is shifting from fixing test that failed on musl based Linux to
> correct the internal working encoding and output encoding of git-am
> git-cherry-pick git-rebase and git-revert.
>
> Change from v4:
> - Update commit message per review
> - Add test for last 2 patches
> - Notice a breakage with git rebase --rebase-merges (see patch 7)
>

Re: Patch 3, it indeed is a bit sad that out test framework does not
let us "show" what exactly is being executed in the debug/verbose
mode when we use parameterized test helpers. We designed the test
helpers like test_expect_success and test_eval_ so that the eval'ed
block can access the outer variables safely, i.e. "$flag-$old-$new"
can and should be written inside dq for safety, but the variable
names would be all we see in the verbose log because of that.  But
that's not the end of the world ;-)

I do not have a strong opinion on Patch 9 (I agree we need to fall
back on something sensible and without any change, the system is
quite broken---I do not have a strong opinion on what the fallback
should be); I'd love to hear what sequencer folks think.

Overall this round looks quite good.

Thanks, will queue.




>
> Doan Tran Cong Danh (9):
>   t0028: eliminate non-standard usage of printf
>   configure.ac: define ICONV_OMITS_BOM if necessary
>   t3900: demonstrate git-rebase problem with multi encoding
>   sequencer: reencode to utf-8 before arrange rebase's todo list
>   sequencer: reencode revert/cherry-pick's todo list
>   sequencer: reencode squashing commit's message
>   sequencer: reencode old merge-commit message
>   sequencer: reencode commit message for am/rebase --show-current-patch
>   sequencer: fallback to sane label in making rebase todo list
>
>  configure.ac                     |  49 ++++++++++++++++++
>  sequencer.c                      |  32 ++++++++----
>  t/t0028-working-tree-encoding.sh |   4 +-
>  t/t3433-rebase-i18n.sh           |  84 +++++++++++++++++++++++++++++++
>  t/t3433/ISO8859-1.txt            | Bin 0 -> 15 bytes
>  t/t3433/eucJP.txt                | Bin 0 -> 68 bytes
>  t/t3900-i18n-commit.sh           |  37 ++++++++++++++
>  7 files changed, 195 insertions(+), 11 deletions(-)
>  create mode 100755 t/t3433-rebase-i18n.sh
>  create mode 100644 t/t3433/ISO8859-1.txt
>  create mode 100644 t/t3433/eucJP.txt
>
> Range-diff against v4:
>  1:  daa0c27d28 =  1:  b3d6c4e720 t0028: eliminate non-standard usage of printf
>  2:  c50964f413 !  2:  fe63a6bc44 configure.ac: define ICONV_OMITS_BOM if necessary
>     @@ Commit message
>      
>              make ICONV_OMITS_BOM=Yes
>      
>     -    However, typing the flag all the time is cumbersome and error-prone.
>     +    However, configure script wasn't taught to detect those systems.
>      
>     -    Add a check into configure script to detect this flag automatically.
>     +    Teach configure to do so.
>      
>          Signed-off-by: Doan Tran Cong Danh <congdanhqx@gmail.com>
>      
>  3:  47888f236c !  3:  30f15075c4 t3900: demonstrate git-rebase problem with multi encoding
>     @@ t/t3900-i18n-commit.sh: test_commit_autosquash_flags eucJP fixup
>      +	new=$3
>      +	msg=$4
>      +	test_expect_failure "commit --$flag into $old from $new" '
>     -+		git checkout -b '$flag-$old-$new' C0 &&
>     -+		git config i18n.commitencoding '$old' &&
>     -+		echo '$old' >>F &&
>     -+		git commit -a -F "$TEST_DIRECTORY/t3900/'$msg'" &&
>     ++		git checkout -b $flag-$old-$new C0 &&
>     ++		git config i18n.commitencoding $old &&
>     ++		echo $old >>F &&
>     ++		git commit -a -F "$TEST_DIRECTORY"/t3900/$msg &&
>      +		test_tick &&
>      +		echo intermediate stuff >>G &&
>      +		git add G &&
>      +		git commit -a -m "intermediate commit" &&
>      +		test_tick &&
>     -+		git config i18n.commitencoding '$new' &&
>     -+		echo '$new-$flag' >>F &&
>     -+		git commit -a --'$flag' HEAD^ &&
>     ++		git config i18n.commitencoding $new &&
>     ++		echo $new-$flag >>F &&
>     ++		git commit -a --$flag HEAD^ &&
>      +		git rebase --autosquash -i HEAD^^^ &&
>      +		git rev-list HEAD >actual &&
>      +		test_line_count = 3 actual
>  4:  42115f1e33 !  4:  17165b81e5 sequencer: reencode to utf-8 before arrange rebase's todo list
>     @@ Commit message
>          Thus, t3900::test_commit_autosquash_flags is failing on musl libc.
>      
>          Reencode to utf-8 before arranging rebase's todo list.
>     -    By doing this, we also remove a breakage introduced in the previous
>     -    commit.
>     +
>     +    By doing this, we also remove a breakage noticed by a test added in the
>     +    previous commit.
>      
>          Signed-off-by: Doan Tran Cong Danh <congdanhqx@gmail.com>
>      
>     @@ t/t3900-i18n-commit.sh: test_commit_autosquash_multi_encoding () {
>       	msg=$4
>      -	test_expect_failure "commit --$flag into $old from $new" '
>      +	test_expect_success "commit --$flag into $old from $new" '
>     - 		git checkout -b '$flag-$old-$new' C0 &&
>     - 		git config i18n.commitencoding '$old' &&
>     - 		echo '$old' >>F &&
>     + 		git checkout -b $flag-$old-$new C0 &&
>     + 		git config i18n.commitencoding $old &&
>     + 		echo $old >>F &&
>  5:  5a871d7226 =  5:  40fa759492 sequencer: reencode revert/cherry-pick's todo list
>  6:  1c6194a598 !  6:  ed6cfab5d2 sequencer: reencode squashing commit's message
>     @@ sequencer.c: static int commit_staged_changes(struct repository *r,
>      
>       ## t/t3900-i18n-commit.sh ##
>      @@ t/t3900-i18n-commit.sh: test_commit_autosquash_multi_encoding () {
>     - 	old=$2
>     - 	new=$3
>     - 	msg=$4
>     -+	squash_msg=
>     -+	if test $flag = squash; then
>     -+		squash_msg='
>     -+		subject="squash! $(head -1 expect)" &&
>     -+		printf "\n%s\n" "$subject" >> expect &&
>     -+		'
>     -+	fi
>     - 	test_expect_success "commit --$flag into $old from $new" '
>     - 		git checkout -b '$flag-$old-$new' C0 &&
>     - 		git config i18n.commitencoding '$old' &&
>     -@@ t/t3900-i18n-commit.sh: test_commit_autosquash_multi_encoding () {
>     - 		git commit -a --'$flag' HEAD^ &&
>     + 		git commit -a --$flag HEAD^ &&
>       		git rebase --autosquash -i HEAD^^^ &&
>       		git rev-list HEAD >actual &&
>      -		test_line_count = 3 actual
>      +		test_line_count = 3 actual &&
>     -+		iconv -f '$old' -t utf-8 "$TEST_DIRECTORY/t3900/'$msg'" >expect &&
>     -+		'"$squash_msg"'
>     ++		iconv -f $old -t UTF-8 "$TEST_DIRECTORY"/t3900/$msg >expect &&
>     ++		if test $flag = squash; then
>     ++			subject="$(head -1 expect)" &&
>     ++			printf "\nsquash! %s\n" "$subject" >>expect
>     ++		fi &&
>      +		git cat-file commit HEAD^ >raw &&
>     -+		(sed "1,/^$/d" raw | iconv -f '$new' -t utf-8) >actual &&
>     ++		(sed "1,/^$/d" raw | iconv -f $new -t utf-8) >actual &&
>      +		test_cmp expect actual
>       	'
>       }
>  7:  95df3cdadf <  -:  ---------- sequencer: reencode old merge-commit message
>  8:  0606b2408d <  -:  ---------- sequencer: reencode commit message for am/rebase --show-current-patch
>  -:  ---------- >  7:  def9adf97e sequencer: reencode old merge-commit message
>  -:  ---------- >  8:  2e95ca57d2 sequencer: reencode commit message for am/rebase --show-current-patch
>  -:  ---------- >  9:  860dee65f4 sequencer: fallback to sane label in making rebase todo list
Junio C Hamano Nov. 11, 2019, 4:02 a.m. UTC | #2
Doan Tran Cong Danh <congdanhqx@gmail.com> writes:

>  t/t3433-rebase-i18n.sh           |  84 +++++++++++++++++++++++++++++++
>  t/t3433/ISO8859-1.txt            | Bin 0 -> 15 bytes
>  t/t3433/eucJP.txt                | Bin 0 -> 68 bytes
>  t/t3900-i18n-commit.sh           |  37 ++++++++++++++
>  7 files changed, 195 insertions(+), 11 deletions(-)
>  create mode 100755 t/t3433-rebase-i18n.sh

Doesn't "make test" barf with test-lint failure with this series
merged to 'pu'?

In other words, isn't 3433 already taken by another series?
Đoàn Trần Công Danh Nov. 11, 2019, 4:43 a.m. UTC | #3
On 2019-11-11 13:02:37 +0900, Junio C Hamano wrote:
> Doan Tran Cong Danh <congdanhqx@gmail.com> writes:
> 
> >  t/t3433-rebase-i18n.sh           |  84 +++++++++++++++++++++++++++++++
> >  t/t3433/ISO8859-1.txt            | Bin 0 -> 15 bytes
> >  t/t3433/eucJP.txt                | Bin 0 -> 68 bytes
> >  t/t3900-i18n-commit.sh           |  37 ++++++++++++++
> >  7 files changed, 195 insertions(+), 11 deletions(-)
> >  create mode 100755 t/t3433-rebase-i18n.sh
> 
> Doesn't "make test" barf with test-lint failure with this series
> merged to 'pu'?
> 
> In other words, isn't 3433 already taken by another series?  

It's taken in pu but not master.

I branched out from master and I forgot to check pu hence I hadn't
noticed this barf earlier.

Should I send a re-roll to rename to t3434?
Junio C Hamano Nov. 11, 2019, 6:14 a.m. UTC | #4
Junio C Hamano <gitster@pobox.com> writes:

> Doan Tran Cong Danh <congdanhqx@gmail.com> writes:
>
>>  t/t3433-rebase-i18n.sh           |  84 +++++++++++++++++++++++++++++++
>>  t/t3433/ISO8859-1.txt            | Bin 0 -> 15 bytes
>>  t/t3433/eucJP.txt                | Bin 0 -> 68 bytes
>>  t/t3900-i18n-commit.sh           |  37 ++++++++++++++
>>  7 files changed, 195 insertions(+), 11 deletions(-)
>>  create mode 100755 t/t3433-rebase-i18n.sh
>
> Doesn't "make test" barf with test-lint failure with this series
> merged to 'pu'?
>
> In other words, isn't 3433 already taken by another series?  

In the meantime, here is what I added to the tip of the series.
Notice that t3434 itself does not have to repeat its own name over
and over this way.

Thanks.

 t/{t3433-rebase-i18n.sh => t3434-rebase-i18n.sh} | 10 ++++++----
 t/{t3433 => t3434}/ISO8859-1.txt                 |  0
 t/{t3433 => t3434}/eucJP.txt                     |  0
 3 files changed, 6 insertions(+), 4 deletions(-)
 rename t/{t3433-rebase-i18n.sh => t3434-rebase-i18n.sh} (89%)
 rename t/{t3433 => t3434}/ISO8859-1.txt (100%)
 rename t/{t3433 => t3434}/eucJP.txt (100%)

diff --git a/t/t3433-rebase-i18n.sh b/t/t3434-rebase-i18n.sh
similarity index 89%
rename from t/t3433-rebase-i18n.sh
rename to t/t3434-rebase-i18n.sh
index 93e5402849..f693a7f4a0 100755
--- a/t/t3433-rebase-i18n.sh
+++ b/t/t3434-rebase-i18n.sh
@@ -16,8 +16,10 @@ Initial setup:
 
 . ./test-lib.sh
 
+test_vector="$TEST_DIRECTORY/t3434"
+
 compare_msg () {
-	iconv -f "$2" -t "$3" "$TEST_DIRECTORY/t3433/$1" >expect &&
+	iconv -f "$2" -t "$3" "$test_vector/$1" >expect &&
 	git cat-file commit HEAD >raw &&
 	sed "1,/^$/d" raw >actual &&
 	test_cmp expect actual
@@ -39,7 +41,7 @@ test_expect_success setup '
 test_expect_success 'rebase --rebase-merges update encoding eucJP to UTF-8' '
 	git switch -c merge-eucJP-UTF-8 first &&
 	git config i18n.commitencoding eucJP &&
-	git merge -F "$TEST_DIRECTORY/t3433/eucJP.txt" second &&
+	git merge -F "$test_vector/eucJP.txt" second &&
 	git config i18n.commitencoding UTF-8 &&
 	git rebase --rebase-merges master &&
 	compare_msg eucJP.txt eucJP UTF-8
@@ -48,7 +50,7 @@ test_expect_success 'rebase --rebase-merges update encoding eucJP to UTF-8' '
 test_expect_success 'rebase --rebase-merges update encoding eucJP to ISO-2022-JP' '
 	git switch -c merge-eucJP-ISO-2022-JP first &&
 	git config i18n.commitencoding eucJP &&
-	git merge -F "$TEST_DIRECTORY/t3433/eucJP.txt" second &&
+	git merge -F "$test_vector/eucJP.txt" second &&
 	git config i18n.commitencoding ISO-2022-JP &&
 	git rebase --rebase-merges master &&
 	compare_msg eucJP.txt eucJP ISO-2022-JP
@@ -64,7 +66,7 @@ test_rebase_continue_update_encode () {
 		echo for-conflict >two.t &&
 		git add two.t &&
 		git config i18n.commitencoding $old &&
-		git commit -F "$TEST_DIRECTORY/t3433/$msgfile" &&
+		git commit -F "$test_vector/$msgfile" &&
 		git config i18n.commitencoding $new &&
 		test_must_fail git rebase -m master &&
 		test -f .git/rebase-merge/message &&
diff --git a/t/t3433/ISO8859-1.txt b/t/t3434/ISO8859-1.txt
similarity index 100%
rename from t/t3433/ISO8859-1.txt
rename to t/t3434/ISO8859-1.txt
diff --git a/t/t3433/eucJP.txt b/t/t3434/eucJP.txt
similarity index 100%
rename from t/t3433/eucJP.txt
rename to t/t3434/eucJP.txt