diff mbox series

add git-p4.fallbackEncoding config variable, to prevent git-p4 from crashing on non UTF-8 changeset descriptions

Message ID 20210421084604.3095-1-tzadik.vanderhoof@gmail.com (mailing list archive)
State Superseded
Headers show
Series add git-p4.fallbackEncoding config variable, to prevent git-p4 from crashing on non UTF-8 changeset descriptions | expand

Commit Message

Tzadik Vanderhoof April 21, 2021, 8:46 a.m. UTC
---
 Documentation/git-p4.txt                   | 10 ++++
 git-p4.py                                  | 11 +++-
 t/t9835-git-p4-config-fallback-encoding.sh | 65 ++++++++++++++++++++++
 3 files changed, 85 insertions(+), 1 deletion(-)
 create mode 100755 t/t9835-git-p4-config-fallback-encoding.sh

Comments

Tzadik Vanderhoof April 21, 2021, 8:55 a.m. UTC | #1
Signed-off-by: Tzadik Vanderhoof <tzadik.vanderhoof@gmail.com>
diff mbox series

Patch

diff --git a/Documentation/git-p4.txt b/Documentation/git-p4.txt
index f89e68b..e0131a9 100644
--- a/Documentation/git-p4.txt
+++ b/Documentation/git-p4.txt
@@ -638,6 +638,16 @@  git-p4.pathEncoding::
 	to transcode the paths to UTF-8. As an example, Perforce on Windows
 	often uses "cp1252" to encode path names.
 
+git-p4.fallbackEncoding::
+	Perforce changeset descriptions can be in a mixture of encodings.
+	Git-p4 first tries to interpret each description as UTF-8. If that
+	fails, this config allows another encoding to be tried. You
+	can specify, for example, "cp1252". If instead of an encoding,
+	you specify "replace", UTF-8 will be used, with invalid UTF-8
+	characters replaced by the Unicode replacement character. If you
+	specify "none" (the default), there is no fallback, and any non
+	UTF-8 character will cause git-p4 to immediately fail.
+
 git-p4.largeFileSystem::
 	Specify the system that is used for large (binary) files. Please note
 	that large file systems do not support the 'git p4 submit' command.
diff --git a/git-p4.py b/git-p4.py
index 09c9e93..173f78a 100755
--- a/git-p4.py
+++ b/git-p4.py
@@ -771,7 +771,16 @@  def p4CmdList(cmd, stdin=None, stdin_mode='w+b', cb=None, skip_info=False,
                 for key, value in entry.items():
                     key = key.decode()
                     if isinstance(value, bytes) and not (key in ('data', 'path', 'clientFile') or key.startswith('depotFile')):
-                        value = value.decode()
+                        try:
+                            value = value.decode()
+                        except UnicodeDecodeError as ex:
+                            fallbackEncoding = gitConfig("git-p4.fallbackEncoding").lower() or 'none'
+                            if fallbackEncoding == 'none':
+                                raise Exception("UTF8 decoding failed. Consider using git config git-p4.fallbackEncoding") from ex
+                            elif fallbackEncoding == 'replace':
+                                value = value.decode(errors='replace')
+                            else:
+                                value = value.decode(encoding=fallbackEncoding)
                     decoded_entry[key] = value
                 # Parse out data if it's an error response
                 if decoded_entry.get('code') == 'error' and 'data' in decoded_entry:
diff --git a/t/t9835-git-p4-config-fallback-encoding.sh b/t/t9835-git-p4-config-fallback-encoding.sh
new file mode 100755
index 0000000..56a245e
--- /dev/null
+++ b/t/t9835-git-p4-config-fallback-encoding.sh
@@ -0,0 +1,65 @@ 
+#!/bin/sh
+
+test_description='test git-p4.fallbackEncoding config'
+
+GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main
+export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
+
+. ./lib-git-p4.sh
+
+if test_have_prereq !MINGW,!CYGWIN; then
+	skip_all='This system is not subject to encoding failures in "git p4 clone"'
+	test_done
+fi
+
+test_expect_success 'start p4d' '
+	start_p4d
+'
+
+test_expect_success 'add cp1252 description' '
+	cd "$cli" &&
+	echo file1 >file1 &&
+	p4 add file1 &&
+	p4 submit -d documentación
+'
+
+test_expect_success 'clone fails with git-p4.fallbackEncoding unset' '
+	test_might_fail git config --global --unset git-p4.fallbackEncoding &&
+	test_when_finished cleanup_git &&
+	(
+		test_must_fail git p4 clone --dest="$git" //depot@all 2>> actual &&
+		grep "UTF8 decoding failed. Consider using git config git-p4.fallbackEncoding" actual
+	)
+'
+test_expect_success 'clone fails with git-p4.fallbackEncoding set to "none"' '
+	git config --global git-p4.fallbackEncoding none &&
+	test_when_finished cleanup_git &&
+	(
+		test_must_fail git p4 clone --dest="$git" //depot@all 2>> actual &&
+		grep "UTF8 decoding failed. Consider using git config git-p4.fallbackEncoding" actual
+	)
+'
+
+test_expect_success 'clone succeeds with git-p4.fallbackEncoding set to "cp1252"' '
+	git config --global git-p4.fallbackEncoding cp1252 &&
+	test_when_finished cleanup_git &&
+	(
+		git p4 clone --dest="$git" //depot@all &&
+		cd "$git" &&
+		git log --oneline >log &&
+		desc=$(head -1 log | awk '\''{print $2}'\'') &&	[ "$desc" = "documentación" ]
+	)
+'
+
+test_expect_success 'clone succeeds with git-p4.fallbackEncoding set to "replace"' '
+	git config --global git-p4.fallbackEncoding replace &&
+	test_when_finished cleanup_git &&
+	(
+		git p4 clone --dest="$git" //depot@all &&
+		cd "$git" &&
+		git log --oneline >log &&
+		desc=$(head -1 log | awk '\''{print $2}'\'') &&	[ "$desc" = "documentaci�n" ]
+	)
+'
+
+test_done