[v5,05/15] git-p4: promote encodeWithUTF8() to a global function
diff mbox series

Message ID 11d7703e411f1dced8a34defc68922ba44c614d5.1575740863.git.gitgitgadget@gmail.com
State New
Headers show
Series
  • git-p4.py: Cast byte strings to unicode strings in python3
Related show

Commit Message

Johannes Schindelin via GitGitGadget Dec. 7, 2019, 5:47 p.m. UTC
From: Ben Keene <seraphire@gmail.com>

This changelist is an intermediate submission for migrating the P4
support from Python 2 to Python 3. The code needs access to
encodeWithUTF8() to support non-UTF-8 filenames in the clone class as
well as the sync class.

Move the function encodeWithUTF8() from the P4Sync class to a
stand-alone function.  This will allow other classes to use this
function without instantiating the P4Sync class. Change the self.verbose
reference to an optional function parameter. Update the existing
callers to pass self.verbose explicitly, because the function is no
longer a method of the P4Sync class and therefore has no access to
"self".

Signed-off-by: Ben Keene <seraphire@gmail.com>
---
 git-p4.py | 52 ++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 36 insertions(+), 16 deletions(-)

Comments

Junio C Hamano Dec. 11, 2019, 4:39 p.m. UTC | #1
"Ben Keene via GitGitGadget" <gitgitgadget@gmail.com> writes:

> From: Ben Keene <seraphire@gmail.com>
>
> This changelist is an intermediate submission for migrating the P4
> support from Python 2 to Python 3. The code needs access to the
> encodeWithUTF8() for support of non-UTF8 filenames in the clone class as
> well as the sync class.
>
> Move the function encodeWithUTF8() from the P4Sync class to a
> stand-alone function.  This will allow other classes to use this
> function without instantiating the P4Sync class.

Makes quite a lot of sense, as I do not see a reason why this needs
to be attached to any specific instance of P4Sync.

Patch
diff mbox series

diff --git a/git-p4.py b/git-p4.py
index 3153186df0..cc6c490e2c 100755
--- a/git-p4.py
+++ b/git-p4.py
@@ -27,7 +27,7 @@ 
 import ctypes
 import errno
 
-# support basestring in python3
+# support basestring in Python 3
 try:
     unicode = unicode
 except NameError:
@@ -46,7 +46,7 @@ 
 try:
     from subprocess import CalledProcessError
 except ImportError:
-    # from python2.7:subprocess.py
+    # from Python 2.7:subprocess.py
     # Exception classes used by this module.
     class CalledProcessError(Exception):
         """This exception is raised when a process run by check_call() returns
@@ -587,6 +587,38 @@  def isModeExec(mode):
     # otherwise False.
     return mode[-3:] == "755"
 
+def encodeWithUTF8(path, verbose=False):
+    """ Ensure that the path is encoded as a UTF-8 string
+
+        Returns bytes(P3)/str(P2)
+    """
+
+    if isunicode:
+        try:
+            if isinstance(path, unicode):
+                # It is already unicode, cast it as a bytes
+                # that is encoded as utf-8.
+                return path.encode('utf-8', 'strict')
+            path.decode('ascii', 'strict')
+        except:
+            encoding = 'utf8'
+            if gitConfig('git-p4.pathEncoding'):
+                encoding = gitConfig('git-p4.pathEncoding')
+            path = path.decode(encoding, 'replace').encode('utf8', 'replace')
+            if verbose:
+                print('\nNOTE:Path with non-ASCII characters detected. Used %s to encode: %s ' % (encoding, to_unicode(path)))
+    else:
+        try:
+            path.decode('ascii')
+        except:
+            encoding = 'utf8'
+            if gitConfig('git-p4.pathEncoding'):
+                encoding = gitConfig('git-p4.pathEncoding')
+            path = path.decode(encoding, 'replace').encode('utf8', 'replace')
+            if verbose:
+                print('Path with non-ASCII characters detected. Used %s to encode: %s ' % (encoding, path))
+    return path
+
 class P4Exception(Exception):
     """ Base class for exceptions from the p4 client """
     def __init__(self, exit_code):
@@ -2748,24 +2780,12 @@  def writeToGitStream(self, gitMode, relPath, contents):
             self.gitStream.write(d)
         self.gitStream.write('\n')
 
-    def encodeWithUTF8(self, path):
-        try:
-            path.decode('ascii')
-        except:
-            encoding = 'utf8'
-            if gitConfig('git-p4.pathEncoding'):
-                encoding = gitConfig('git-p4.pathEncoding')
-            path = path.decode(encoding, 'replace').encode('utf8', 'replace')
-            if self.verbose:
-                print('Path with non-ASCII characters detected. Used %s to encode: %s ' % (encoding, path))
-        return path
-
     # output one file from the P4 stream
     # - helper for streamP4Files
 
     def streamOneP4File(self, file, contents):
         relPath = self.stripRepoPath(file['depotFile'], self.branchPrefixes)
-        relPath = self.encodeWithUTF8(relPath)
+        relPath = encodeWithUTF8(relPath, self.verbose)
         if verbose:
             if 'fileSize' in self.stream_file:
                 size = int(self.stream_file['fileSize'])
@@ -2848,7 +2868,7 @@  def streamOneP4File(self, file, contents):
 
     def streamOneP4Deletion(self, file):
         relPath = self.stripRepoPath(file['path'], self.branchPrefixes)
-        relPath = self.encodeWithUTF8(relPath)
+        relPath = encodeWithUTF8(relPath, self.verbose)
         if verbose:
             sys.stdout.write("delete %s\n" % relPath)
             sys.stdout.flush()