diff mbox series

[RFC,1/4] git-p4: decode response from p4 to str for python3

Message ID 20191128012807.3103-2-yang.zhao@skyboxlabs.com (mailing list archive)
State New, archived
Headers show
Series git-p4: python 3 compatibility | expand

Commit Message

Yang Zhao Nov. 28, 2019, 1:28 a.m. UTC
Under Python 2, the marshalled dict that p4 writes to STDOUT is
deserialized with `str` keys and string values. Under Python 3, the same
keys and values are deserialized as `bytes`, leading to a whole host of
problems as the rest of the code assumes `str` is used throughout.

Two fields are exempt from decoding: `data`, as it may contain arbitrary
binary data that is not text, and `depotFile`, as it may contain text
encoded with something other than ASCII or UTF-8.

Signed-off-by: Yang Zhao <yang.zhao@skyboxlabs.com>
 git-p4.py | 10 ++++++++++
 1 file changed, 10 insertions(+)
diff mbox series


diff --git a/git-p4.py b/git-p4.py
index 60c73b6a37..ead9d816e1 100755
--- a/git-p4.py
+++ b/git-p4.py
@@ -36,6 +36,7 @@ 
     unicode = str
     bytes = bytes
     basestring = (str,bytes)
+    use_encoded_streams = True
     # 'unicode' exists, must be Python 2
     str = str
@@ -643,6 +644,15 @@  def p4CmdList(cmd, stdin=None, stdin_mode='w+b', cb=None, skip_info=False,
         while True:
             entry = marshal.load(p4.stdout)
+            if use_encoded_streams:
+                # Decode unmarshalled dict to use str keys and values, except for:
+                #   - `data` which may contain arbitrary binary data
+                #   - `depotFile` which may contain non-UTF8 encoded text
+                decoded_entry = {}
+                for key, value in entry.items():
+                    key = key.decode()
+                    decoded_entry[key] = value.decode() if not (key in ['data', 'depotFile'] or isinstance(value, str)) else value
+                entry = decoded_entry
             if skip_info:
                 if 'code' in entry and entry['code'] == 'info':