diff mbox series

[6/6] git-p4: Resolve RCS keywords in binary

Message ID 20211209201029.136886-7-jholdsworth@nvidia.com (mailing list archive)
State Superseded
Headers show
Series Transition git-p4.py to support Python 3 only | expand

Commit Message

Joel Holdsworth Dec. 9, 2021, 8:10 p.m. UTC
Signed-off-by: Joel Holdsworth <jholdsworth@nvidia.com>
---
 git-p4.py | 31 ++++++++++---------------------
 1 file changed, 10 insertions(+), 21 deletions(-)

Comments

Luke Diamand Dec. 10, 2021, 7:57 a.m. UTC | #1
On Thu, 9 Dec 2021 at 20:11, Joel Holdsworth <jholdsworth@nvidia.com> wrote:
>
> Signed-off-by: Joel Holdsworth <jholdsworth@nvidia.com>
> ---
>  git-p4.py | 31 ++++++++++---------------------
>  1 file changed, 10 insertions(+), 21 deletions(-)
>
> diff --git a/git-p4.py b/git-p4.py
> index c362a5fa38..87e6685eb6 100755
> --- a/git-p4.py
> +++ b/git-p4.py
> @@ -46,6 +46,9 @@
>
>  p4_access_checked = False
>
> +re_ko_keywords = re.compile(rb'\$(Id|Header)(:[^$\n]+)?\$')
> +re_k_keywords = re.compile(rb'\$(Id|Header|Author|Date|DateTime|Change|File|Revision)(:[^$\n]+)?\$')

I'm not sure what's going on here, but this does not look like it is
just turning off support for Python 2.x.


> +
>  def p4_build_cmd(cmd):
>      """Build a suitable p4 command line.
>
> @@ -532,20 +535,12 @@ def p4_type(f):
>  #
>  def p4_keywords_regexp_for_type(base, type_mods):
>      if base in ("text", "unicode", "binary"):
> -        kwords = None
>          if "ko" in type_mods:
> -            kwords = 'Id|Header'
> +            return re_ko_keywords
>          elif "k" in type_mods:
> -            kwords = 'Id|Header|Author|Date|DateTime|Change|File|Revision'
> +            return re_k_keywords
>          else:
>              return None
> -        pattern = r"""
> -            \$              # Starts with a dollar, followed by...
> -            (%s)            # one of the keywords, followed by...
> -            (:[^$\n]+)?     # possibly an old expansion, followed by...
> -            \$              # another dollar
> -            """ % kwords
> -        return pattern
>      else:
>          return None
>
> @@ -2035,11 +2030,10 @@ def applyCommit(self, id):
>                  kwfiles = {}
>                  for file in editedFiles | filesToDelete:
>                      # did this file's delta contain RCS keywords?
> -                    pattern = p4_keywords_regexp_for_file(file)
> +                    regexp = p4_keywords_regexp_for_file(file)
>
> -                    if pattern:
> +                    if regexp:
>                          # this file is a possibility...look for RCS keywords.
> -                        regexp = re.compile(pattern, re.VERBOSE)
>                          for line in read_pipe_lines(["git", "diff", "%s^..%s" % (id, id), file]):
>                              if regexp.search(line):
>                                  if verbose:
> @@ -2968,14 +2962,9 @@ def streamOneP4File(self, file, contents):
>              print("\nIgnoring apple filetype file %s" % file['depotFile'])
>              return
>
> -        # Note that we do not try to de-mangle keywords on utf16 files,
> -        # even though in theory somebody may want that.

This comment appears to have been stripped out, does that mean that we
now *do* try to demangle keywords on utf16?

> -        pattern = p4_keywords_regexp_for_type(type_base, type_mods)
> -        if pattern:
> -            regexp = re.compile(pattern, re.VERBOSE)
> -            text = ''.join(c.decode() for c in contents)
> -            text = regexp.sub(r'$\1$', text)
> -            contents = [text.encode()]
> +        regexp = p4_keywords_regexp_for_type(type_base, type_mods)
> +        if regexp:
> +            contents = [regexp.sub(rb'$\1$', c) for c in contents]
>
>          if self.largeFileSystem:
>              (git_mode, contents) = self.largeFileSystem.processContent(git_mode, relPath, contents)
> --
> 2.33.0
>
Joel Holdsworth Dec. 10, 2021, 10:51 a.m. UTC | #2
> This comment appears to have been stripped out, does that mean that we
> now *do* try to demangle keywords on utf16?

Good point. I guess the comment should stay.

Though really... I'm not sure what we should do with UTF-16 files. Currently their RCS keywords just won't be resolved, and the user gets no warning! I guess we could resolve them if we had a reliable way of detecting UTF-16?
diff mbox series

Patch

diff --git a/git-p4.py b/git-p4.py
index c362a5fa38..87e6685eb6 100755
--- a/git-p4.py
+++ b/git-p4.py
@@ -46,6 +46,9 @@ 
 
 p4_access_checked = False
 
+re_ko_keywords = re.compile(rb'\$(Id|Header)(:[^$\n]+)?\$')
+re_k_keywords = re.compile(rb'\$(Id|Header|Author|Date|DateTime|Change|File|Revision)(:[^$\n]+)?\$')
+
 def p4_build_cmd(cmd):
     """Build a suitable p4 command line.
 
@@ -532,20 +535,12 @@  def p4_type(f):
 #
 def p4_keywords_regexp_for_type(base, type_mods):
     if base in ("text", "unicode", "binary"):
-        kwords = None
         if "ko" in type_mods:
-            kwords = 'Id|Header'
+            return re_ko_keywords
         elif "k" in type_mods:
-            kwords = 'Id|Header|Author|Date|DateTime|Change|File|Revision'
+            return re_k_keywords
         else:
             return None
-        pattern = r"""
-            \$              # Starts with a dollar, followed by...
-            (%s)            # one of the keywords, followed by...
-            (:[^$\n]+)?     # possibly an old expansion, followed by...
-            \$              # another dollar
-            """ % kwords
-        return pattern
     else:
         return None
 
@@ -2035,11 +2030,10 @@  def applyCommit(self, id):
                 kwfiles = {}
                 for file in editedFiles | filesToDelete:
                     # did this file's delta contain RCS keywords?
-                    pattern = p4_keywords_regexp_for_file(file)
+                    regexp = p4_keywords_regexp_for_file(file)
 
-                    if pattern:
+                    if regexp:
                         # this file is a possibility...look for RCS keywords.
-                        regexp = re.compile(pattern, re.VERBOSE)
                         for line in read_pipe_lines(["git", "diff", "%s^..%s" % (id, id), file]):
                             if regexp.search(line):
                                 if verbose:
@@ -2968,14 +2962,9 @@  def streamOneP4File(self, file, contents):
             print("\nIgnoring apple filetype file %s" % file['depotFile'])
             return
 
-        # Note that we do not try to de-mangle keywords on utf16 files,
-        # even though in theory somebody may want that.
-        pattern = p4_keywords_regexp_for_type(type_base, type_mods)
-        if pattern:
-            regexp = re.compile(pattern, re.VERBOSE)
-            text = ''.join(c.decode() for c in contents)
-            text = regexp.sub(r'$\1$', text)
-            contents = [text.encode()]
+        regexp = p4_keywords_regexp_for_type(type_base, type_mods)
+        if regexp:
+            contents = [regexp.sub(rb'$\1$', c) for c in contents]
 
         if self.largeFileSystem:
             (git_mode, contents) = self.largeFileSystem.processContent(git_mode, relPath, contents)