diff mbox series

[15/15] gentree: use 'git cat-file' to speed up obtaining objects

Message ID 20200221095437.3456e7c8b175.Iafc23c313ceb13c32022115a397ece34b2ed2780@changeid (mailing list archive)
State New, archived
Headers show
Series updates & improvements | expand

Commit Message

Johannes Berg Feb. 21, 2020, 8:56 a.m. UTC
From: Johannes Berg <johannes.berg@intel.com>

We can use the git cat-file --batch protocol to get objects,
which significantly speeds things up since we don't have to
start a new git process every time.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 gentree.py   | 23 ++++++++++++-----------
 lib/bpgit.py | 25 +++++++++++++++++++++++++
 2 files changed, 37 insertions(+), 11 deletions(-)
diff mbox series

Patch

diff --git a/gentree.py b/gentree.py
index bf2965f2a8c6..2a9f60d7384b 100755
--- a/gentree.py
+++ b/gentree.py
@@ -213,17 +213,18 @@  def copy_git_files(srcpath, copy_list, rev, outdir):
     "Copy" files from a git repository. This really means listing them with
     ls-tree and then using git show to obtain all the blobs.
     """
-    for srcitem, tgtitem in copy_list:
-        for m, t, h, f in git.ls_tree(rev=rev, files=(srcitem,), tree=srcpath):
-            assert t == 'blob'
-            f = os.path.join(outdir, f.replace(srcitem, tgtitem))
-            d = os.path.dirname(f)
-            if not os.path.exists(d):
-                os.makedirs(d)
-            outf = open(f, 'w')
-            git.get_blob(h, outf, tree=srcpath)
-            outf.close()
-            os.chmod(f, int(m, 8))
+    with git.CatFile(tree=srcpath) as cf:
+        for srcitem, tgtitem in copy_list:
+            for m, t, h, f in git.ls_tree(rev=rev, files=(srcitem,), tree=srcpath):
+                assert t == 'blob'
+                f = os.path.join(outdir, f.replace(srcitem, tgtitem))
+                d = os.path.dirname(f)
+                if not os.path.exists(d):
+                    os.makedirs(d)
+                outf = open(f, 'w')
+                cf.get_blob(h, outf)
+                outf.close()
+                os.chmod(f, int(m, 8))
 
 def automatic_backport_mangle_c_file(name):
     return name.replace('/', '-')
diff --git a/lib/bpgit.py b/lib/bpgit.py
index 60d4abaa7a0d..7b57f6b2690a 100644
--- a/lib/bpgit.py
+++ b/lib/bpgit.py
@@ -357,3 +357,28 @@  def diff(tree=None, extra_args=None):
     _check(process)
 
     return stdout
+
+class CatFile(object):  # context manager around one long-lived 'git cat-file --batch' child process
+    def __init__(self, tree=None):  # tree: directory handed to Popen as cwd when the process is started
+        self.tree = tree
+        self.p = None  # no child process exists until __enter__ runs
+
+    def __enter__(self):  # spawn the batch process with both stdin and stdout piped
+        self.p = subprocess.Popen(['git', 'cat-file', '--batch'], cwd=self.tree,
+                                  stdout=subprocess.PIPE, stdin=subprocess.PIPE)
+        return self  # so 'with CatFile(...) as cf' binds this object
+
+    def get_blob(self, sha, outf):  # request object 'sha' and write its content to the file object 'outf'
+        self.p.stdin.write(sha + '\n')  # one request per line; NOTE(review): no explicit flush, relies on an unbuffered pipe - confirm
+        hdr = self.p.stdout.readline().split()  # reply header line, split on whitespace
+        assert len(hdr) == 3  # exactly three header fields expected; NOTE(review): asserts are stripped under python -O
+        assert hdr[1] == 'blob'  # second field is the object type; only blobs are accepted here
+        size = int(hdr[2])  # third field is used as the content length
+        outf.write(self.p.stdout.read(size))  # copy exactly 'size' units of content to outf
+        assert self.p.stdout.readline() == '\n'  # consume the lone newline that follows the content; assumes str (not bytes) pipes
+
+    def __exit__(self, type, value, traceback):  # tear the child process down when the 'with' block ends
+        self.p.stdin.close()  # presumably EOF on stdin makes git exit - confirm
+        self.p.wait()  # block until the child has terminated
+        _check(self.p)  # _check is defined outside this hunk; presumably validates the exit status - confirm
+        self.p = None  # drop the handle so the object is back in its pre-__enter__ state