[GSoC,1/4] object-store: add lock to read_object_file_extended()
diff mbox series

Message ID 052de4c139bf4962182e6cb8f4aa315aa6130124.1565468806.git.matheus.bernardino@usp.br
State New
Headers show
Series
  • grep: re-enable threads when cached, w/ parallel inflation
Related show

Commit Message

Matheus Tavares Bernardino Aug. 10, 2019, 8:27 p.m. UTC
Allow read_object_file_extended() to be called by multiple threads by
protecting it with a lock. The lock usage can be toggled with
enable_obj_read_lock() and disable_obj_read_lock().

There are probably many spots in read_object_file_extended()'s call
chain that could be executed unlocked (and thus, in parallel). But, for
now, we are only interested in allowing parallel access to zlib
inflation. This is one of the sections where object reading spends most
of its time, and it is already thread-safe. So, to take advantage of
that, the lock is released when entering it and re-acquired right
after. We may refine the lock to also exploit other possible parallel
spots in the future, but threaded zlib inflation should already give
great speedups.

Note that add_delta_base_cache() was also modified to skip adding
already present entries to the cache. Such duplicate insertions weren't
possible before, but now they are, since phase I and phase III of
unpack_entry() may execute concurrently.

Signed-off-by: Matheus Tavares <matheus.bernardino@usp.br>
---
 object-store.h |  4 ++++
 packfile.c     |  7 ++++++
 sha1-file.c    | 61 +++++++++++++++++++++++++++++++++++++++++++++-----
 3 files changed, 67 insertions(+), 5 deletions(-)

Patch
diff mbox series

diff --git a/object-store.h b/object-store.h
index 7f7b3cdd80..cfc9484995 100644
--- a/object-store.h
+++ b/object-store.h
@@ -159,6 +159,10 @@  const char *loose_object_path(struct repository *r, struct strbuf *buf,
 void *map_loose_object(struct repository *r, const struct object_id *oid,
 		       unsigned long *size);
 
+void enable_obj_read_lock(void);
+void disable_obj_read_lock(void);
+void obj_read_lock(void);
+void obj_read_unlock(void);
 void *read_object_file_extended(struct repository *r,
 				const struct object_id *oid,
 				enum object_type *type,
diff --git a/packfile.c b/packfile.c
index fc43a6c52c..de93dc50e2 100644
--- a/packfile.c
+++ b/packfile.c
@@ -1115,7 +1115,9 @@  unsigned long get_size_from_delta(struct packed_git *p,
 	do {
 		in = use_pack(p, w_curs, curpos, &stream.avail_in);
 		stream.next_in = in;
+		obj_read_unlock();
 		st = git_inflate(&stream, Z_FINISH);
+		obj_read_lock();
 		curpos += stream.next_in - in;
 	} while ((st == Z_OK || st == Z_BUF_ERROR) &&
 		 stream.total_out < sizeof(delta_head));
@@ -1468,6 +1470,9 @@  static void add_delta_base_cache(struct packed_git *p, off_t base_offset,
 	struct delta_base_cache_entry *ent = xmalloc(sizeof(*ent));
 	struct list_head *lru, *tmp;
 
+	if (get_delta_base_cache_entry(p, base_offset))
+		return;
+
 	delta_base_cached += base_size;
 
 	list_for_each_safe(lru, tmp, &delta_base_cache_lru) {
@@ -1597,7 +1602,9 @@  static void *unpack_compressed_entry(struct packed_git *p,
 	do {
 		in = use_pack(p, w_curs, curpos, &stream.avail_in);
 		stream.next_in = in;
+		obj_read_unlock();
 		st = git_inflate(&stream, Z_FINISH);
+		obj_read_lock();
 		if (!stream.avail_out)
 			break; /* the payload is larger than it should be */
 		curpos += stream.next_in - in;
diff --git a/sha1-file.c b/sha1-file.c
index 84fd02f107..f5ff51aedb 100644
--- a/sha1-file.c
+++ b/sha1-file.c
@@ -1560,16 +1560,54 @@  int pretend_object_file(void *buf, unsigned long len, enum object_type type,
 	return 0;
 }
 
+static pthread_mutex_t obj_read_mutex;
+static int obj_read_use_lock = 0;
+
+/*
+ * Enabling the object read lock allows multiple threads to safely call the
+ * following functions in parallel: repo_read_object_file(), read_object_file()
+ * and read_object_file_extended().
+ */
+void enable_obj_read_lock(void)
+{
+	if (obj_read_use_lock)
+		return;
+
+	obj_read_use_lock = 1;
+	pthread_mutex_init(&obj_read_mutex, NULL);
+}
+
+void disable_obj_read_lock(void)
+{
+	if (!obj_read_use_lock)
+		return;
+
+	obj_read_use_lock = 0;
+	pthread_mutex_destroy(&obj_read_mutex);
+}
+
+void obj_read_lock(void)
+{
+	if(obj_read_use_lock)
+		pthread_mutex_lock(&obj_read_mutex);
+}
+
+void obj_read_unlock(void)
+{
+	if(obj_read_use_lock)
+		pthread_mutex_unlock(&obj_read_mutex);
+}
+
 /*
  * This function dies on corrupt objects; the callers who want to
  * deal with them should arrange to call read_object() and give error
  * messages themselves.
  */
-void *read_object_file_extended(struct repository *r,
-				const struct object_id *oid,
-				enum object_type *type,
-				unsigned long *size,
-				int lookup_replace)
+static void *do_read_object_file_extended(struct repository *r,
+					  const struct object_id *oid,
+					  enum object_type *type,
+					  unsigned long *size,
+					  int lookup_replace)
 {
 	void *data;
 	const struct packed_git *p;
@@ -1602,6 +1640,19 @@  void *read_object_file_extended(struct repository *r,
 	return NULL;
 }
 
+void *read_object_file_extended(struct repository *r,
+				const struct object_id *oid,
+				enum object_type *type,
+				unsigned long *size,
+				int lookup_replace)
+{
+	void *data;
+	obj_read_lock();
+	data = do_read_object_file_extended(r, oid, type, size, lookup_replace);
+	obj_read_unlock();
+	return data;
+}
+
 void *read_object_with_reference(struct repository *r,
 				 const struct object_id *oid,
 				 const char *required_type_name,