[GSoC,RFC,WIP,1/3] object-store: make read_object_file_extended() thread-safe
diff mbox series

Message ID 1525a0b55c4dddef09c9ea8ed26def21a3e790aa.1563570204.git.matheus.bernardino@usp.br
State New
Headers show
Series
  • grep: allow parallelism in zlib inflation
Related show

Commit Message

Matheus Tavares Bernardino July 19, 2019, 11:08 p.m. UTC
Allow read_object_file_extended() to be called by multiple threads
protecting it with a lock. The lock usage can be toggled with
enable_obj_read_lock() and disable_obj_read_lock().

Probably there are many spots in read_object_file_extended()'s call
chain that could be executed unlocked (and thus, in parallel). But, just
to make sure, let's protect everthing for now, and go refining the lock
step-by-step in the future.

The only (and perhaps most important) exception is git_inflate. Since it
is already thread-safe and takes a significant amount of time, the lock
is released when entering this function, so that it can be performed in
parallel. This should already bring good performance, because of
inflation's time cost.

Note that add_delta_base_cache() was also modified to skip adding
already present cache entries. This wouldn't happen in the past, but now
it's possible as phase I and phase III of unpack_entry() may execute
concurrently.

Signed-off-by: Matheus Tavares <matheus.bernardino@usp.br>
---
 object-store.h |  4 ++++
 packfile.c     |  7 +++++++
 sha1-file.c    | 56 +++++++++++++++++++++++++++++++++++++++++++++-----
 3 files changed, 62 insertions(+), 5 deletions(-)

Patch
diff mbox series

diff --git a/object-store.h b/object-store.h
index 49f56ab8d9..8330ff3988 100644
--- a/object-store.h
+++ b/object-store.h
@@ -157,6 +157,10 @@  const char *loose_object_path(struct repository *r, struct strbuf *buf,
 void *map_loose_object(struct repository *r, const struct object_id *oid,
 		       unsigned long *size);
 
+void enable_obj_read_lock(void);
+void disable_obj_read_lock(void);
+void obj_read_lock(void);
+void obj_read_unlock(void);
 void *read_object_file_extended(struct repository *r,
 				const struct object_id *oid,
 				enum object_type *type,
diff --git a/packfile.c b/packfile.c
index c0d83fdfed..a50be4e5e5 100644
--- a/packfile.c
+++ b/packfile.c
@@ -1087,7 +1087,9 @@  unsigned long get_size_from_delta(struct packed_git *p,
 	do {
 		in = use_pack(p, w_curs, curpos, &stream.avail_in);
 		stream.next_in = in;
+		obj_read_unlock();
 		st = git_inflate(&stream, Z_FINISH);
+		obj_read_lock();
 		curpos += stream.next_in - in;
 	} while ((st == Z_OK || st == Z_BUF_ERROR) &&
 		 stream.total_out < sizeof(delta_head));
@@ -1440,6 +1442,9 @@  static void add_delta_base_cache(struct packed_git *p, off_t base_offset,
 	struct delta_base_cache_entry *ent = xmalloc(sizeof(*ent));
 	struct list_head *lru, *tmp;
 
+	if (get_delta_base_cache_entry(p, base_offset))
+		return;
+
 	delta_base_cached += base_size;
 
 	list_for_each_safe(lru, tmp, &delta_base_cache_lru) {
@@ -1569,7 +1574,9 @@  static void *unpack_compressed_entry(struct packed_git *p,
 	do {
 		in = use_pack(p, w_curs, curpos, &stream.avail_in);
 		stream.next_in = in;
+		obj_read_unlock();
 		st = git_inflate(&stream, Z_FINISH);
+		obj_read_lock();
 		if (!stream.avail_out)
 			break; /* the payload is larger than it should be */
 		curpos += stream.next_in - in;
diff --git a/sha1-file.c b/sha1-file.c
index 888b6024d5..37cde4a494 100644
--- a/sha1-file.c
+++ b/sha1-file.c
@@ -1463,16 +1463,49 @@  int pretend_object_file(void *buf, unsigned long len, enum object_type type,
 	return 0;
 }
 
+static pthread_mutex_t obj_read_mutex;
+static int obj_read_use_lock = 0;
+
+void enable_obj_read_lock(void)
+{
+	if (obj_read_use_lock)
+		return;
+
+	obj_read_use_lock = 1;
+	pthread_mutex_init(&obj_read_mutex, NULL);
+}
+
+void disable_obj_read_lock(void)
+{
+	if (!obj_read_use_lock)
+		return;
+
+	obj_read_use_lock = 0;
+	pthread_mutex_destroy(&obj_read_mutex);
+}
+
+void obj_read_lock(void)
+{
+	if(obj_read_use_lock)
+		pthread_mutex_lock(&obj_read_mutex);
+}
+
+void obj_read_unlock(void)
+{
+	if(obj_read_use_lock)
+		pthread_mutex_unlock(&obj_read_mutex);
+}
+
 /*
  * This function dies on corrupt objects; the callers who want to
  * deal with them should arrange to call read_object() and give error
  * messages themselves.
  */
-void *read_object_file_extended(struct repository *r,
-				const struct object_id *oid,
-				enum object_type *type,
-				unsigned long *size,
-				int lookup_replace)
+static void *do_read_object_file_extended(struct repository *r,
+					  const struct object_id *oid,
+					  enum object_type *type,
+					  unsigned long *size,
+					  int lookup_replace)
 {
 	void *data;
 	const struct packed_git *p;
@@ -1505,6 +1538,19 @@  void *read_object_file_extended(struct repository *r,
 	return NULL;
 }
 
+void *read_object_file_extended(struct repository *r,
+				const struct object_id *oid,
+				enum object_type *type,
+				unsigned long *size,
+				int lookup_replace)
+{	
+	void *data;
+	obj_read_lock();
+	data = do_read_object_file_extended(r, oid, type, size, lookup_replace);
+	obj_read_unlock();
+	return data;
+}
+
 void *read_object_with_reference(const struct object_id *oid,
 				 const char *required_type_name,
 				 unsigned long *size,