diff mbox series

[v7,5/7] read-cache: load cache extensions on a worker thread

Message ID 20181001134556.33232-6-peartben@gmail.com (mailing list archive)
State New, archived
Headers show
Series speed up index load through parallelization | expand

Commit Message

Ben Peart Oct. 1, 2018, 1:45 p.m. UTC
From: Ben Peart <benpeart@microsoft.com>

This patch helps address the CPU cost of loading the index by loading
the cache extensions on a worker thread in parallel with loading the cache
entries.

In some cases, loading the extensions takes longer than loading the
cache entries so this patch utilizes the new EOIE to start the thread to
load the extensions before loading all the cache entries in parallel.

This is possible because the current extensions don't access the cache
entries in the index_state structure so are OK that they don't all exist
yet.

The CACHE_EXT_TREE, CACHE_EXT_RESOLVE_UNDO, and CACHE_EXT_UNTRACKED
extensions don't even get a pointer to the index so don't have access to the
cache entries.

CACHE_EXT_LINK only uses the index_state to initialize the split index.
CACHE_EXT_FSMONITOR only uses the index_state to save the fsmonitor last
update and dirty flags.

I used p0002-read-cache.sh to generate some performance data:

	Test w/100,000 files reduced the time by 0.53%
	Test w/1,000,000 files reduced the time by 27.78%

Signed-off-by: Ben Peart <peartben@gmail.com>
---
 read-cache.c | 97 +++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 81 insertions(+), 16 deletions(-)

Comments

Duy Nguyen Oct. 1, 2018, 3:50 p.m. UTC | #1
On Mon, Oct 1, 2018 at 3:46 PM Ben Peart <peartben@gmail.com> wrote:
> @@ -1890,6 +1891,46 @@ static size_t estimate_cache_size(size_t ondisk_size, unsigned int entries)
>  static size_t read_eoie_extension(const char *mmap, size_t mmap_size);
>  static void write_eoie_extension(struct strbuf *sb, git_hash_ctx *eoie_context, size_t offset);
>
> +struct load_index_extensions
> +{
> +#ifndef NO_PTHREADS
> +       pthread_t pthread;
> +#endif
> +       struct index_state *istate;
> +       const char *mmap;
> +       size_t mmap_size;
> +       unsigned long src_offset;
> +};
> +
> +static void *load_index_extensions(void *_data)
> +{
> +       struct load_index_extensions *p = _data;
> +       unsigned long src_offset = p->src_offset;
> +
> +       while (src_offset <= p->mmap_size - the_hash_algo->rawsz - 8) {
> +               /* After an array of active_nr index entries,
> +                * there can be arbitrary number of extended
> +                * sections, each of which is prefixed with
> +                * extension name (4-byte) and section length
> +                * in 4-byte network byte order.
> +                */
> +               uint32_t extsize;
> +               memcpy(&extsize, p->mmap + src_offset + 4, 4);
> +               extsize = ntohl(extsize);

This could be get_be32() so that the next person will not need to do
another cleanup patch.

> +               if (read_index_extension(p->istate,
> +                       p->mmap + src_offset,
> +                       p->mmap + src_offset + 8,
> +                       extsize) < 0) {

This alignment is misleading because the conditions are aligned with
the code block below. If you can't align it with the '(', then just
add another tab.

> +                       munmap((void *)p->mmap, p->mmap_size);

This made me pause for a bit since we should not need to cast back to
void *. It turns out you need this because mmap pointer is const. But
you don't even need to munmap here. We're dying, the OS will clean
everything up.

> +                       die(_("index file corrupt"));
> +               }
> +               src_offset += 8;
> +               src_offset += extsize;
> +       }
> +
> +       return NULL;
> +}
Ben Peart Oct. 2, 2018, 3 p.m. UTC | #2
On 10/1/2018 11:50 AM, Duy Nguyen wrote:
> On Mon, Oct 1, 2018 at 3:46 PM Ben Peart <peartben@gmail.com> wrote:
>> @@ -1890,6 +1891,46 @@ static size_t estimate_cache_size(size_t ondisk_size, unsigned int entries)
>>   static size_t read_eoie_extension(const char *mmap, size_t mmap_size);
>>   static void write_eoie_extension(struct strbuf *sb, git_hash_ctx *eoie_context, size_t offset);
>>
>> +struct load_index_extensions
>> +{
>> +#ifndef NO_PTHREADS
>> +       pthread_t pthread;
>> +#endif
>> +       struct index_state *istate;
>> +       const char *mmap;
>> +       size_t mmap_size;
>> +       unsigned long src_offset;
>> +};
>> +
>> +static void *load_index_extensions(void *_data)
>> +{
>> +       struct load_index_extensions *p = _data;
>> +       unsigned long src_offset = p->src_offset;
>> +
>> +       while (src_offset <= p->mmap_size - the_hash_algo->rawsz - 8) {
>> +               /* After an array of active_nr index entries,
>> +                * there can be arbitrary number of extended
>> +                * sections, each of which is prefixed with
>> +                * extension name (4-byte) and section length
>> +                * in 4-byte network byte order.
>> +                */
>> +               uint32_t extsize;
>> +               memcpy(&extsize, p->mmap + src_offset + 4, 4);
>> +               extsize = ntohl(extsize);
> 
> This could be get_be32() so that the next person will not need to do
> another cleanup patch.
> 

Good point, it was existing code so I focused on doing the minimal 
change possible but I can clean it up since I'm touching it already.

>> +               if (read_index_extension(p->istate,
>> +                       p->mmap + src_offset,
>> +                       p->mmap + src_offset + 8,
>> +                       extsize) < 0) {
> 
> This alignment is misleading because the conditions are aligned with
> the code block below. If you can't align it with the '(', then just
> add another tab.
> 

Ditto. I'll make it:

		uint32_t extsize = get_be32(p->mmap + src_offset + 4);
		if (read_index_extension(p->istate,
					 p->mmap + src_offset,
					 p->mmap + src_offset + 8,
					 extsize) < 0) {
			munmap((void *)p->mmap, p->mmap_size);
			die(_("index file corrupt"));
		}


>> +                       munmap((void *)p->mmap, p->mmap_size);
> 
> This made me pause for a bit since we should not need to cast back to
> void *. It turns out you need this because mmap pointer is const. But
> you don't even need to munmap here. We're dying, the OS will clean
> everything up.
> 

I had the same thought about "we're about to die so why bother calling 
munmap() here" but I decided rather than change it, I'd follow the 
existing pattern just in case there was some platform/bug that required 
it.  I apparently doesn't cause harm as it's been that way a long time.

>> +                       die(_("index file corrupt"));
>> +               }
>> +               src_offset += 8;
>> +               src_offset += extsize;
>> +       }
>> +
>> +       return NULL;
>> +}
diff mbox series

Patch

diff --git a/read-cache.c b/read-cache.c
index af2605a168..77083ab8bb 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -23,6 +23,7 @@ 
 #include "split-index.h"
 #include "utf8.h"
 #include "fsmonitor.h"
+#include "thread-utils.h"
 
 /* Mask for the name length in ce_flags in the on-disk index */
 
@@ -1890,6 +1891,46 @@  static size_t estimate_cache_size(size_t ondisk_size, unsigned int entries)
 static size_t read_eoie_extension(const char *mmap, size_t mmap_size);
 static void write_eoie_extension(struct strbuf *sb, git_hash_ctx *eoie_context, size_t offset);
 
+struct load_index_extensions
+{
+#ifndef NO_PTHREADS
+	pthread_t pthread;
+#endif
+	struct index_state *istate;
+	const char *mmap;
+	size_t mmap_size;
+	unsigned long src_offset;
+};
+
+static void *load_index_extensions(void *_data)
+{
+	struct load_index_extensions *p = _data;
+	unsigned long src_offset = p->src_offset;
+
+	while (src_offset <= p->mmap_size - the_hash_algo->rawsz - 8) {
+		/* After an array of active_nr index entries,
+		 * there can be arbitrary number of extended
+		 * sections, each of which is prefixed with
+		 * extension name (4-byte) and section length
+		 * in 4-byte network byte order.
+		 */
+		uint32_t extsize;
+		memcpy(&extsize, p->mmap + src_offset + 4, 4);
+		extsize = ntohl(extsize);
+		if (read_index_extension(p->istate,
+			p->mmap + src_offset,
+			p->mmap + src_offset + 8,
+			extsize) < 0) {
+			munmap((void *)p->mmap, p->mmap_size);
+			die(_("index file corrupt"));
+		}
+		src_offset += 8;
+		src_offset += extsize;
+	}
+
+	return NULL;
+}
+
 /* remember to discard_cache() before reading a different cache! */
 int do_read_index(struct index_state *istate, const char *path, int must_exist)
 {
@@ -1900,6 +1941,11 @@  int do_read_index(struct index_state *istate, const char *path, int must_exist)
 	const char *mmap;
 	size_t mmap_size;
 	const struct cache_entry *previous_ce = NULL;
+	struct load_index_extensions p;
+	size_t extension_offset = 0;
+#ifndef NO_PTHREADS
+	int nr_threads;
+#endif
 
 	if (istate->initialized)
 		return istate->cache_nr;
@@ -1936,6 +1982,30 @@  int do_read_index(struct index_state *istate, const char *path, int must_exist)
 	istate->cache = xcalloc(istate->cache_alloc, sizeof(*istate->cache));
 	istate->initialized = 1;
 
+	p.istate = istate;
+	p.mmap = mmap;
+	p.mmap_size = mmap_size;
+
+#ifndef NO_PTHREADS
+	nr_threads = git_config_get_index_threads();
+	if (!nr_threads)
+		nr_threads = online_cpus();
+
+	if (nr_threads > 1) {
+		extension_offset = read_eoie_extension(mmap, mmap_size);
+		if (extension_offset) {
+			int err;
+
+			p.src_offset = extension_offset;
+			err = pthread_create(&p.pthread, NULL, load_index_extensions, &p);
+			if (err)
+				die(_("unable to create load_index_extensions thread: %s"), strerror(err));
+
+			nr_threads--;
+		}
+	}
+#endif
+
 	if (istate->version == 4) {
 		mem_pool_init(&istate->ce_mem_pool,
 			      estimate_cache_size_from_compressed(istate->cache_nr));
@@ -1960,22 +2030,17 @@  int do_read_index(struct index_state *istate, const char *path, int must_exist)
 	istate->timestamp.sec = st.st_mtime;
 	istate->timestamp.nsec = ST_MTIME_NSEC(st);
 
-	while (src_offset <= mmap_size - the_hash_algo->rawsz - 8) {
-		/* After an array of active_nr index entries,
-		 * there can be arbitrary number of extended
-		 * sections, each of which is prefixed with
-		 * extension name (4-byte) and section length
-		 * in 4-byte network byte order.
-		 */
-		uint32_t extsize;
-		extsize = get_be32(mmap + src_offset + 4);
-		if (read_index_extension(istate,
-					 mmap + src_offset,
-					 mmap + src_offset + 8,
-					 extsize) < 0)
-			goto unmap;
-		src_offset += 8;
-		src_offset += extsize;
+	/* if we created a thread, join it otherwise load the extensions on the primary thread */
+#ifndef NO_PTHREADS
+	if (extension_offset) {
+		int ret = pthread_join(p.pthread, NULL);
+		if (ret)
+			die(_("unable to join load_index_extensions thread: %s"), strerror(ret));
+	}
+#endif
+	if (!extension_offset) {
+		p.src_offset = src_offset;
+		load_index_extensions(&p);
 	}
 	munmap((void *)mmap, mmap_size);
 	return istate->cache_nr;