[v1] speed up refresh_index() by utilizing preload_index()
diff mbox series

Message ID 20181029204159.18208-1-peartben@gmail.com
State New
Headers show
Series
  • [v1] speed up refresh_index() by utilizing preload_index()
Related show

Commit Message

Ben Peart Oct. 29, 2018, 8:41 p.m. UTC
From: Ben Peart <benpeart@microsoft.com>

Speed up refresh_index() by calling preload_index() first, so that most of
the work is spread across multiple threads.  This works because most cache
entries will then already be marked CE_UPTODATE, which lets
refresh_cache_ent() bail out early when called from within refresh_index().

On a Windows repo with ~200K files, this drops refresh times from 6.64
seconds to 2.87 seconds for a savings of 57%.

Signed-off-by: Ben Peart <benpeart@microsoft.com>
---

Notes:
    Base Ref: master
    Web-Diff: https://github.com/benpeart/git/commit/feee1054c2
    Checkout: git fetch https://github.com/benpeart/git refresh-index-multithread-preload-v1 && git checkout feee1054c2

 cache.h         | 3 +++
 preload-index.c | 8 ++++----
 read-cache.c    | 6 ++++++
 3 files changed, 13 insertions(+), 4 deletions(-)


base-commit: c670b1f876521c9f7cd40184bf7ed05aad843433

Comments

Junio C Hamano Oct. 30, 2018, 2:28 a.m. UTC | #1
Ben Peart <peartben@gmail.com> writes:

> From: Ben Peart <benpeart@microsoft.com>
>
> Speed up refresh_index() by utilizing preload_index() to do most of the work
> spread across multiple threads.  This works because most cache entries will
> get marked CE_UPTODATE so that refresh_cache_ent() can bail out early when
> called from within refresh_index().
>
> On a Windows repo with ~200K files, this drops refresh times from 6.64
> seconds to 2.87 seconds for a savings of 57%.
>
> Signed-off-by: Ben Peart <benpeart@microsoft.com>
> ---

OK.  We used to only expose the whole "read the index file into an
istate, and then do the lstat() part in parallel", but now we also
make the "do the lstat() part" available separately.

Which makes sense.


> diff --git a/cache.h b/cache.h
> index f7fabdde8f..883099db08 100644
> --- a/cache.h
> +++ b/cache.h
> @@ -659,6 +659,9 @@ extern int daemonize(void);
>  /* Initialize and use the cache information */
>  struct lock_file;
>  extern int read_index(struct index_state *);
> +extern void preload_index(struct index_state *index,
> +			  const struct pathspec *pathspec,
> +			  unsigned int refresh_flags);
>  extern int read_index_preload(struct index_state *,
>  			      const struct pathspec *pathspec,
>  			      unsigned int refresh_flags);
> diff --git a/preload-index.c b/preload-index.c
> index 9e7152ab14..222792ccbc 100644
> --- a/preload-index.c
> +++ b/preload-index.c
> @@ -9,7 +9,7 @@
>  #include "progress.h"
>  
>  #ifdef NO_PTHREADS
> -static void preload_index(struct index_state *index,
> +void preload_index(struct index_state *index,
>  			  const struct pathspec *pathspec,
>  			  unsigned int refresh_flags)
>  {
> @@ -100,9 +100,9 @@ static void *preload_thread(void *_data)
>  	return NULL;
>  }
>  
> -static void preload_index(struct index_state *index,
> -			  const struct pathspec *pathspec,
> -			  unsigned int refresh_flags)
> +void preload_index(struct index_state *index,
> +		   const struct pathspec *pathspec,
> +		   unsigned int refresh_flags)
>  {
>  	int threads, i, work, offset;
>  	struct thread_data data[MAX_PARALLEL];
> diff --git a/read-cache.c b/read-cache.c
> index d57958233e..53733d651d 100644
> --- a/read-cache.c
> +++ b/read-cache.c
> @@ -1496,6 +1496,12 @@ int refresh_index(struct index_state *istate, unsigned int flags,
>  	typechange_fmt = (in_porcelain ? "T\t%s\n" : "%s needs update\n");
>  	added_fmt = (in_porcelain ? "A\t%s\n" : "%s needs update\n");
>  	unmerged_fmt = (in_porcelain ? "U\t%s\n" : "%s: needs merge\n");
> +	/*
> +	 * Use the multi-threaded preload_index() to refresh most of the
> +	 * cache entries quickly then in the single threaded loop below,
> +	 * we only have to do the special cases that are left.
> +	 */
> +	preload_index(istate, pathspec, 0);
>  	for (i = 0; i < istate->cache_nr; i++) {
>  		struct cache_entry *ce, *new_entry;
>  		int cache_errno = 0;
>
> base-commit: c670b1f876521c9f7cd40184bf7ed05aad843433

Patch
diff mbox series

diff --git a/cache.h b/cache.h
index f7fabdde8f..883099db08 100644
--- a/cache.h
+++ b/cache.h
@@ -659,6 +659,9 @@  extern int daemonize(void);
 /* Initialize and use the cache information */
 struct lock_file;
 extern int read_index(struct index_state *);
+extern void preload_index(struct index_state *index,
+			  const struct pathspec *pathspec,
+			  unsigned int refresh_flags);
 extern int read_index_preload(struct index_state *,
 			      const struct pathspec *pathspec,
 			      unsigned int refresh_flags);
diff --git a/preload-index.c b/preload-index.c
index 9e7152ab14..222792ccbc 100644
--- a/preload-index.c
+++ b/preload-index.c
@@ -9,7 +9,7 @@ 
 #include "progress.h"
 
 #ifdef NO_PTHREADS
-static void preload_index(struct index_state *index,
+void preload_index(struct index_state *index,
 			  const struct pathspec *pathspec,
 			  unsigned int refresh_flags)
 {
@@ -100,9 +100,9 @@  static void *preload_thread(void *_data)
 	return NULL;
 }
 
-static void preload_index(struct index_state *index,
-			  const struct pathspec *pathspec,
-			  unsigned int refresh_flags)
+void preload_index(struct index_state *index,
+		   const struct pathspec *pathspec,
+		   unsigned int refresh_flags)
 {
 	int threads, i, work, offset;
 	struct thread_data data[MAX_PARALLEL];
diff --git a/read-cache.c b/read-cache.c
index d57958233e..53733d651d 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1496,6 +1496,12 @@  int refresh_index(struct index_state *istate, unsigned int flags,
 	typechange_fmt = (in_porcelain ? "T\t%s\n" : "%s needs update\n");
 	added_fmt = (in_porcelain ? "A\t%s\n" : "%s needs update\n");
 	unmerged_fmt = (in_porcelain ? "U\t%s\n" : "%s: needs merge\n");
+	/*
+	 * Use the multi-threaded preload_index() to refresh most of the
+	 * cache entries quickly; then, in the single-threaded loop below,
+	 * we only have to handle the special cases that are left.
+	 */
+	preload_index(istate, pathspec, 0);
 	for (i = 0; i < istate->cache_nr; i++) {
 		struct cache_entry *ce, *new_entry;
 		int cache_errno = 0;