[1/1] unpack-trees: skip stat on fsmonitor-valid files
diff mbox series

Message ID f76ba554ed25fb9877a223ef6481834f1831c8ca.1572910885.git.gitgitgadget@gmail.com
State New
Headers show
Series
  • unpack-trees: skip stat on fsmonitor-valid files
Related show

Commit Message

ryenus via GitGitGadget Nov. 4, 2019, 11:41 p.m. UTC
From: Utsav Shah <utsav@dropbox.com>

The index might be aware that a file hasn't modified via fsmonitor, but
unpack-trees did not pay attention to it and checked via ie_match_stat
which can be inefficient on certain filesystems. This significantly slows
down commands that run oneway_merge, like checkout and reset --hard.

This patch makes oneway_merge check whether a file is considered
unchanged through fsmonitor and skips ie_match_stat on it. unpack-trees
also now correctly copies over fsmonitor validity state from the source
index. Finally, for correctness, we force a refresh of fsmonitor state in
tweak_fsmonitor.

After this change, commands like stash (that use reset --hard
internally) go from 8s or more to ~2s on a 250k file repository on a
mac.

Signed-off-by: Utsav Shah <utsav@dropbox.com>
---
 fsmonitor.c                       | 20 +++++++++++---------
 t/t7113-post-index-change-hook.sh |  3 ---
 t/t7519-status-fsmonitor.sh       |  9 +++++++--
 unpack-trees.c                    |  6 +++++-
 4 files changed, 23 insertions(+), 15 deletions(-)

Comments

Utsav Shah Nov. 5, 2019, 3:29 p.m. UTC | #1
Sorry for the noise, I've resubmitted this patch on the original
thread (pull.424.v2.git.1572967644.gitgitgadget@gmail.com).


On Mon, Nov 4, 2019 at 3:41 PM Utsav Shah via GitGitGadget
<gitgitgadget@gmail.com> wrote:
>
> From: Utsav Shah <utsav@dropbox.com>
>
> The index might be aware that a file hasn't modified via fsmonitor, but
> unpack-trees did not pay attention to it and checked via ie_match_stat
> which can be inefficient on certain filesystems. This significantly slows
> down commands that run oneway_merge, like checkout and reset --hard.
>
> This patch makes oneway_merge check whether a file is considered
> unchanged through fsmonitor and skips ie_match_stat on it. unpack-trees
> also now correctly copies over fsmonitor validity state from the source
> index. Finally, for correctness, we force a refresh of fsmonitor state in
> tweak_fsmonitor.
>
> After this change, commands like stash (that use reset --hard
> internally) go from 8s or more to ~2s on a 250k file repository on a
> mac.
>
> Signed-off-by: Utsav Shah <utsav@dropbox.com>
> ---
>  fsmonitor.c                       | 20 +++++++++++---------
>  t/t7113-post-index-change-hook.sh |  3 ---
>  t/t7519-status-fsmonitor.sh       |  9 +++++++--
>  unpack-trees.c                    |  6 +++++-
>  4 files changed, 23 insertions(+), 15 deletions(-)
>
> diff --git a/fsmonitor.c b/fsmonitor.c
> index 1f4aa1b150..4362bc6ee9 100644
> --- a/fsmonitor.c
> +++ b/fsmonitor.c
> @@ -55,9 +55,10 @@ int read_fsmonitor_extension(struct index_state *istate, const void *data,
>         }
>         istate->fsmonitor_dirty = fsmonitor_dirty;
>
> -       if (istate->fsmonitor_dirty->bit_size > istate->cache_nr)
> -               BUG("fsmonitor_dirty has more entries than the index (%"PRIuMAX" > %u)",
> -                   (uintmax_t)istate->fsmonitor_dirty->bit_size, istate->cache_nr);
> +       if (!istate->split_index && istate->fsmonitor_dirty->bit_size > istate->cache_nr)
> +               BUG("fsmonitor_dirty has more entries than the index (%"PRIuMAX" > %"PRIuMAX")",
> +                   (uintmax_t)istate->fsmonitor_dirty->bit_size, (uintmax_t)istate->cache_nr);
> +
>
>         trace_printf_key(&trace_fsmonitor, "read fsmonitor extension successful");
>         return 0;
> @@ -83,9 +84,9 @@ void write_fsmonitor_extension(struct strbuf *sb, struct index_state *istate)
>         uint32_t ewah_size = 0;
>         int fixup = 0;
>
> -       if (istate->fsmonitor_dirty->bit_size > istate->cache_nr)
> -               BUG("fsmonitor_dirty has more entries than the index (%"PRIuMAX" > %u)",
> -                   (uintmax_t)istate->fsmonitor_dirty->bit_size, istate->cache_nr);
> +       if (!istate->split_index && istate->fsmonitor_dirty->bit_size > istate->cache_nr)
> +               BUG("fsmonitor_dirty has more entries than the index (%"PRIuMAX" > %"PRIuMAX")",
> +                   (uintmax_t)istate->fsmonitor_dirty->bit_size, (uintmax_t)istate->cache_nr);
>
>         put_be32(&hdr_version, INDEX_EXTENSION_VERSION);
>         strbuf_add(sb, &hdr_version, sizeof(uint32_t));
> @@ -189,6 +190,9 @@ void refresh_fsmonitor(struct index_state *istate)
>                 }
>                 if (bol < query_result.len)
>                         fsmonitor_refresh_callback(istate, buf + bol);
> +
> +               if (istate->untracked)
> +                       istate->untracked->use_fsmonitor = 1;
>         } else {
>                 /* Mark all entries invalid */
>                 for (i = 0; i < istate->cache_nr; i++)
> @@ -257,9 +261,7 @@ void tweak_fsmonitor(struct index_state *istate)
>                                     (uintmax_t)istate->fsmonitor_dirty->bit_size, istate->cache_nr);
>                         ewah_each_bit(istate->fsmonitor_dirty, fsmonitor_ewah_callback, istate);
>
> -                       /* Now mark the untracked cache for fsmonitor usage */
> -                       if (istate->untracked)
> -                               istate->untracked->use_fsmonitor = 1;
> +                       refresh_fsmonitor(istate);
>                 }
>
>                 ewah_free(istate->fsmonitor_dirty);
> diff --git a/t/t7113-post-index-change-hook.sh b/t/t7113-post-index-change-hook.sh
> index f011ad7eec..5ca2279d0d 100755
> --- a/t/t7113-post-index-change-hook.sh
> +++ b/t/t7113-post-index-change-hook.sh
> @@ -50,9 +50,6 @@ test_expect_success 'test status, add, commit, others trigger hook without flags
>         git checkout -- dir1/file1.txt &&
>         test_path_is_file testsuccess && rm -f testsuccess &&
>         test_path_is_missing testfailure &&
> -       git update-index &&
> -       test_path_is_missing testsuccess &&
> -       test_path_is_missing testfailure &&
>         git reset --soft &&
>         test_path_is_missing testsuccess &&
>         test_path_is_missing testfailure
> diff --git a/t/t7519-status-fsmonitor.sh b/t/t7519-status-fsmonitor.sh
> index d8df990972..9cac3d3d8e 100755
> --- a/t/t7519-status-fsmonitor.sh
> +++ b/t/t7519-status-fsmonitor.sh
> @@ -106,6 +106,8 @@ EOF
>
>  # test that "update-index --fsmonitor-valid" sets the fsmonitor valid bit
>  test_expect_success 'update-index --fsmonitor-valid" sets the fsmonitor valid bit' '
> +       write_script .git/hooks/fsmonitor-test<<-\EOF &&
> +       EOF
>         git update-index --fsmonitor &&
>         git update-index --fsmonitor-valid dir1/modified &&
>         git update-index --fsmonitor-valid dir2/modified &&
> @@ -164,6 +166,8 @@ EOF
>
>  # test that newly added files are marked valid
>  test_expect_success 'newly added files are marked valid' '
> +       write_script .git/hooks/fsmonitor-test<<-\EOF &&
> +       EOF
>         git add new &&
>         git add dir1/new &&
>         git add dir2/new &&
> @@ -218,11 +222,12 @@ test_expect_success '*only* files returned by the integration script get flagged
>  # Ensure commands that call refresh_index() to move the index back in time
>  # properly invalidate the fsmonitor cache
>  test_expect_success 'refresh_index() invalidates fsmonitor cache' '
> -       write_script .git/hooks/fsmonitor-test<<-\EOF &&
> -       EOF
>         clean_repo &&
> +       write_integration_script &&
>         dirty_repo &&
>         git add . &&
> +       write_script .git/hooks/fsmonitor-test<<-\EOF &&
> +       EOF
>         git commit -m "to reset" &&
>         git reset HEAD~1 &&
>         git status >actual &&
> diff --git a/unpack-trees.c b/unpack-trees.c
> index 33ea7810d8..fc5ceb932c 100644
> --- a/unpack-trees.c
> +++ b/unpack-trees.c
> @@ -1504,6 +1504,9 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options
>         o->merge_size = len;
>         mark_all_ce_unused(o->src_index);
>
> +       if (o->src_index->fsmonitor_last_update)
> +               o->result.fsmonitor_last_update = o->src_index->fsmonitor_last_update;
> +
>         /*
>          * Sparse checkout loop #1: set NEW_SKIP_WORKTREE on existing entries
>          */
> @@ -2384,7 +2387,8 @@ int oneway_merge(const struct cache_entry * const *src,
>
>         if (old && same(old, a)) {
>                 int update = 0;
> -               if (o->reset && o->update && !ce_uptodate(old) && !ce_skip_worktree(old)) {
> +               if (o->reset && o->update && !ce_uptodate(old) && !ce_skip_worktree(old) &&
> +                       !(old->ce_flags & CE_FSMONITOR_VALID)) {
>                         struct stat st;
>                         if (lstat(old->name, &st) ||
>                             ie_match_stat(o->src_index, old, &st, CE_MATCH_IGNORE_VALID|CE_MATCH_IGNORE_SKIP_WORKTREE))
> --
> gitgitgadget

Patch
diff mbox series

diff --git a/fsmonitor.c b/fsmonitor.c
index 1f4aa1b150..4362bc6ee9 100644
--- a/fsmonitor.c
+++ b/fsmonitor.c
@@ -55,9 +55,10 @@  int read_fsmonitor_extension(struct index_state *istate, const void *data,
 	}
 	istate->fsmonitor_dirty = fsmonitor_dirty;
 
-	if (istate->fsmonitor_dirty->bit_size > istate->cache_nr)
-		BUG("fsmonitor_dirty has more entries than the index (%"PRIuMAX" > %u)",
-		    (uintmax_t)istate->fsmonitor_dirty->bit_size, istate->cache_nr);
+	if (!istate->split_index && istate->fsmonitor_dirty->bit_size > istate->cache_nr)
+		BUG("fsmonitor_dirty has more entries than the index (%"PRIuMAX" > %"PRIuMAX")",
+		    (uintmax_t)istate->fsmonitor_dirty->bit_size, (uintmax_t)istate->cache_nr);
+
 
 	trace_printf_key(&trace_fsmonitor, "read fsmonitor extension successful");
 	return 0;
@@ -83,9 +84,9 @@  void write_fsmonitor_extension(struct strbuf *sb, struct index_state *istate)
 	uint32_t ewah_size = 0;
 	int fixup = 0;
 
-	if (istate->fsmonitor_dirty->bit_size > istate->cache_nr)
-		BUG("fsmonitor_dirty has more entries than the index (%"PRIuMAX" > %u)",
-		    (uintmax_t)istate->fsmonitor_dirty->bit_size, istate->cache_nr);
+	if (!istate->split_index && istate->fsmonitor_dirty->bit_size > istate->cache_nr)
+		BUG("fsmonitor_dirty has more entries than the index (%"PRIuMAX" > %"PRIuMAX")",
+		    (uintmax_t)istate->fsmonitor_dirty->bit_size, (uintmax_t)istate->cache_nr);
 
 	put_be32(&hdr_version, INDEX_EXTENSION_VERSION);
 	strbuf_add(sb, &hdr_version, sizeof(uint32_t));
@@ -189,6 +190,9 @@  void refresh_fsmonitor(struct index_state *istate)
 		}
 		if (bol < query_result.len)
 			fsmonitor_refresh_callback(istate, buf + bol);
+
+		if (istate->untracked)
+			istate->untracked->use_fsmonitor = 1;
 	} else {
 		/* Mark all entries invalid */
 		for (i = 0; i < istate->cache_nr; i++)
@@ -257,9 +261,7 @@  void tweak_fsmonitor(struct index_state *istate)
 				    (uintmax_t)istate->fsmonitor_dirty->bit_size, istate->cache_nr);
 			ewah_each_bit(istate->fsmonitor_dirty, fsmonitor_ewah_callback, istate);
 
-			/* Now mark the untracked cache for fsmonitor usage */
-			if (istate->untracked)
-				istate->untracked->use_fsmonitor = 1;
+			refresh_fsmonitor(istate);
 		}
 
 		ewah_free(istate->fsmonitor_dirty);
diff --git a/t/t7113-post-index-change-hook.sh b/t/t7113-post-index-change-hook.sh
index f011ad7eec..5ca2279d0d 100755
--- a/t/t7113-post-index-change-hook.sh
+++ b/t/t7113-post-index-change-hook.sh
@@ -50,9 +50,6 @@  test_expect_success 'test status, add, commit, others trigger hook without flags
 	git checkout -- dir1/file1.txt &&
 	test_path_is_file testsuccess && rm -f testsuccess &&
 	test_path_is_missing testfailure &&
-	git update-index &&
-	test_path_is_missing testsuccess &&
-	test_path_is_missing testfailure &&
 	git reset --soft &&
 	test_path_is_missing testsuccess &&
 	test_path_is_missing testfailure
diff --git a/t/t7519-status-fsmonitor.sh b/t/t7519-status-fsmonitor.sh
index d8df990972..9cac3d3d8e 100755
--- a/t/t7519-status-fsmonitor.sh
+++ b/t/t7519-status-fsmonitor.sh
@@ -106,6 +106,8 @@  EOF
 
 # test that "update-index --fsmonitor-valid" sets the fsmonitor valid bit
 test_expect_success 'update-index --fsmonitor-valid" sets the fsmonitor valid bit' '
+	write_script .git/hooks/fsmonitor-test<<-\EOF &&
+	EOF
 	git update-index --fsmonitor &&
 	git update-index --fsmonitor-valid dir1/modified &&
 	git update-index --fsmonitor-valid dir2/modified &&
@@ -164,6 +166,8 @@  EOF
 
 # test that newly added files are marked valid
 test_expect_success 'newly added files are marked valid' '
+	write_script .git/hooks/fsmonitor-test<<-\EOF &&
+	EOF
 	git add new &&
 	git add dir1/new &&
 	git add dir2/new &&
@@ -218,11 +222,12 @@  test_expect_success '*only* files returned by the integration script get flagged
 # Ensure commands that call refresh_index() to move the index back in time
 # properly invalidate the fsmonitor cache
 test_expect_success 'refresh_index() invalidates fsmonitor cache' '
-	write_script .git/hooks/fsmonitor-test<<-\EOF &&
-	EOF
 	clean_repo &&
+	write_integration_script &&
 	dirty_repo &&
 	git add . &&
+	write_script .git/hooks/fsmonitor-test<<-\EOF &&
+	EOF
 	git commit -m "to reset" &&
 	git reset HEAD~1 &&
 	git status >actual &&
diff --git a/unpack-trees.c b/unpack-trees.c
index 33ea7810d8..fc5ceb932c 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -1504,6 +1504,9 @@  int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options
 	o->merge_size = len;
 	mark_all_ce_unused(o->src_index);
 
+	if (o->src_index->fsmonitor_last_update)
+		o->result.fsmonitor_last_update = o->src_index->fsmonitor_last_update;
+
 	/*
 	 * Sparse checkout loop #1: set NEW_SKIP_WORKTREE on existing entries
 	 */
@@ -2384,7 +2387,8 @@  int oneway_merge(const struct cache_entry * const *src,
 
 	if (old && same(old, a)) {
 		int update = 0;
-		if (o->reset && o->update && !ce_uptodate(old) && !ce_skip_worktree(old)) {
+		if (o->reset && o->update && !ce_uptodate(old) && !ce_skip_worktree(old) &&
+			!(old->ce_flags & CE_FSMONITOR_VALID)) {
 			struct stat st;
 			if (lstat(old->name, &st) ||
 			    ie_match_stat(o->src_index, old, &st, CE_MATCH_IGNORE_VALID|CE_MATCH_IGNORE_SKIP_WORKTREE))