diff mbox series

[5/5] Accelerate clear_skip_worktree_from_present_files() by caching

Message ID e68028ebe0afc1bb9e623efbdd30de5a8f0740bf.1642092230.git.gitgitgadget@gmail.com (mailing list archive)
State Superseded
Headers show
Series Remove the present-despite-SKIP_WORKTREE class of bugs (for sparse-checkouts) | expand

Commit Message

Elijah Newren Jan. 13, 2022, 4:43 p.m. UTC
From: Elijah Newren <newren@gmail.com>

Trying to clear the skip-worktree bit from files that are present does
present some computational overhead, for sparse-checkouts.  (We do not
do the bit clearing in non-sparse-checkouts.)  Optimize it as follows:

Rather than lstat()'ing every SKIP_WORKTREE path, take advantage of the
fact that entire directories will often be missing, especially for cone
mode and even more so ever since commit 55dfcf9591 ("sparse-checkout:
clear tracked sparse dirs", 2021-09-08).  If we have already determined
that the parent directory of a file (or other previous ancestor) does
not exist, then the file cannot exist either so we do not need to
lstat() it separately.

Timings for p2000 included below, reformatted to fit in normal commit
message line lengths, which compare three things:
  * Timings before this series
  * Timings of the unoptimized version of
    clear_skip_worktree_from_present_files() from a few commits ago
  * Timings after the optimization in this commit

(NOTE: t/perf/ appears to have timing resolution only down to 0.01 s,
which presents significant measurement error when timings only differ by
0.01s.  I don't trust any such timings below, and yet all the optimized
results differ by at most 0.01s.)

Test        Before Series    Unoptimized              Optimized
-----------------------------------------------------------------------------
*git status*
full-v3     0.15(0.10+0.06)  0.32(0.16+0.17) +113.3%  0.16(0.10+0.07) +6.7%
full-v4     0.15(0.11+0.05)  0.32(0.17+0.16) +113.3%  0.16(0.11+0.05) +6.7%
sparse-v3   0.04(0.03+0.04)  0.04(0.02+0.05) +0.0%    0.04(0.02+0.05) +0.0%
sparse-v4   0.04(0.03+0.04)  0.04(0.02+0.05) +0.0%    0.04(0.03+0.05) +0.0%

*git add -A*
full-v3     0.40(0.30+0.07)  0.56(0.36+0.17) +40.0%   0.39(0.30+0.07) -2.5%
full-v4     0.37(0.28+0.07)  0.54(0.37+0.16) +45.9%   0.38(0.29+0.07) +2.7%
sparse-v3   0.06(0.04+0.05)  0.08(0.05+0.05) +33.3%   0.06(0.05+0.04) +0.0%
sparse-v4   0.05(0.03+0.05)  0.05(0.04+0.04) +0.0%    0.06(0.04+0.05) +20.0%

*git add .*
full-v3     0.40(0.31+0.07)  0.57(0.37+0.17) +42.5%   0.41(0.30+0.08) +2.5%
full-v4     0.38(0.30+0.06)  0.55(0.37+0.16) +44.7%   0.38(0.30+0.06) +0.0%
sparse-v3   0.06(0.04+0.05)  0.06(0.05+0.04) +0.0%    0.06(0.03+0.05) +0.0%
sparse-v4   0.06(0.05+0.05)  0.06(0.04+0.05) +0.0%    0.06(0.04+0.06) +0.0%

*git commit -a -m A*
full-v3     0.41(0.32+0.06)  0.58(0.39+0.17) +41.5%   0.42(0.32+0.07) +2.4%
full-v4     0.39(0.30+0.07)  0.56(0.38+0.17) +43.6%   0.40(0.31+0.07) +2.6%
sparse-v3   0.04(0.03+0.04)  0.04(0.03+0.04) +0.0%    0.04(0.03+0.04) +0.0%
sparse-v4   0.04(0.03+0.05)  0.04(0.03+0.05) +0.0%    0.04(0.03+0.04) +0.0%

*git checkout -f -*
full-v3     0.56(0.46+0.07)  0.73(0.55+0.16) +30.4%   0.57(0.47+0.08) +1.8%
full-v4     0.54(0.45+0.07)  0.71(0.53+0.17) +31.5%   0.55(0.45+0.07) +1.9%
sparse-v3   0.06(0.04+0.04)  0.06(0.04+0.05) +0.0%    0.06(0.04+0.05) +0.0%
sparse-v4   0.05(0.05+0.04)  0.05(0.04+0.05) +0.0%    0.06(0.04+0.05) +20.0%

*git reset*
full-v3     0.34(0.26+0.05)  0.51(0.34+0.15) +50.0%   0.34(0.26+0.06) +0.0%
full-v4     0.32(0.24+0.06)  0.49(0.32+0.15) +53.1%   0.33(0.25+0.06) +3.1%
sparse-v3   0.04(0.03+0.04)  0.04(0.03+0.04) +0.0%    0.04(0.03+0.04) +0.0%
sparse-v4   0.03(0.03+0.04)  0.03(0.02+0.04) +0.0%    0.03(0.03+0.04) +0.0%

*git reset --hard*
full-v3     0.57(0.46+0.07)  0.90(0.61+0.25) +57.9%   0.57(0.45+0.08) +0.0%
full-v4     0.54(0.46+0.05)  0.88(0.59+0.26) +63.0%   0.55(0.45+0.07) +1.9%
sparse-v3   0.07(0.03+0.03)  0.07(0.04+0.03) +0.0%    0.07(0.03+0.03) +0.0%
sparse-v4   0.06(0.03+0.03)  0.06(0.04+0.02) +0.0%    0.06(0.03+0.03) +0.0%

*git reset -- does-not-exist*
full-v3     0.35(0.27+0.06)  0.52(0.32+0.17) +48.6%   0.35(0.27+0.06) +0.0%
full-v4     0.33(0.26+0.05)  0.50(0.33+0.15) +51.5%   0.33(0.26+0.06) +0.0%
sparse-v3   0.04(0.03+0.04)  0.04(0.03+0.04) +0.0%    0.04(0.03+0.04) +0.0%
sparse-v4   0.04(0.02+0.04)  0.03(0.02+0.04) -25.0%   0.03(0.02+0.04) -25.0%

*git diff*
full-v3     0.07(0.04+0.04)  0.24(0.11+0.14) +242.9%  0.07(0.04+0.04) +0.0%
full-v4     0.07(0.03+0.05)  0.24(0.13+0.12) +242.9%  0.08(0.04+0.05) +14.3%
sparse-v3   0.02(0.01+0.04)  0.02(0.01+0.04) +0.0%    0.02(0.01+0.05) +0.0%
sparse-v4   0.02(0.02+0.03)  0.02(0.01+0.04) +0.0%    0.02(0.01+0.04) +0.0%

*git diff --cached*
full-v3     0.05(0.03+0.02)  0.22(0.12+0.09) +340.0%  0.05(0.03+0.01) +0.0%
full-v4     0.05(0.03+0.01)  0.23(0.12+0.11) +360.0%  0.05(0.03+0.02) +0.0%
sparse-v3   0.01(0.00+0.00)  0.01(0.00+0.00) +0.0%    0.01(0.00+0.00) +0.0%
sparse-v4   0.01(0.00+0.00)  0.01(0.00+0.00) +0.0%    0.01(0.00+0.00) +0.0%

*git blame f2/f4/a*
full-v3     0.18(0.13+0.05)  0.52(0.29+0.23) +188.9%  0.19(0.15+0.04) +5.6%
full-v4     0.19(0.15+0.04)  0.52(0.28+0.23) +173.7%  0.19(0.14+0.04) +0.0%
sparse-v3   0.10(0.08+0.02)  0.10(0.09+0.01) +0.0%    0.10(0.09+0.01) +0.0%
sparse-v4   0.10(0.08+0.02)  0.10(0.08+0.02) +0.0%    0.10(0.08+0.02) +0.0%

*git blame f2/f4/f3/a*
full-v3     0.45(0.36+0.08)  0.78(0.51+0.27) +73.3%   0.45(0.37+0.08) +0.0%
full-v4     0.45(0.37+0.08)  0.78(0.51+0.26) +73.3%   0.45(0.37+0.08) +0.0%
sparse-v3   0.36(0.32+0.04)  0.36(0.31+0.05) +0.0%    0.36(0.31+0.04) +0.0%
sparse-v4   0.36(0.31+0.05)  0.36(0.31+0.05) +0.0%    0.36(0.31+0.04) +0.0%

*git checkout-index -f --all*
full-v3     0.07(0.02+0.05)  0.24(0.12+0.12) +242.9%  0.08(0.04+0.04) +14.3%
full-v4     0.07(0.03+0.04)  0.24(0.11+0.13) +242.9%  0.08(0.03+0.04) +14.3%
sparse-v3   0.04(0.01+0.03)  0.04(0.00+0.03) +0.0%    0.04(0.01+0.03) +0.0%
sparse-v4   0.04(0.01+0.02)  0.04(0.01+0.03) +0.0%    0.04(0.01+0.02) +0.0%

*git update-index --add --remove f2/f4/a*
full-v3     0.29(0.23+0.02)  0.46(0.30+0.12) +58.6%   0.30(0.24+0.02) +3.4%
full-v4     0.27(0.22+0.02)  0.45(0.29+0.12) +66.7%   0.28(0.22+0.03) +3.7%
sparse-v3   0.02(0.02+0.00)  0.02(0.01+0.00) +0.0%    0.02(0.01+0.00) +0.0%
sparse-v4   0.02(0.02+0.00)  0.02(0.02+0.00) +0.0%    0.02(0.02+0.00) +0.0%

So, with the optimization, the extra work appears to be essentially 0
for sparse-checkouts that are also using sparse-indexes (even before my
optimization), and the extra work appears to be just marginally more
than 0 for sparse-checkouts that are using full indexes.

Signed-off-by: Elijah Newren <newren@gmail.com>
---
 sparse-index.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 54 insertions(+), 2 deletions(-)

Comments

Elijah Newren Jan. 13, 2022, 11:35 p.m. UTC | #1
On Thu, Jan 13, 2022 at 8:43 AM Elijah Newren via GitGitGadget
<gitgitgadget@gmail.com> wrote:
>
> From: Elijah Newren <newren@gmail.com>
>
> Trying to clear the skip-worktree bit from files that are present does
> present some computational overhead, for sparse-checkouts.  (We do not
> do the bit clearing in non-sparse-checkouts.)  Optimize it as follows:
>
...
> (NOTE: t/perf/ appears to have timing resolution only down to 0.01 s,
> which presents significant measurement error when timings only differ by
> 0.01s.  I don't trust any such timings below, and yet all the optimized
> results differ by at most 0.01s.)

To follow up on this using using a tool with higher precision for a
few selected cases:

> Test        Before Series    Unoptimized              Optimized
> -----------------------------------------------------------------------------
...
> *git add -A*
> full-v3     0.40(0.30+0.07)  0.56(0.36+0.17) +40.0%   0.39(0.30+0.07) -2.5%

In hyperfine, the results were:

before-series,full-v3$ hyperfine '~/floss/git/git add -A'
Benchmark #1: ~/floss/git/git add -A
  Time (mean ± σ):     129.4 ms ±   3.0 ms    [User: 84.0 ms, System: 59.9 ms]
  Range (min … max):   122.5 ms … 138.3 ms    24 runs

post-optim,full-v3$ hyperfine '~/floss/git/git add -A'
Benchmark #1: ~/floss/git/git add -A
  Time (mean ± σ):     133.4 ms ±   3.0 ms    [User: 85.6 ms, System: 61.4 ms]
  Range (min … max):   129.7 ms … 142.7 ms    22 runs

Since 133.4/129.4 ~ 1.0309, we see that we have +3.1% on the execution
timing due to changes in this series.  Notably, the number is actually
positive as we'd expect since we are clearly doing more work, whereas
the t/perf system reports -2.5% due to measurement inaccuracy.

> full-v4     0.37(0.28+0.07)  0.54(0.37+0.16) +45.9%   0.38(0.29+0.07) +2.7%
> sparse-v3   0.06(0.04+0.05)  0.08(0.05+0.05) +33.3%   0.06(0.05+0.04) +0.0%
> sparse-v4   0.05(0.03+0.05)  0.05(0.04+0.04) +0.0%    0.06(0.04+0.05) +20.0%

before-series,sparse-v4$ hyperfine '~/floss/git/git add -A'
Benchmark #1: ~/floss/git/git add -A
  Time (mean ± σ):      37.0 ms ±   1.1 ms    [User: 23.1 ms, System: 47.8 ms]
  Range (min … max):    35.6 ms …  42.1 ms    72 runs

post-optim,sparse-v4$ hyperfine '~/floss/git/git add -A'
Benchmark #1: ~/floss/git/git add -A
  Time (mean ± σ):      37.3 ms ±   0.9 ms    [User: 22.8 ms, System: 48.7 ms]
  Range (min … max):    36.2 ms …  40.7 ms    72 runs

37.3/37.0 ~ 1.008, so a +0.8% on execution timing.  Hard to measure,
and nice that hyperfine automatically chooses to run this 72 times
just to ensure the data is more reliable.  (Also, the +0.8% is much
more believable than the +20.0% from lack of measurement accuracy,
especially combined with data below)

> *git diff*
> full-v3     0.07(0.04+0.04)  0.24(0.11+0.14) +242.9%  0.07(0.04+0.04) +0.0%
> full-v4     0.07(0.03+0.05)  0.24(0.13+0.12) +242.9%  0.08(0.04+0.05) +14.3%

before-series,full-v4$ hyperfine '~/floss/git/git diff'
Benchmark #1: ~/floss/git/git diff
  Time (mean ± σ):      69.9 ms ±   1.9 ms    [User: 37.5 ms, System: 47.6 ms]
  Range (min … max):    66.5 ms …  74.0 ms    44 runs

post-optim,full-v4$ hyperfine '~/floss/git/git diff'
Benchmark #1: ~/floss/git/git diff
  Time (mean ± σ):      73.0 ms ±   1.7 ms    [User: 40.7 ms, System: 47.1 ms]
  Range (min … max):    70.3 ms …  76.2 ms    40 runs

So 73.0/69.9 ~ 1.044, so a +4.4% to the execution timing -- much less
than the +14.3% reported due to lack of measurement accuracy.

(If I had used ~/floss/git/bin-wrappers/git instead of
~/floss/git/git, the timings would have been going from 72.3ms to
75.1ms, representing a +3.9% increase instead.  Probably doesn't
matter, but just in case folks were curious about what if I used the
bin-wrappers.)

> sparse-v3   0.02(0.01+0.04)  0.02(0.01+0.04) +0.0%    0.02(0.01+0.05) +0.0%
> sparse-v4   0.02(0.02+0.03)  0.02(0.01+0.04) +0.0%    0.02(0.01+0.04) +0.0%

Here, the timings went from 17.5ms to 17.6ms, or +0.6%.  It's really
hard to measure the overhead of the changes of my patch series when
sparse-index is in use, which makes sense since the extra loop only
has to check the toplevel directory, not any of the entries under it.
But it does show just a minor cost in runtime due to the new added
check.



I think the original commit message makes it clear that with the (new)
optimization, the timings are quite reasonable, but maybe this little
extra flavor helps for anyone who was just looking at the percentage
changes.  The timings and timing differences really need to be bigger
than the precision in order to trust the percentage differences, which
just wasn't the case for t/perf here.  But t/perf was good enough to
show that the timings after my series are roughly the same as before
the series (never more than 0.01s slower).
diff mbox series

Patch

diff --git a/sparse-index.c b/sparse-index.c
index b82648b10ee..eed170cd8f7 100644
--- a/sparse-index.c
+++ b/sparse-index.c
@@ -341,18 +341,70 @@  void ensure_correct_sparsity(struct index_state *istate)
 		ensure_full_index(istate);
 }
 
+static int path_found(const char *path, const char **dirname, size_t *dir_len,
+		      int *dir_found)
+{
+	struct stat st;
+	char *newdir;
+	char *tmp;
+
+	/*
+	 * If dirname corresponds to a directory that doesn't exist, and this
+	 * path starts with dirname, then path can't exist.
+	 */
+	if (!*dir_found && !memcmp(path, *dirname, *dir_len))
+		return 0;
+
+	/*
+	 * If path itself exists, return 1.
+	 */
+	if (!lstat(path, &st))
+		return 1;
+
+	/*
+	 * Otherwise, path does not exist so we'll return 0...but we'll first
+	 * determine some info about its parent directory so we can avoid
+	 * lstat calls for future cache entries.
+	 */
+	newdir = strrchr(path, '/');
+	if (!newdir)
+		return 0; /* Didn't find a parent dir; just return 0 now. */
+
+	/*
+	 * If path starts with directory (which we already lstat'ed and found),
+	 * then no need to lstat parent directory again.
+	 */
+	if (*dir_found && *dirname && memcmp(path, *dirname, *dir_len))
+		return 0;
+
+	/* Free previous dirname, and cache path's dirname */
+	*dirname = path;
+	*dir_len = newdir - path + 1;
+
+	tmp = xstrndup(path, *dir_len);
+	*dir_found = !lstat(tmp, &st);
+	free(tmp);
+
+	return 0;
+}
+
 void clear_skip_worktree_from_present_files(struct index_state *istate)
 {
+	const char *last_dirname = NULL;
+	size_t dir_len = 0;
+	int dir_found = 1;
+
 	int i;
+
 	if (!core_apply_sparse_checkout)
 		return;
 
 restart:
 	for (i = 0; i < istate->cache_nr; i++) {
 		struct cache_entry *ce = istate->cache[i];
-		struct stat st;
 
-		if (ce_skip_worktree(ce) && !lstat(ce->name, &st)) {
+		if (ce_skip_worktree(ce) &&
+		    path_found(ce->name, &last_dirname, &dir_len, &dir_found)) {
 			if (S_ISSPARSEDIR(ce->ce_mode)) {
 				ensure_full_index(istate);
 				goto restart;