diff mbox series

[v2,7/7] unpack-trees: improve performance of next_cache_entry

Message ID aa963eefae75d41983201e24398bc5692267b91b.1633440057.git.gitgitgadget@gmail.com (mailing list archive)
State Superseded
Headers show
Series Sparse Index: integrate with reset | expand

Commit Message

Victoria Dye Oct. 5, 2021, 1:20 p.m. UTC
From: Victoria Dye <vdye@github.com>

To find the first non-unpacked cache entry, `next_cache_entry` iterates
through index, starting at `cache_bottom`. The performance of this in full
indexes is helped by `cache_bottom` advancing with each invocation of
`mark_ce_used` (called by `unpack_index_entry`). However, the presence of
sparse directories can prevent the `cache_bottom` from advancing in a sparse
index case, effectively forcing `next_cache_entry` to search from the
beginning of the index each time it is called.

The `cache_bottom` must be preserved for the sparse index (see 17a1bb570b
(unpack-trees: preserve cache_bottom, 2021-07-14)).  Therefore, to retain
the benefit `cache_bottom` provides in non-sparse index cases, a separate
`hint` position indicates the first position `next_cache_entry` should
search, updated each execution with a new position.  The performance of `git
reset -- does-not-exist` (testing the "worst case" in which all entries in
the index are unpacked with `next_cache_entry`) is significantly improved
for the sparse index case:

Test          before            after
------------------------------------------------------
(full-v3)     0.79(0.38+0.30)   0.91(0.43+0.34) +15.2%
(full-v4)     0.80(0.38+0.29)   0.85(0.40+0.35) +6.2%
(sparse-v3)   0.76(0.43+0.69)   0.44(0.08+0.67) -42.1%
(sparse-v4)   0.71(0.40+0.65)   0.41(0.09+0.65) -42.3%

Signed-off-by: Victoria Dye <vdye@github.com>
---
 unpack-trees.c | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

Comments

Bagas Sanjaya Oct. 6, 2021, 10:37 a.m. UTC | #1
On 05/10/21 20.20, Victoria Dye via GitGitGadget wrote:
> The `cache_bottom` must be preserved for the sparse index (see 17a1bb570b
> (unpack-trees: preserve cache_bottom, 2021-07-14)).  Therefore, to retain
> the benefit `cache_bottom` provides in non-sparse index cases, a separate
> `hint` position indicates the first position `next_cache_entry` should
> search, updated each execution with a new position.  The performance of `git
> reset -- does-not-exist` (testing the "worst case" in which all entries in
> the index are unpacked with `next_cache_entry`) is significantly improved
> for the sparse index case:

Did you mean `a separate `hint` ... should be searched`?
diff mbox series

Patch

diff --git a/unpack-trees.c b/unpack-trees.c
index 8ea0a542da8..b94733de6be 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -645,17 +645,24 @@  static void mark_ce_used_same_name(struct cache_entry *ce,
 	}
 }
 
-static struct cache_entry *next_cache_entry(struct unpack_trees_options *o)
+static struct cache_entry *next_cache_entry(struct unpack_trees_options *o, int *hint)
 {
 	const struct index_state *index = o->src_index;
 	int pos = o->cache_bottom;
 
+	if (*hint > pos)
+		pos = *hint;
+
 	while (pos < index->cache_nr) {
 		struct cache_entry *ce = index->cache[pos];
-		if (!(ce->ce_flags & CE_UNPACKED))
+		if (!(ce->ce_flags & CE_UNPACKED)) {
+			*hint = pos + 1;
 			return ce;
+		}
 		pos++;
 	}
+
+	*hint = pos;
 	return NULL;
 }
 
@@ -1365,12 +1372,13 @@  static int unpack_callback(int n, unsigned long mask, unsigned long dirmask, str
 
 	/* Are we supposed to look at the index too? */
 	if (o->merge) {
+		int hint = -1;
 		while (1) {
 			int cmp;
 			struct cache_entry *ce;
 
 			if (o->diff_index_cached)
-				ce = next_cache_entry(o);
+				ce = next_cache_entry(o, &hint);
 			else
 				ce = find_cache_entry(info, p);
 
@@ -1690,7 +1698,7 @@  static int verify_absent(const struct cache_entry *,
 int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options *o)
 {
 	struct repository *repo = the_repository;
-	int i, ret;
+	int i, hint, ret;
 	static struct cache_entry *dfc;
 	struct pattern_list pl;
 	int free_pattern_list = 0;
@@ -1763,13 +1771,15 @@  int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options
 		info.pathspec = o->pathspec;
 
 		if (o->prefix) {
+			hint = -1;
+
 			/*
 			 * Unpack existing index entries that sort before the
 			 * prefix the tree is spliced into.  Note that o->merge
 			 * is always true in this case.
 			 */
 			while (1) {
-				struct cache_entry *ce = next_cache_entry(o);
+				struct cache_entry *ce = next_cache_entry(o, &hint);
 				if (!ce)
 					break;
 				if (ce_in_traverse_path(ce, &info))
@@ -1790,8 +1800,9 @@  int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options
 
 	/* Any left-over entries in the index? */
 	if (o->merge) {
+		hint = -1;
 		while (1) {
-			struct cache_entry *ce = next_cache_entry(o);
+			struct cache_entry *ce = next_cache_entry(o, &hint);
 			if (!ce)
 				break;
 			if (unpack_index_entry(ce, o) < 0)