diff mbox series

[4/5] sparse-index: count lstat() calls

Message ID 88a3145e585169fde8cd7d43a435daa07eb82667.1718899877.git.gitgitgadget@gmail.com (mailing list archive)
State Superseded
Headers show
Series sparse-index: improve clear_skip_worktree_from_present_files() | expand

Commit Message

Derrick Stolee June 20, 2024, 4:11 p.m. UTC
From: Derrick Stolee <stolee@gmail.com>

The clear_skip_worktree.. methods already report some statistics about
how many cache entries are checked against path_found() due to having
the skip-worktree bit set. However, due to path_found() performing some
caching, this isn't the only information that would be helpful to
report.

Add a new lstat_count member to the path_found_data struct to count the
number of times path_found() calls lstat(). This will be helpful to help
explain performance problems in this method as well as to demonstrate
future changes to the caching algorithm in a more concrete way than
end-to-end timings.

Signed-off-by: Derrick Stolee <stolee@gmail.com>
---
 sparse-index.c | 7 +++++++
 1 file changed, 7 insertions(+)

Comments

Elijah Newren June 24, 2024, 10:13 p.m. UTC | #1
On Thu, Jun 20, 2024 at 10:12 AM Derrick Stolee via GitGitGadget
<gitgitgadget@gmail.com> wrote:
>
> From: Derrick Stolee <stolee@gmail.com>
>
> The clear_skip_worktree.. methods already report some statistics about
> how many cache entries are checked against path_found() due to having
> the skip-worktree bit set. However, due to path_found() performing some
> caching, this isn't the only information that would be helpful to
> report.
>
> Add a new lstat_count member to the path_found_data struct to count the
> number of times path_found() calls lstat(). This will be helpful to help
> explain performance problems in this method as well as to demonstrate
> future changes to the caching algorithm in a more concrete way than
> end-to-end timings.
>
> Signed-off-by: Derrick Stolee <stolee@gmail.com>
> ---
>  sparse-index.c | 7 +++++++
>  1 file changed, 7 insertions(+)
>
> diff --git a/sparse-index.c b/sparse-index.c
> index fec4f393360..8577fa726b8 100644
> --- a/sparse-index.c
> +++ b/sparse-index.c
> @@ -442,6 +442,7 @@ void ensure_correct_sparsity(struct index_state *istate)
>  struct path_found_data {
>         struct strbuf dir;
>         int dir_found;
> +       size_t lstat_count;
>  };
>
>  #define PATH_FOUND_DATA_INIT { \
> @@ -469,6 +470,7 @@ static int path_found(const char *path, struct path_found_data *data)
>         /*
>          * If path itself exists, return 1.
>          */
> +       data->lstat_count++;
>         if (!lstat(path, &st))
>                 return 1;
>
> @@ -493,6 +495,7 @@ static int path_found(const char *path, struct path_found_data *data)
>         strbuf_reset(&data->dir);
>         strbuf_add(&data->dir, path, newdir - path + 1);
>
> +       data->lstat_count++;
>         data->dir_found = !lstat(data->dir.buf, &st);
>
>         return 0;
> @@ -524,6 +527,8 @@ static int clear_skip_worktree_from_present_files_sparse(struct index_state *ist
>
>         trace2_data_intmax("index", istate->repo,
>                            "sparse_path_count", path_count);
> +       trace2_data_intmax("index", istate->repo,
> +                          "sparse_lstat_count", data.lstat_count);
>         trace2_region_leave("index", "clear_skip_worktree_from_present_files_sparse",
>                             istate->repo);
>         clear_path_found_data(&data);
> @@ -553,6 +558,8 @@ static void clear_skip_worktree_from_present_files_full(struct index_state *ista
>
>         trace2_data_intmax("index", istate->repo,
>                            "full_path_count", path_count);
> +       trace2_data_intmax("index", istate->repo,
> +                          "full_lstat_count", data.lstat_count);
>         trace2_region_leave("index", "clear_skip_worktree_from_present_files_full",
>                             istate->repo);
>         clear_path_found_data(&data);
> --
> gitgitgadget

Makes sense.  I'm getting really curious to see how this all fits into
the big improvements you made; I guess I'll see that in the next
patch...
diff mbox series

Patch

diff --git a/sparse-index.c b/sparse-index.c
index fec4f393360..8577fa726b8 100644
--- a/sparse-index.c
+++ b/sparse-index.c
@@ -442,6 +442,7 @@  void ensure_correct_sparsity(struct index_state *istate)
 struct path_found_data {
 	struct strbuf dir;
 	int dir_found;
+	size_t lstat_count;
 };
 
 #define PATH_FOUND_DATA_INIT { \
@@ -469,6 +470,7 @@  static int path_found(const char *path, struct path_found_data *data)
 	/*
 	 * If path itself exists, return 1.
 	 */
+	data->lstat_count++;
 	if (!lstat(path, &st))
 		return 1;
 
@@ -493,6 +495,7 @@  static int path_found(const char *path, struct path_found_data *data)
 	strbuf_reset(&data->dir);
 	strbuf_add(&data->dir, path, newdir - path + 1);
 
+	data->lstat_count++;
 	data->dir_found = !lstat(data->dir.buf, &st);
 
 	return 0;
@@ -524,6 +527,8 @@  static int clear_skip_worktree_from_present_files_sparse(struct index_state *ist
 
 	trace2_data_intmax("index", istate->repo,
 			   "sparse_path_count", path_count);
+	trace2_data_intmax("index", istate->repo,
+			   "sparse_lstat_count", data.lstat_count);
 	trace2_region_leave("index", "clear_skip_worktree_from_present_files_sparse",
 			    istate->repo);
 	clear_path_found_data(&data);
@@ -553,6 +558,8 @@  static void clear_skip_worktree_from_present_files_full(struct index_state *ista
 
 	trace2_data_intmax("index", istate->repo,
 			   "full_path_count", path_count);
+	trace2_data_intmax("index", istate->repo,
+			   "full_lstat_count", data.lstat_count);
 	trace2_region_leave("index", "clear_skip_worktree_from_present_files_full",
 			    istate->repo);
 	clear_path_found_data(&data);