diff mbox series

[v2,8/8,RFC] dir: report number of visited directories and paths with trace2

Message ID fba4d65b78c724eba7ae1fbc286ac29a210d6ac8.1620432501.git.gitgitgadget@gmail.com (mailing list archive)
State Superseded
Headers show
Series Directory traversal fixes | expand

Commit Message

Elijah Newren May 8, 2021, 12:08 a.m. UTC
From: Elijah Newren <newren@gmail.com>

Previously, tests that wanted to verify that we don't traverse into a
deep directory hierarchy that is ignored had no easy way to verify and
enforce that behavior.  Record information about the number of
directories and paths we inspect while traversing the directory
hierarchy in read_directory(), and when trace2 is enabled, print these
statistics.

Make use of these statistics in t7300 to simplify (and vastly improve
the performance of) the "avoid traversing into ignored directories"
test.

Signed-off-by: Elijah Newren <newren@gmail.com>
---
 dir.c                             |  8 ++++++
 dir.h                             |  4 +++
 t/t7063-status-untracked-cache.sh |  1 +
 t/t7300-clean.sh                  | 46 ++++++++++---------------------
 4 files changed, 27 insertions(+), 32 deletions(-)
diff mbox series

Patch

diff --git a/dir.c b/dir.c
index 23c71ab7e9a1..896a9a62b2c7 100644
--- a/dir.c
+++ b/dir.c
@@ -2455,6 +2455,7 @@  static enum path_treatment read_directory_recursive(struct dir_struct *dir,
 
 	if (open_cached_dir(&cdir, dir, untracked, istate, &path, check_only))
 		goto out;
+	dir->visited_directories++;
 
 	if (untracked)
 		untracked->check_only = !!check_only;
@@ -2463,6 +2464,7 @@  static enum path_treatment read_directory_recursive(struct dir_struct *dir,
 		/* check how the file or directory should be treated */
 		state = treat_path(dir, untracked, &cdir, istate, &path,
 				   baselen, pathspec);
+		dir->visited_paths++;
 
 		if (state > dir_state)
 			dir_state = state;
@@ -2778,6 +2780,10 @@  static struct untracked_cache_dir *validate_untracked_cache(struct dir_struct *d
 static void trace2_read_directory_statistics(struct dir_struct *dir,
 					     struct repository *repo)
 {
+	trace2_data_intmax("read_directory", repo,
+			   "directories-visited", dir->visited_directories);
+	trace2_data_intmax("read_directory", repo,
+			   "paths-visited", dir->visited_paths);
 	if (!dir->untracked)
 		return;
 	trace2_data_intmax("read_directory", repo,
@@ -2798,6 +2804,8 @@  int read_directory(struct dir_struct *dir, struct index_state *istate,
 	struct untracked_cache_dir *untracked;
 
 	trace2_region_enter("dir", "read_directory", istate->repo);
+	dir->visited_paths = 0;
+	dir->visited_directories = 0;
 
 	if (has_symlink_leading_path(path, len)) {
 		trace_performance_leave("read directory %.*s", len, path);
diff --git a/dir.h b/dir.h
index 04d886cfce75..22c67907f689 100644
--- a/dir.h
+++ b/dir.h
@@ -336,6 +336,10 @@  struct dir_struct {
 	struct oid_stat ss_info_exclude;
 	struct oid_stat ss_excludes_file;
 	unsigned unmanaged_exclude_files;
+
+	/* Stats about the traversal */
+	unsigned visited_paths;
+	unsigned visited_directories;
 };
 
 /*Count the number of slashes for string s*/
diff --git a/t/t7063-status-untracked-cache.sh b/t/t7063-status-untracked-cache.sh
index 6bce65b439e3..1517c316892f 100755
--- a/t/t7063-status-untracked-cache.sh
+++ b/t/t7063-status-untracked-cache.sh
@@ -65,6 +65,7 @@  get_relevant_traces() {
 	INPUT_FILE=$1
 	OUTPUT_FILE=$2
 	grep data.*read_directo $INPUT_FILE \
+	    | grep -v visited \
 	    | cut -d "|" -f 9 \
 	    >$OUTPUT_FILE
 }
diff --git a/t/t7300-clean.sh b/t/t7300-clean.sh
index c2a3b7b6a52b..2c10a7b64f11 100755
--- a/t/t7300-clean.sh
+++ b/t/t7300-clean.sh
@@ -747,42 +747,24 @@  test_expect_success 'clean untracked paths by pathspec' '
 '
 
 test_expect_success 'avoid traversing into ignored directories' '
-	test_when_finished rm -f output error &&
+	test_when_finished rm -f output error trace.* &&
 	test_create_repo avoid-traversing-deep-hierarchy &&
 	(
 		cd avoid-traversing-deep-hierarchy &&
 
-		>directory-random-file.txt &&
-		# Put this file under directory400/directory399/.../directory1/
-		depth=400 &&
-		for x in $(test_seq 1 $depth); do
-			mkdir "tmpdirectory$x" &&
-			mv directory* "tmpdirectory$x" &&
-			mv "tmpdirectory$x" "directory$x"
-		done &&
-
-		git clean -ffdxn -e directory$depth >../output 2>../error &&
-
-		test_must_be_empty ../output &&
-		# We especially do not want things like
-		#   "warning: could not open directory "
-		# appearing in the error output.  It is true that directories
-		# that are too long cannot be opened, but we should not be
-		# recursing into those directories anyway since the very first
-		# level is ignored.
-		test_must_be_empty ../error &&
-
-		# alpine-linux-musl fails to "rm -rf" a directory with such
-		# a deeply nested hierarchy.  Help it out by deleting the
-		# leading directories ourselves.  Super slow, but, what else
-		# can we do?  Without this, we will hit a
-		#     error: Tests passed but test cleanup failed; aborting
-		# so do this ugly manual cleanup...
-		while test ! -f directory-random-file.txt; do
-			name=$(ls -d directory*) &&
-			mv $name/* . &&
-			rmdir $name
-		done
+		mkdir -p untracked/subdir/with/a &&
+		>untracked/subdir/with/a/random-file.txt &&
+
+		GIT_TRACE2_PERF="$TRASH_DIRECTORY/trace.output" \
+		git clean -ffdxn -e untracked &&
+
+		grep data.*read_directo.*visited ../trace.output \
+			| cut -d "|" -f 9 >../trace.relevant &&
+		cat >../trace.expect <<-EOF &&
+		 directories-visited:1
+		 paths-visited:4
+		EOF
+		test_cmp ../trace.expect ../trace.relevant
 	)
 '