diff mbox series

[RFC,19/21] symlinks: make has_dirs_only_path() track FL_NOENT

Message ID e4aed47fa62cb2d7ed3411c5ef1464025572c2ec.1597093021.git.matheus.bernardino@usp.br (mailing list archive)
State New, archived
Headers show
Series Parallel checkout | expand

Commit Message

Matheus Tavares Aug. 10, 2020, 9:33 p.m. UTC
In the next patch, the parallel-checkout workers will be able to create
the leading directories of the entries being written by themselves, to
increase performance. But to do so, the main process will first need to
remove non-directory files that can potentially be in the way (the
reasoning is discussed in the next patch). This can be done without much
cost by calling has_dirs_only_path() for each path component until we
find the first one which is not a real directory, which should then be
removed. This operations is cheap because it doesn't have to call stat()
again for each component, as the information is already cached from the
previous call at entry.c:check_path().

However, when has_dirs_only_path() returns false, we don't know if the
component doesn't exist or if it exists as another file type. The best
we could do in this case would be to stat() the component again. When
there are many files to be checked out inside the same directory (yet
to be created by a worker), we would have to call stat() for the same
directory once for each path, even though there is nothing to be
unlinked there. We can skip this stat() calls by making
has_dirs_only_path() also ask for FL_NOENT caching, and converting its
return to a tri-state.

Note: since we are now caching FL_NOENT, we also need to manually
invalidate the cache when we create a directory in a path previously
cached as non-existent.

While we are here, also remove duplicated comments in
has_dirs_only_path() and check_leading_path().

Signed-off-by: Matheus Tavares <matheus.bernardino@usp.br>
---
 cache.h             |  1 +
 entry.c             | 11 +++++++++--
 parallel-checkout.c |  3 +++
 symlinks.c          | 42 ++++++++++++++++++------------------------
 4 files changed, 31 insertions(+), 26 deletions(-)
diff mbox series

Patch

diff --git a/cache.h b/cache.h
index e2b41c5f8b..7a08cd6f0e 100644
--- a/cache.h
+++ b/cache.h
@@ -1711,6 +1711,7 @@  int has_symlink_leading_path(const char *name, int len);
 int threaded_has_symlink_leading_path(struct cache_def *, const char *, int);
 int check_leading_path(const char *name, int len);
 int has_dirs_only_path(const char *name, int len, int prefix_len);
+void reset_default_lstat_cache(void);
 void schedule_dir_for_removal(const char *name, int len);
 void remove_scheduled_dirs(void);
 
diff --git a/entry.c b/entry.c
index 19f2c1d132..e876adff19 100644
--- a/entry.c
+++ b/entry.c
@@ -14,6 +14,7 @@  static void create_directories(const char *path, int path_len,
 {
 	char *buf = xmallocz(path_len);
 	int len = 0;
+	int reset_cache = 0;
 
 	while (len < path_len) {
 		do {
@@ -31,7 +32,7 @@  static void create_directories(const char *path, int path_len,
 		 * we test the path components of the prefix with the
 		 * stat() function instead of the lstat() function.
 		 */
-		if (has_dirs_only_path(buf, len, state->base_dir_len))
+		if (has_dirs_only_path(buf, len, state->base_dir_len) > 0)
 			continue; /* ok, it is already a directory. */
 
 		/*
@@ -45,8 +46,14 @@  static void create_directories(const char *path, int path_len,
 			    !unlink_or_warn(buf) && !mkdir(buf, 0777))
 				continue;
 			die_errno("cannot create directory at '%s'", buf);
+		} else {
+			/* The cache had FL_NOENT, but we now created a dir */
+			reset_cache = 1;
 		}
 	}
+
+	if (reset_cache)
+		reset_default_lstat_cache();
 	free(buf);
 }
 
@@ -406,7 +413,7 @@  static int check_path(const char *path, int len, struct stat *st, int skiplen)
 
 	while (path < slash && !is_dir_sep(*slash))
 		slash--;
-	if (!has_dirs_only_path(path, slash - path, skiplen)) {
+	if (has_dirs_only_path(path, slash - path, skiplen) <= 0) {
 		errno = ENOENT;
 		return -1;
 	}
diff --git a/parallel-checkout.c b/parallel-checkout.c
index fee93460c1..4d72540256 100644
--- a/parallel-checkout.c
+++ b/parallel-checkout.c
@@ -185,6 +185,9 @@  static int handle_results(struct checkout *state)
 
 	pc_status = PC_HANDLING_RESULTS;
 
+	/* Workers might have created dirs, so the cache must be invalidated */
+	reset_default_lstat_cache();
+
 	for (i = 0; i < parallel_checkout->nr; ++i) {
 		struct checkout_item *ci = &parallel_checkout->items[i];
 		struct stat *st = &ci->st;
diff --git a/symlinks.c b/symlinks.c
index 69d458a24d..3adf6ef8a1 100644
--- a/symlinks.c
+++ b/symlinks.c
@@ -47,6 +47,11 @@  static inline void reset_lstat_cache(struct cache_def *cache)
 	 */
 }
 
+void reset_default_lstat_cache(void)
+{
+	reset_lstat_cache(&default_cache);
+}
+
 #define FL_DIR      (1 << 0)
 #define FL_NOENT    (1 << 1)
 #define FL_SYMLINK  (1 << 2)
@@ -210,15 +215,6 @@  int has_symlink_leading_path(const char *name, int len)
 	return threaded_has_symlink_leading_path(&default_cache, name, len);
 }
 
-/*
- * Return zero if path 'name' has a leading symlink component or
- * if some leading path component does not exists.
- *
- * Return -1 if leading path exists and is a directory.
- *
- * Return path length if leading path exists and is neither a
- * directory nor a symlink.
- */
 int check_leading_path(const char *name, int len)
 {
 	return threaded_check_leading_path(&default_cache, name, len);
@@ -246,30 +242,28 @@  static int threaded_check_leading_path(struct cache_def *cache, const char *name
 		return match_len;
 }
 
-/*
- * Return non-zero if all path components of 'name' exists as a
- * directory.  If prefix_len > 0, we will test with the stat()
- * function instead of the lstat() function for a prefix length of
- * 'prefix_len', thus we then allow for symlinks in the prefix part as
- * long as those points to real existing directories.
- */
 int has_dirs_only_path(const char *name, int len, int prefix_len)
 {
 	return threaded_has_dirs_only_path(&default_cache, name, len, prefix_len);
 }
 
 /*
- * Return non-zero if all path components of 'name' exists as a
- * directory.  If prefix_len > 0, we will test with the stat()
- * function instead of the lstat() function for a prefix length of
- * 'prefix_len', thus we then allow for symlinks in the prefix part as
- * long as those points to real existing directories.
+ * Return a positive number if all path components of 'name' exist as
+ * directories, a negative number if a component does not exist, and 0 otherwise
+ * (e.g. a component exists but as another file type). If prefix_len > 0, we
+ * will test with the stat() function instead of the lstat() function for a
+ * prefix length of 'prefix_len', thus we return +1 for symlinks in the prefix
+ * part as long as those points to real existing directories.
  */
 static int threaded_has_dirs_only_path(struct cache_def *cache, const char *name, int len, int prefix_len)
 {
-	return lstat_cache(cache, name, len,
-			   FL_DIR|FL_FULLPATH, prefix_len) &
-		FL_DIR;
+	int flags = lstat_cache(cache, name, len,
+				FL_NOENT|FL_DIR|FL_FULLPATH, prefix_len);
+	if (flags & FL_DIR)
+		return 1;
+	if (flags & FL_NOENT)
+		return -1;
+	return 0;
 }
 
 static struct strbuf removal = STRBUF_INIT;