[RFC,20/21] parallel-checkout: create leading dirs in workers
diff mbox series

Message ID d9e8bad5d4e6cf3c44c6e48d28c40536afcba115.1597093021.git.matheus.bernardino@usp.br
State New
Headers show
Series
  • Parallel checkout
Related show

Commit Message

Matheus Tavares Aug. 10, 2020, 9:33 p.m. UTC
Allow the parallel workers to create the leading directories of the
entries being checked out, instead of pre-creating them in the main
process. This optimization should be more effective on file systems with
higher I/O latency.

Part of the process of creating leading dirs is the removal of any
non-directory file that could be in the way. This is currently done
inside entry.c:create_directories(). However, if we were to move this to
the workers as well, we would risk removing a file just written by
another worker, which collided with the one currently being written.  In
a worse scenario, we could remove the file right after a worker have
closed it but before it called stat(). To avoid these problems, let's
remove the non-directory files in the main process. And to avoid the
cost of extra lstat() calls in this process, we use
has_dirs_only_path(), which will have the necessary information already
cached from check_path().

Finally, to create the leading dirs in the workers, we could re-use
create_directories(). But, unlike the main process, we wouldn't have the
stat() information cached. Thus, let's use raceproof_create_file(),
which will only stat() the path components after a open() failure,
saving us time when creating subsequent files in the same directory.

Signed-off-by: Matheus Tavares <matheus.bernardino@usp.br>
---
 entry.c             | 45 ++++++++++++++++++++++++++++++++++++++++++---
 parallel-checkout.c | 42 ++++++++++++++++++++++++++++++++++++------
 2 files changed, 78 insertions(+), 9 deletions(-)

Patch
diff mbox series

diff --git a/entry.c b/entry.c
index e876adff19..5dfd4d150d 100644
--- a/entry.c
+++ b/entry.c
@@ -57,6 +57,43 @@  static void create_directories(const char *path, int path_len,
 	free(buf);
 }
 
+static void remove_non_dirs(const char *path, int path_len,
+			    const struct checkout *state)
+{
+	char *buf = xmallocz(path_len);
+	int len = 0;
+
+	while (len < path_len) {
+		int ret;
+
+		do {
+			buf[len] = path[len];
+			len++;
+		} while (len < path_len && !is_dir_sep(path[len]));
+		if (len >= path_len)
+			break;
+		buf[len] = 0;
+
+		ret = has_dirs_only_path(buf, len, state->base_dir_len);
+
+		if (ret > 0)
+			continue; /* Is directory. */
+		if (ret < 0)
+			break; /* No entry */
+
+		/* ret == 0: not a directory, let's unlink it. */
+
+		if (!state->force)
+			die("'%s' already exists, and it's not a directory", buf);
+
+		if (unlink(buf))
+			die_errno("cannot unlink '%s'", buf);
+		else
+			break;
+	}
+	free(buf);
+}
+
 static void remove_subtree(struct strbuf *path)
 {
 	DIR *dir = opendir(path->buf);
@@ -555,8 +592,6 @@  int checkout_entry_ca(struct cache_entry *ce, struct conv_attrs *ca,
 	} else if (state->not_new)
 		return 0;
 
-	create_directories(path.buf, path.len, state);
-
 	if (nr_checkouts)
 		(*nr_checkouts)++;
 
@@ -565,9 +600,13 @@  int checkout_entry_ca(struct cache_entry *ce, struct conv_attrs *ca,
 		ca = &ca_buf;
 	}
 
-	if (!enqueue_checkout(ce, ca))
+	if (!enqueue_checkout(ce, ca)) {
+		/* "clean" path so that workers can create leading dirs */
+		remove_non_dirs(path.buf, path.len, state);
 		return 0;
+	}
 
+	create_directories(path.buf, path.len, state);
 	return write_entry(ce, path.buf, ca, state, 0);
 }
 
diff --git a/parallel-checkout.c b/parallel-checkout.c
index 4d72540256..5b73d8fa4b 100644
--- a/parallel-checkout.c
+++ b/parallel-checkout.c
@@ -298,20 +298,48 @@  static int close_and_clear(int *fd)
 	return ret;
 }
 
+struct ci_open_data {
+	int fd;
+	unsigned int mode;
+};
+
+static int ci_open(const char *path, void *cb)
+{
+	struct ci_open_data *data = cb;
+	data->fd = open(path, O_WRONLY | O_CREAT | O_EXCL, data->mode);
+
+	if (data->fd < 0) {
+		/*
+		 * EISDIR can only indicate path collisions among the entries
+		 * being checked out. We don't need raceproof_create_file() to
+		 * try removing empty dirs. Instead, just let the caller known
+		 * that the path already exists, so that the collision can be
+		 * properly handled later.
+		 */
+		if (errno == EISDIR)
+			errno = EEXIST;
+		return 1;
+	}
+
+	return 0;
+}
+
 void write_checkout_item(struct checkout *state, struct checkout_item *ci)
 {
-	unsigned int mode = (ci->ce->ce_mode & 0100) ? 0777 : 0666;
+	struct ci_open_data open_data;
 	int fd = -1, fstat_done = 0;
 	struct strbuf path = STRBUF_INIT;
 
+	open_data.mode = (ci->ce->ce_mode & 0100) ? 0777 : 0666;
 	strbuf_add(&path, state->base_dir, state->base_dir_len);
 	strbuf_add(&path, ci->ce->name, ci->ce->ce_namelen);
 
-	fd = open(path.buf, O_WRONLY | O_CREAT | O_EXCL, mode);
-
-	if (fd < 0) {
-		if (errno == EEXIST || errno == EISDIR || errno == ENOENT ||
-		    errno == ENOTDIR) {
+	/*
+	 * The main process already removed any non-directory file that was in
+	 * the way. So if we find one, it's a path collision.
+	 */
+	if (raceproof_create_file(path.buf, ci_open, &open_data)) {
+		if (errno == EEXIST || errno == ENOTDIR || errno == ENOENT) {
 			/*
 			 * Errors which probably represent a path collision.
 			 * Suppress the error message and mark the ci to be
@@ -325,6 +353,8 @@  void write_checkout_item(struct checkout *state, struct checkout_item *ci)
 		goto out;
 	}
 
+	fd = open_data.fd;
+
 	if (write_checkout_item_to_fd(fd, state, ci, path.buf)) {
 		/* Error was already reported. */
 		ci->status = CI_FAILED;