@@ -57,6 +57,43 @@ static void create_directories(const char *path, int path_len,
free(buf);
}
+/*
+ * Walk the leading components of `path` (its first `path_len` bytes) and
+ * get rid of a non-directory entry that sits where a directory is needed.
+ *
+ * Each prefix that ends right before a directory separator is checked
+ * with has_dirs_only_path(), using state->base_dir_len as the prefix
+ * length. The final component of `path` is never examined.
+ *
+ * When a non-directory is found, this dies unless state->force is set;
+ * otherwise the entry is unlinked (dying if unlink() fails) and the walk
+ * stops.
+ *
+ * NOTE(review): the three-way reading of has_dirs_only_path()'s result
+ * (positive: directory, negative: no entry, zero: non-directory) is what
+ * this function assumes -- confirm it against that helper.
+ */
+static void remove_non_dirs(const char *path, int path_len,
+			    const struct checkout *state)
+{
+	char *prefix = xmallocz(path_len);
+	int pos = 0;
+
+	while (pos < path_len) {
+		int status;
+
+		/*
+		 * Copy the next component. The first byte is taken
+		 * unconditionally, so a separator at the current position
+		 * becomes part of the prefix; then copy up to (but not
+		 * including) the following separator.
+		 */
+		prefix[pos] = path[pos];
+		for (pos++; pos < path_len && !is_dir_sep(path[pos]); pos++)
+			prefix[pos] = path[pos];
+
+		/* Reached the end: what was copied is the entry itself. */
+		if (pos >= path_len)
+			break;
+		prefix[pos] = 0;
+
+		status = has_dirs_only_path(prefix, pos, state->base_dir_len);
+
+		if (status > 0)
+			continue; /* Already a directory, keep descending. */
+		if (status < 0)
+			break; /* Nothing there, so nothing to remove. */
+
+		/* status == 0: something other than a directory is here. */
+
+		if (!state->force)
+			die("'%s' already exists, and it's not a directory", prefix);
+
+		/* Once the obstacle is gone there is nothing below it. */
+		if (!unlink(prefix))
+			break;
+		die_errno("cannot unlink '%s'", prefix);
+	}
+	free(prefix);
+}
+
static void remove_subtree(struct strbuf *path)
{
DIR *dir = opendir(path->buf);
@@ -555,8 +592,6 @@ int checkout_entry_ca(struct cache_entry *ce, struct conv_attrs *ca,
} else if (state->not_new)
return 0;
- create_directories(path.buf, path.len, state);
-
if (nr_checkouts)
(*nr_checkouts)++;
@@ -565,9 +600,13 @@ int checkout_entry_ca(struct cache_entry *ce, struct conv_attrs *ca,
ca = &ca_buf;
}
- if (!enqueue_checkout(ce, ca))
+ if (!enqueue_checkout(ce, ca)) {
+ /* "clean" path so that workers can create leading dirs */
+ remove_non_dirs(path.buf, path.len, state);
return 0;
+ }
+ create_directories(path.buf, path.len, state);
return write_entry(ce, path.buf, ca, state, 0);
}
@@ -298,20 +298,48 @@ static int close_and_clear(int *fd)
return ret;
}
+/*
+ * Callback payload for ci_open(): carries the creation mode in and the
+ * resulting file descriptor out.
+ */
+struct ci_open_data {
+ int fd; /* value returned by open(); negative when the open failed */
+ unsigned int mode; /* mode argument handed to open() */
+};
+
+/*
+ * Callback that exclusively creates `path` for writing.
+ *
+ * `cb` points to a struct ci_open_data: its `mode` is used as the creation
+ * mode and its `fd` receives the result of open().
+ *
+ * Returns 0 when the file was created and 1 otherwise, with errno left
+ * describing the failure (EISDIR is reported as EEXIST).
+ */
+static int ci_open(const char *path, void *cb)
+{
+	struct ci_open_data *data = cb;
+
+	data->fd = open(path, O_WRONLY | O_CREAT | O_EXCL, data->mode);
+	if (data->fd >= 0)
+		return 0;
+
+	/*
+	 * EISDIR can only mean a path collision among the entries being
+	 * checked out. There is no need for raceproof_create_file() to try
+	 * removing empty dirs; instead, let the caller know that the path
+	 * already exists, so that the collision can be handled properly
+	 * later.
+	 */
+	if (errno == EISDIR)
+		errno = EEXIST;
+	return 1;
+}
+
void write_checkout_item(struct checkout *state, struct checkout_item *ci)
{
- unsigned int mode = (ci->ce->ce_mode & 0100) ? 0777 : 0666;
+ struct ci_open_data open_data;
int fd = -1, fstat_done = 0;
struct strbuf path = STRBUF_INIT;
+ open_data.mode = (ci->ce->ce_mode & 0100) ? 0777 : 0666;
strbuf_add(&path, state->base_dir, state->base_dir_len);
strbuf_add(&path, ci->ce->name, ci->ce->ce_namelen);
- fd = open(path.buf, O_WRONLY | O_CREAT | O_EXCL, mode);
-
- if (fd < 0) {
- if (errno == EEXIST || errno == EISDIR || errno == ENOENT ||
- errno == ENOTDIR) {
+ /*
+ * The main process already removed any non-directory file that was in
+ * the way. So if we find one, it's a path collision.
+ */
+ if (raceproof_create_file(path.buf, ci_open, &open_data)) {
+ if (errno == EEXIST || errno == ENOTDIR || errno == ENOENT) {
/*
* Errors which probably represent a path collision.
* Suppress the error message and mark the ci to be
@@ -325,6 +353,8 @@ void write_checkout_item(struct checkout *state, struct checkout_item *ci)
goto out;
}
+ fd = open_data.fd;
+
if (write_checkout_item_to_fd(fd, state, ci, path.buf)) {
/* Error was already reported. */
ci->status = CI_FAILED;
Allow the parallel workers to create the leading directories of the entries being checked out, instead of pre-creating them in the main process. This optimization should be more effective on file systems with higher I/O latency. Part of the process of creating leading dirs is the removal of any non-directory file that could be in the way. This is currently done inside entry.c:create_directories(). However, if we were to move this to the workers as well, we would risk removing a file just written by another worker, which collided with the one currently being written. In a worse scenario, we could remove the file right after a worker has closed it but before it called stat(). To avoid these problems, let's remove the non-directory files in the main process. And to avoid the cost of extra lstat() calls in this process, we use has_dirs_only_path(), which will have the necessary information already cached from check_path(). Finally, to create the leading dirs in the workers, we could re-use create_directories(). But, unlike the main process, we wouldn't have the stat() information cached. Thus, let's use raceproof_create_file(), which will only stat() the path components after an open() failure, saving us time when creating subsequent files in the same directory. Signed-off-by: Matheus Tavares <matheus.bernardino@usp.br> --- entry.c | 45 ++++++++++++++++++++++++++++++++++++++++++--- parallel-checkout.c | 42 ++++++++++++++++++++++++++++++++++++------ 2 files changed, 78 insertions(+), 9 deletions(-)