diff mbox series

[WIP,RFC,5/7] clone: use dir-iterator to avoid explicit dir traversal

Message ID 20190226002625.13022-6-avarab@gmail.com (mailing list archive)
State New, archived
Headers show
Series clone: dir iterator refactoring with tests | expand

Commit Message

Ævar Arnfjörð Bjarmason Feb. 26, 2019, 12:26 a.m. UTC
From: Matheus Tavares <matheus.bernardino@usp.br>

Replace usage of opendir/readdir/closedir API to traverse directories
recursively, at copy_or_link_directory function, by the dir-iterator
API. This simplifies the code and avoid recursive calls to
copy_or_link_directory.

[Ævar: This should be bug-compatible with the existing "clone"
behavior. The whole bit here with "iter->relative_path[0] == '.'" is a
dirty hack. We don't copy dot-dirs, and then later on just blindly
ignore ENOENT errors as we descend into them. That case really wants
to be a is_dotdir_or_file_within() test instead]

Now, copy_or_link_directory will call die() in case of an error on
openddir, readdir or lstat, inside dir_iterator_advance. That means it
will abort in case of an error trying to fetch any iteration entry.

Signed-off-by: Matheus Tavares <matheus.bernardino@usp.br>
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
---
 builtin/clone.c | 55 +++++++++++++++++++++++++++++--------------------
 1 file changed, 33 insertions(+), 22 deletions(-)

Comments

Matheus Tavares Feb. 26, 2019, 3:48 a.m. UTC | #1
On Mon, Feb 25, 2019 at 9:26 PM Ævar Arnfjörð Bjarmason
<avarab@gmail.com> wrote:
>
> From: Matheus Tavares <matheus.bernardino@usp.br>
>
> Replace usage of opendir/readdir/closedir API to traverse directories
> recursively, at copy_or_link_directory function, by the dir-iterator
> API. This simplifies the code and avoid recursive calls to
> copy_or_link_directory.
>
> [Ævar: This should be bug-compatible with the existing "clone"
> behavior. The whole bit here with "iter->relative_path[0] == '.'" is a
> dirty hack. We don't copy dot-dirs, and then later on just blindly
> ignore ENOENT errors as we descend into them. That case really wants
> to be a is_dotdir_or_file_within() test instead]
>
> Now, copy_or_link_directory will call die() in case of an error on
> openddir, readdir or lstat, inside dir_iterator_advance. That means it
> will abort in case of an error trying to fetch any iteration entry.
>
> Signed-off-by: Matheus Tavares <matheus.bernardino@usp.br>
> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
> ---
>  builtin/clone.c | 55 +++++++++++++++++++++++++++++--------------------
>  1 file changed, 33 insertions(+), 22 deletions(-)
>
> diff --git a/builtin/clone.c b/builtin/clone.c
> index 862d2ea69c..c32e9022b3 100644
> --- a/builtin/clone.c
> +++ b/builtin/clone.c
> @@ -23,6 +23,8 @@
>  #include "transport.h"
>  #include "strbuf.h"
>  #include "dir.h"
> +#include "dir-iterator.h"
> +#include "iterator.h"
>  #include "sigchain.h"
>  #include "branch.h"
>  #include "remote.h"
> @@ -411,42 +413,47 @@ static void mkdir_if_missing(const char *pathname, mode_t mode)
>  }
>
>  static void copy_or_link_directory(struct strbuf *src, struct strbuf *dest,
> -                                  const char *src_repo, int src_baselen)
> +                                  const char *src_repo)
>  {
> -       struct dirent *de;
> -       struct stat buf;
>         int src_len, dest_len;
> -       DIR *dir;
> -
> -       dir = opendir(src->buf);
> -       if (!dir)
> -               die_errno(_("failed to open '%s'"), src->buf);
> +       struct dir_iterator *iter;
> +       int iter_status;
> +       struct stat st;
>
>         mkdir_if_missing(dest->buf, 0777);
>
> +       iter = dir_iterator_begin(src->buf, 1);
> +
>         strbuf_addch(src, '/');
>         src_len = src->len;
>         strbuf_addch(dest, '/');
>         dest_len = dest->len;
>
> -       while ((de = readdir(dir)) != NULL) {
> +       while ((iter_status = dir_iterator_advance(iter)) == ITER_OK) {
>                 strbuf_setlen(src, src_len);
> -               strbuf_addstr(src, de->d_name);
> +               strbuf_addstr(src, iter->relative_path);
>                 strbuf_setlen(dest, dest_len);
> -               strbuf_addstr(dest, de->d_name);
> -               if (stat(src->buf, &buf)) {
> +               strbuf_addstr(dest, iter->relative_path);
> +
> +               /*
> +                * dir_iterator_advance already calls lstat to populate iter->st
> +                * but, unlike stat, lstat does not checks for permissions on
> +                * the given path.
> +                */
> +               if (stat(src->buf, &st)) {
>                         warning (_("failed to stat %s\n"), src->buf);
>                         continue;
>                 }
> -               if (S_ISDIR(buf.st_mode)) {
> -                       if (de->d_name[0] != '.')
> -                               copy_or_link_directory(src, dest,
> -                                                      src_repo, src_baselen);
> +
> +               if (S_ISDIR(iter->st.st_mode)) {
> +                       if (iter->relative_path[0] == '.')

I think it should be iter->basename[0] here, instead, right?

I also have a more conceptual question here: This additions (or the
is_dotdir_of_file_within as suggested) are just to make patch
compatible with the current behaviour, but are going to be removed
soon after. As this would be kind of a noise, wouldn't it be better to
have a patch before this, already correcting copy_or_link_directory's
behaviour on hidden dirs and them this?

> +                               continue;
> +                       mkdir_if_missing(dest->buf, 0777);
>                         continue;
>                 }
>
>                 /* Files that cannot be copied bit-for-bit... */
> -               if (!strcmp(src->buf + src_baselen, "/info/alternates")) {
> +               if (!strcmp(iter->relative_path, "info/alternates")) {
>                         copy_alternates(src, dest, src_repo);
>                         continue;
>                 }
> @@ -456,14 +463,18 @@ static void copy_or_link_directory(struct strbuf *src, struct strbuf *dest,
>                 if (!option_no_hardlinks) {
>                         if (!link(src->buf, dest->buf))
>                                 continue;
> -                       if (option_local > 0)
> -                               die_errno(_("failed to create link '%s'"), dest->buf);
> +                       if (option_local > 0 && errno != ENOENT)
> +                               warning_errno(_("failed to create link '%s'"), dest->buf);
>                         option_no_hardlinks = 1;
>                 }
> -               if (copy_file_with_time(dest->buf, src->buf, 0666))
> +               if (copy_file_with_time(dest->buf, src->buf, 0666) && errno != ENOENT)
>                         die_errno(_("failed to copy file to '%s'"), dest->buf);
>         }
> -       closedir(dir);
> +
> +       if (iter_status != ITER_DONE) {
> +               strbuf_setlen(src, src_len);
> +               die(_("failed to iterate over '%s'"), src->buf);
> +       }
>  }
>
>  static void clone_local(const char *src_repo, const char *dest_repo)
> @@ -481,7 +492,7 @@ static void clone_local(const char *src_repo, const char *dest_repo)
>                 get_common_dir(&dest, dest_repo);
>                 strbuf_addstr(&src, "/objects");
>                 strbuf_addstr(&dest, "/objects");
> -               copy_or_link_directory(&src, &dest, src_repo, src.len);
> +               copy_or_link_directory(&src, &dest, src_repo);
>                 strbuf_release(&src);
>                 strbuf_release(&dest);
>         }
> --
> 2.21.0.rc2.1.g2d5e20a900.dirty
>
Ævar Arnfjörð Bjarmason Feb. 26, 2019, 11:33 a.m. UTC | #2
On Tue, Feb 26 2019, Matheus Tavares Bernardino wrote:

> On Mon, Feb 25, 2019 at 9:26 PM Ævar Arnfjörð Bjarmason
> <avarab@gmail.com> wrote:
>>
>> From: Matheus Tavares <matheus.bernardino@usp.br>
>>
>> Replace usage of opendir/readdir/closedir API to traverse directories
>> recursively, at copy_or_link_directory function, by the dir-iterator
>> API. This simplifies the code and avoid recursive calls to
>> copy_or_link_directory.
>>
>> [Ævar: This should be bug-compatible with the existing "clone"
>> behavior. The whole bit here with "iter->relative_path[0] == '.'" is a
>> dirty hack. We don't copy dot-dirs, and then later on just blindly
>> ignore ENOENT errors as we descend into them. That case really wants
>> to be a is_dotdir_or_file_within() test instead]
>>
>> Now, copy_or_link_directory will call die() in case of an error on
>> openddir, readdir or lstat, inside dir_iterator_advance. That means it
>> will abort in case of an error trying to fetch any iteration entry.
>>
>> Signed-off-by: Matheus Tavares <matheus.bernardino@usp.br>
>> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
>> ---
>>  builtin/clone.c | 55 +++++++++++++++++++++++++++++--------------------
>>  1 file changed, 33 insertions(+), 22 deletions(-)
>>
>> diff --git a/builtin/clone.c b/builtin/clone.c
>> index 862d2ea69c..c32e9022b3 100644
>> --- a/builtin/clone.c
>> +++ b/builtin/clone.c
>> @@ -23,6 +23,8 @@
>>  #include "transport.h"
>>  #include "strbuf.h"
>>  #include "dir.h"
>> +#include "dir-iterator.h"
>> +#include "iterator.h"
>>  #include "sigchain.h"
>>  #include "branch.h"
>>  #include "remote.h"
>> @@ -411,42 +413,47 @@ static void mkdir_if_missing(const char *pathname, mode_t mode)
>>  }
>>
>>  static void copy_or_link_directory(struct strbuf *src, struct strbuf *dest,
>> -                                  const char *src_repo, int src_baselen)
>> +                                  const char *src_repo)
>>  {
>> -       struct dirent *de;
>> -       struct stat buf;
>>         int src_len, dest_len;
>> -       DIR *dir;
>> -
>> -       dir = opendir(src->buf);
>> -       if (!dir)
>> -               die_errno(_("failed to open '%s'"), src->buf);
>> +       struct dir_iterator *iter;
>> +       int iter_status;
>> +       struct stat st;
>>
>>         mkdir_if_missing(dest->buf, 0777);
>>
>> +       iter = dir_iterator_begin(src->buf, 1);
>> +
>>         strbuf_addch(src, '/');
>>         src_len = src->len;
>>         strbuf_addch(dest, '/');
>>         dest_len = dest->len;
>>
>> -       while ((de = readdir(dir)) != NULL) {
>> +       while ((iter_status = dir_iterator_advance(iter)) == ITER_OK) {
>>                 strbuf_setlen(src, src_len);
>> -               strbuf_addstr(src, de->d_name);
>> +               strbuf_addstr(src, iter->relative_path);
>>                 strbuf_setlen(dest, dest_len);
>> -               strbuf_addstr(dest, de->d_name);
>> -               if (stat(src->buf, &buf)) {
>> +               strbuf_addstr(dest, iter->relative_path);
>> +
>> +               /*
>> +                * dir_iterator_advance already calls lstat to populate iter->st
>> +                * but, unlike stat, lstat does not checks for permissions on
>> +                * the given path.
>> +                */
>> +               if (stat(src->buf, &st)) {
>>                         warning (_("failed to stat %s\n"), src->buf);
>>                         continue;
>>                 }
>> -               if (S_ISDIR(buf.st_mode)) {
>> -                       if (de->d_name[0] != '.')
>> -                               copy_or_link_directory(src, dest,
>> -                                                      src_repo, src_baselen);
>> +
>> +               if (S_ISDIR(iter->st.st_mode)) {
>> +                       if (iter->relative_path[0] == '.')
>
> I think it should be iter->basename[0] here, instead, right?

Yeah, sounds better.

> I also have a more conceptual question here: This additions (or the
> is_dotdir_of_file_within as suggested) are just to make patch
> compatible with the current behaviour, but are going to be removed
> soon after.

Yes. it's not an endorsement of the current behavior, but for ease of
review. I.e. to split changes into smaller logical ones ones that are
refactoring on the one hand, and behavior changes on the other.

> As this would be kind of a noise, wouldn't it be better to have a
> patch before this, already correcting copy_or_link_directory's
> behaviour on hidden dirs and them this?

Yeah, maybe structuring it like that is more readable.


>> +                               continue;
>> +                       mkdir_if_missing(dest->buf, 0777);
>>                         continue;
>>                 }
>>
>>                 /* Files that cannot be copied bit-for-bit... */
>> -               if (!strcmp(src->buf + src_baselen, "/info/alternates")) {
>> +               if (!strcmp(iter->relative_path, "info/alternates")) {
>>                         copy_alternates(src, dest, src_repo);
>>                         continue;
>>                 }
>> @@ -456,14 +463,18 @@ static void copy_or_link_directory(struct strbuf *src, struct strbuf *dest,
>>                 if (!option_no_hardlinks) {
>>                         if (!link(src->buf, dest->buf))
>>                                 continue;
>> -                       if (option_local > 0)
>> -                               die_errno(_("failed to create link '%s'"), dest->buf);
>> +                       if (option_local > 0 && errno != ENOENT)
>> +                               warning_errno(_("failed to create link '%s'"), dest->buf);
>>                         option_no_hardlinks = 1;
>>                 }
>> -               if (copy_file_with_time(dest->buf, src->buf, 0666))
>> +               if (copy_file_with_time(dest->buf, src->buf, 0666) && errno != ENOENT)
>>                         die_errno(_("failed to copy file to '%s'"), dest->buf);
>>         }
>> -       closedir(dir);
>> +
>> +       if (iter_status != ITER_DONE) {
>> +               strbuf_setlen(src, src_len);
>> +               die(_("failed to iterate over '%s'"), src->buf);
>> +       }
>>  }
>>
>>  static void clone_local(const char *src_repo, const char *dest_repo)
>> @@ -481,7 +492,7 @@ static void clone_local(const char *src_repo, const char *dest_repo)
>>                 get_common_dir(&dest, dest_repo);
>>                 strbuf_addstr(&src, "/objects");
>>                 strbuf_addstr(&dest, "/objects");
>> -               copy_or_link_directory(&src, &dest, src_repo, src.len);
>> +               copy_or_link_directory(&src, &dest, src_repo);
>>                 strbuf_release(&src);
>>                 strbuf_release(&dest);
>>         }
>> --
>> 2.21.0.rc2.1.g2d5e20a900.dirty
>>
diff mbox series

Patch

diff --git a/builtin/clone.c b/builtin/clone.c
index 862d2ea69c..c32e9022b3 100644
--- a/builtin/clone.c
+++ b/builtin/clone.c
@@ -23,6 +23,8 @@ 
 #include "transport.h"
 #include "strbuf.h"
 #include "dir.h"
+#include "dir-iterator.h"
+#include "iterator.h"
 #include "sigchain.h"
 #include "branch.h"
 #include "remote.h"
@@ -411,42 +413,47 @@  static void mkdir_if_missing(const char *pathname, mode_t mode)
 }
 
 static void copy_or_link_directory(struct strbuf *src, struct strbuf *dest,
-				   const char *src_repo, int src_baselen)
+				   const char *src_repo)
 {
-	struct dirent *de;
-	struct stat buf;
 	int src_len, dest_len;
-	DIR *dir;
-
-	dir = opendir(src->buf);
-	if (!dir)
-		die_errno(_("failed to open '%s'"), src->buf);
+	struct dir_iterator *iter;
+	int iter_status;
+	struct stat st;
 
 	mkdir_if_missing(dest->buf, 0777);
 
+	iter = dir_iterator_begin(src->buf, 1);
+
 	strbuf_addch(src, '/');
 	src_len = src->len;
 	strbuf_addch(dest, '/');
 	dest_len = dest->len;
 
-	while ((de = readdir(dir)) != NULL) {
+	while ((iter_status = dir_iterator_advance(iter)) == ITER_OK) {
 		strbuf_setlen(src, src_len);
-		strbuf_addstr(src, de->d_name);
+		strbuf_addstr(src, iter->relative_path);
 		strbuf_setlen(dest, dest_len);
-		strbuf_addstr(dest, de->d_name);
-		if (stat(src->buf, &buf)) {
+		strbuf_addstr(dest, iter->relative_path);
+
+		/*
+		 * dir_iterator_advance already calls lstat to populate iter->st
+		 * but, unlike stat, lstat does not checks for permissions on
+		 * the given path.
+		 */
+		if (stat(src->buf, &st)) {
 			warning (_("failed to stat %s\n"), src->buf);
 			continue;
 		}
-		if (S_ISDIR(buf.st_mode)) {
-			if (de->d_name[0] != '.')
-				copy_or_link_directory(src, dest,
-						       src_repo, src_baselen);
+
+		if (S_ISDIR(iter->st.st_mode)) {
+			if (iter->relative_path[0] == '.')
+				continue;
+			mkdir_if_missing(dest->buf, 0777);
 			continue;
 		}
 
 		/* Files that cannot be copied bit-for-bit... */
-		if (!strcmp(src->buf + src_baselen, "/info/alternates")) {
+		if (!strcmp(iter->relative_path, "info/alternates")) {
 			copy_alternates(src, dest, src_repo);
 			continue;
 		}
@@ -456,14 +463,18 @@  static void copy_or_link_directory(struct strbuf *src, struct strbuf *dest,
 		if (!option_no_hardlinks) {
 			if (!link(src->buf, dest->buf))
 				continue;
-			if (option_local > 0)
-				die_errno(_("failed to create link '%s'"), dest->buf);
+			if (option_local > 0 && errno != ENOENT)
+				warning_errno(_("failed to create link '%s'"), dest->buf);
 			option_no_hardlinks = 1;
 		}
-		if (copy_file_with_time(dest->buf, src->buf, 0666))
+		if (copy_file_with_time(dest->buf, src->buf, 0666) && errno != ENOENT)
 			die_errno(_("failed to copy file to '%s'"), dest->buf);
 	}
-	closedir(dir);
+
+	if (iter_status != ITER_DONE) {
+		strbuf_setlen(src, src_len);
+		die(_("failed to iterate over '%s'"), src->buf);
+	}
 }
 
 static void clone_local(const char *src_repo, const char *dest_repo)
@@ -481,7 +492,7 @@  static void clone_local(const char *src_repo, const char *dest_repo)
 		get_common_dir(&dest, dest_repo);
 		strbuf_addstr(&src, "/objects");
 		strbuf_addstr(&dest, "/objects");
-		copy_or_link_directory(&src, &dest, src_repo, src.len);
+		copy_or_link_directory(&src, &dest, src_repo);
 		strbuf_release(&src);
 		strbuf_release(&dest);
 	}