[WIP,RFC,v2,5/5] clone: use dir-iterator to avoid explicit dir traversal
diff mbox series

Message ID 20190226051804.10631-6-matheus.bernardino@usp.br
State New
Headers show
Series
  • clone: dir iterator refactoring with tests
Related show

Commit Message

Matheus Tavares Bernardino Feb. 26, 2019, 5:18 a.m. UTC
Replace usage of opendir/readdir/closedir API to traverse directories
recursively, at copy_or_link_directory function, by the dir-iterator
API. This simplifies the code and avoid recursive calls to
copy_or_link_directory.

This process also makes copy_or_link_directory call die() in case of an
error on readdir or stat, inside dir_iterator_advance. Previously it
would just print a warning for errors on stat and ignore errors on
readdir, which isn't nice because a local git clone would end up
successfully even though the .git/objects copy didn't fully succeeded.

Signed-off-by: Matheus Tavares <matheus.bernardino@usp.br>
---
I can also make the change described in the last paragraph in a separate
patch before this one, but I would have to undo it in this patch because
dir-iterator already implements it. So, IMHO, it would be just noise
and not worthy.

 builtin/clone.c | 45 +++++++++++++++++++++++----------------------
 1 file changed, 23 insertions(+), 22 deletions(-)

Comments

Ævar Arnfjörð Bjarmason Feb. 26, 2019, 11:35 a.m. UTC | #1
On Tue, Feb 26 2019, Matheus Tavares wrote:

> +	int iter_status;
> +	struct stat st;
> +	unsigned flags;

If you compile with DEVELOPER=1 you'll get compile-time errors when
variables aren't used. Like "st" here.
Duy Nguyen Feb. 26, 2019, 12:32 p.m. UTC | #2
On Tue, Feb 26, 2019 at 12:18 PM Matheus Tavares
<matheus.bernardino@usp.br> wrote:
>
> Replace usage of opendir/readdir/closedir API to traverse directories
> recursively, at copy_or_link_directory function, by the dir-iterator
> API. This simplifies the code and avoid recursive calls to
> copy_or_link_directory.
>
> This process also makes copy_or_link_directory call die() in case of an
> error on readdir or stat, inside dir_iterator_advance. Previously it
> would just print a warning for errors on stat and ignore errors on
> readdir, which isn't nice because a local git clone would end up
> successfully even though the .git/objects copy didn't fully succeeded.
>
> Signed-off-by: Matheus Tavares <matheus.bernardino@usp.br>
> ---
> I can also make the change described in the last paragraph in a separate
> patch before this one, but I would have to undo it in this patch because
> dir-iterator already implements it. So, IMHO, it would be just noise
> and not worthy.
>
>  builtin/clone.c | 45 +++++++++++++++++++++++----------------------
>  1 file changed, 23 insertions(+), 22 deletions(-)
>
> diff --git a/builtin/clone.c b/builtin/clone.c
> index fd580fa98d..b23ba64c94 100644
> --- a/builtin/clone.c
> +++ b/builtin/clone.c
> @@ -23,6 +23,8 @@
>  #include "transport.h"
>  #include "strbuf.h"
>  #include "dir.h"
> +#include "dir-iterator.h"
> +#include "iterator.h"
>  #include "sigchain.h"
>  #include "branch.h"
>  #include "remote.h"
> @@ -411,42 +413,37 @@ static void mkdir_if_missing(const char *pathname, mode_t mode)
>  }
>
>  static void copy_or_link_directory(struct strbuf *src, struct strbuf *dest,
> -                                  const char *src_repo, int src_baselen)
> +                                  const char *src_repo)
>  {
> -       struct dirent *de;
> -       struct stat buf;
>         int src_len, dest_len;
> -       DIR *dir;
> -
> -       dir = opendir(src->buf);
> -       if (!dir)
> -               die_errno(_("failed to open '%s'"), src->buf);
> +       struct dir_iterator *iter;
> +       int iter_status;
> +       struct stat st;
> +       unsigned flags;
>
>         mkdir_if_missing(dest->buf, 0777);
>
> +       flags = DIR_ITERATOR_PEDANTIC | DIR_ITERATOR_FOLLOW_SYMLINKS;
> +       iter = dir_iterator_begin(src->buf, flags);
> +
>         strbuf_addch(src, '/');
>         src_len = src->len;
>         strbuf_addch(dest, '/');
>         dest_len = dest->len;
>
> -       while ((de = readdir(dir)) != NULL) {
> +       while ((iter_status = dir_iterator_advance(iter)) == ITER_OK) {
>                 strbuf_setlen(src, src_len);
> -               strbuf_addstr(src, de->d_name);
> +               strbuf_addstr(src, iter->relative_path);
>                 strbuf_setlen(dest, dest_len);
> -               strbuf_addstr(dest, de->d_name);
> -               if (stat(src->buf, &buf)) {
> -                       warning (_("failed to stat %s\n"), src->buf);
> -                       continue;
> -               }
> -               if (S_ISDIR(buf.st_mode)) {
> -                       if (!is_dot_or_dotdot(de->d_name))
> -                               copy_or_link_directory(src, dest,
> -                                                      src_repo, src_baselen);
> +               strbuf_addstr(dest, iter->relative_path);
> +
> +               if (S_ISDIR(iter->st.st_mode)) {
> +                       mkdir_if_missing(dest->buf, 0777);

I wonder if this mkdir_if_missing is sufficient. What if you have to
create multiple directories?

Let's say the first advance, we hit "a". The the second advance we hit
directory "b/b/b/b", we would need to mkdir recursively and something
like safe_create_leading_directories() would be a better fit.

I'm not sure if it can happen though. I haven't re-read dir-iterator
code carefully.

>                         continue;
>                 }
>
>                 /* Files that cannot be copied bit-for-bit... */
> -               if (!strcmp(src->buf + src_baselen, "/info/alternates")) {
> +               if (!strcmp(iter->relative_path, "info/alternates")) {

While we're here, this should be fspathcmp to be friendlier to
case-insensitive filesystems. You probably should fix it in a separate
patch though.

>                         copy_alternates(src, dest, src_repo);
>                         continue;
>                 }
> @@ -463,7 +460,11 @@ static void copy_or_link_directory(struct strbuf *src, struct strbuf *dest,
>                 if (copy_file_with_time(dest->buf, src->buf, 0666))
>                         die_errno(_("failed to copy file to '%s'"), dest->buf);
>         }
> -       closedir(dir);
> +
> +       if (iter_status != ITER_DONE) {
> +               strbuf_setlen(src, src_len);
> +               die(_("failed to iterate over '%s'"), src->buf);
> +       }

I think you need to abort the iterator even when it returns ITER_DONE.
At least that's how the first caller in files-backend.c does it.

>  }
>
>  static void clone_local(const char *src_repo, const char *dest_repo)
> @@ -481,7 +482,7 @@ static void clone_local(const char *src_repo, const char *dest_repo)
>                 get_common_dir(&dest, dest_repo);
>                 strbuf_addstr(&src, "/objects");
>                 strbuf_addstr(&dest, "/objects");
> -               copy_or_link_directory(&src, &dest, src_repo, src.len);
> +               copy_or_link_directory(&src, &dest, src_repo);
>                 strbuf_release(&src);
>                 strbuf_release(&dest);
>         }
> --
> 2.20.1
>
Ævar Arnfjörð Bjarmason Feb. 26, 2019, 12:50 p.m. UTC | #3
On Tue, Feb 26 2019, Duy Nguyen wrote:

> On Tue, Feb 26, 2019 at 12:18 PM Matheus Tavares
> <matheus.bernardino@usp.br> wrote:
>>
>> Replace usage of opendir/readdir/closedir API to traverse directories
>> recursively, at copy_or_link_directory function, by the dir-iterator
>> API. This simplifies the code and avoid recursive calls to
>> copy_or_link_directory.
>>
>> This process also makes copy_or_link_directory call die() in case of an
>> error on readdir or stat, inside dir_iterator_advance. Previously it
>> would just print a warning for errors on stat and ignore errors on
>> readdir, which isn't nice because a local git clone would end up
>> successfully even though the .git/objects copy didn't fully succeeded.
>>
>> Signed-off-by: Matheus Tavares <matheus.bernardino@usp.br>
>> ---
>> I can also make the change described in the last paragraph in a separate
>> patch before this one, but I would have to undo it in this patch because
>> dir-iterator already implements it. So, IMHO, it would be just noise
>> and not worthy.
>>
>>  builtin/clone.c | 45 +++++++++++++++++++++++----------------------
>>  1 file changed, 23 insertions(+), 22 deletions(-)
>>
>> diff --git a/builtin/clone.c b/builtin/clone.c
>> index fd580fa98d..b23ba64c94 100644
>> --- a/builtin/clone.c
>> +++ b/builtin/clone.c
>> @@ -23,6 +23,8 @@
>>  #include "transport.h"
>>  #include "strbuf.h"
>>  #include "dir.h"
>> +#include "dir-iterator.h"
>> +#include "iterator.h"
>>  #include "sigchain.h"
>>  #include "branch.h"
>>  #include "remote.h"
>> @@ -411,42 +413,37 @@ static void mkdir_if_missing(const char *pathname, mode_t mode)
>>  }
>>
>>  static void copy_or_link_directory(struct strbuf *src, struct strbuf *dest,
>> -                                  const char *src_repo, int src_baselen)
>> +                                  const char *src_repo)
>>  {
>> -       struct dirent *de;
>> -       struct stat buf;
>>         int src_len, dest_len;
>> -       DIR *dir;
>> -
>> -       dir = opendir(src->buf);
>> -       if (!dir)
>> -               die_errno(_("failed to open '%s'"), src->buf);
>> +       struct dir_iterator *iter;
>> +       int iter_status;
>> +       struct stat st;
>> +       unsigned flags;
>>
>>         mkdir_if_missing(dest->buf, 0777);
>>
>> +       flags = DIR_ITERATOR_PEDANTIC | DIR_ITERATOR_FOLLOW_SYMLINKS;
>> +       iter = dir_iterator_begin(src->buf, flags);
>> +
>>         strbuf_addch(src, '/');
>>         src_len = src->len;
>>         strbuf_addch(dest, '/');
>>         dest_len = dest->len;
>>
>> -       while ((de = readdir(dir)) != NULL) {
>> +       while ((iter_status = dir_iterator_advance(iter)) == ITER_OK) {
>>                 strbuf_setlen(src, src_len);
>> -               strbuf_addstr(src, de->d_name);
>> +               strbuf_addstr(src, iter->relative_path);
>>                 strbuf_setlen(dest, dest_len);
>> -               strbuf_addstr(dest, de->d_name);
>> -               if (stat(src->buf, &buf)) {
>> -                       warning (_("failed to stat %s\n"), src->buf);
>> -                       continue;
>> -               }
>> -               if (S_ISDIR(buf.st_mode)) {
>> -                       if (!is_dot_or_dotdot(de->d_name))
>> -                               copy_or_link_directory(src, dest,
>> -                                                      src_repo, src_baselen);
>> +               strbuf_addstr(dest, iter->relative_path);
>> +
>> +               if (S_ISDIR(iter->st.st_mode)) {
>> +                       mkdir_if_missing(dest->buf, 0777);
>
> I wonder if this mkdir_if_missing is sufficient. What if you have to
> create multiple directories?
>
> Let's say the first advance, we hit "a". The the second advance we hit
> directory "b/b/b/b", we would need to mkdir recursively and something
> like safe_create_leading_directories() would be a better fit.
>
> I'm not sure if it can happen though. I haven't re-read dir-iterator
> code carefully.

This part isn't a problem. It iterates one level at a time. So given a
structure like a/b/c/d/e/f/g/h/i/j/k/some-l you'll find that if you
instrument the loop in clone.c you get:

    dir = a
    dir = a/b
    dir = a/b/c
    dir = a/b/c/d
    dir = a/b/c/d/e
    dir = a/b/c/d/e/f
    dir = a/b/c/d/e/f/g
    dir = a/b/c/d/e/f/g/h
    dir = a/b/c/d/e/f/g/h/i
    dir = a/b/c/d/e/f/g/h/i/j
    dir = a/b/c/d/e/f/g/h/i/j/k
    dir = a/b/c/d/e/f/g/h/i/j/k/some-l

So it's like the old implementation in that way. It readdir()'s and
walks directories one level at a time.
Matheus Tavares Bernardino Feb. 27, 2019, 5:40 p.m. UTC | #4
On Tue, Feb 26, 2019 at 9:32 AM Duy Nguyen <pclouds@gmail.com> wrote:
>
> On Tue, Feb 26, 2019 at 12:18 PM Matheus Tavares
> <matheus.bernardino@usp.br> wrote:
> >
> > Replace usage of opendir/readdir/closedir API to traverse directories
> > recursively, at copy_or_link_directory function, by the dir-iterator
> > API. This simplifies the code and avoid recursive calls to
> > copy_or_link_directory.
> >
> > This process also makes copy_or_link_directory call die() in case of an
> > error on readdir or stat, inside dir_iterator_advance. Previously it
> > would just print a warning for errors on stat and ignore errors on
> > readdir, which isn't nice because a local git clone would end up
> > successfully even though the .git/objects copy didn't fully succeeded.
> >
> > Signed-off-by: Matheus Tavares <matheus.bernardino@usp.br>
> > ---
> > I can also make the change described in the last paragraph in a separate
> > patch before this one, but I would have to undo it in this patch because
> > dir-iterator already implements it. So, IMHO, it would be just noise
> > and not worthy.
> >
> >  builtin/clone.c | 45 +++++++++++++++++++++++----------------------
> >  1 file changed, 23 insertions(+), 22 deletions(-)
> >
> > diff --git a/builtin/clone.c b/builtin/clone.c
> > index fd580fa98d..b23ba64c94 100644
> > --- a/builtin/clone.c
> > +++ b/builtin/clone.c
> > @@ -23,6 +23,8 @@
> >  #include "transport.h"
> >  #include "strbuf.h"
> >  #include "dir.h"
> > +#include "dir-iterator.h"
> > +#include "iterator.h"
> >  #include "sigchain.h"
> >  #include "branch.h"
> >  #include "remote.h"
> > @@ -411,42 +413,37 @@ static void mkdir_if_missing(const char *pathname, mode_t mode)
> >  }
> >
> >  static void copy_or_link_directory(struct strbuf *src, struct strbuf *dest,
> > -                                  const char *src_repo, int src_baselen)
> > +                                  const char *src_repo)
> >  {
> > -       struct dirent *de;
> > -       struct stat buf;
> >         int src_len, dest_len;
> > -       DIR *dir;
> > -
> > -       dir = opendir(src->buf);
> > -       if (!dir)
> > -               die_errno(_("failed to open '%s'"), src->buf);
> > +       struct dir_iterator *iter;
> > +       int iter_status;
> > +       struct stat st;
> > +       unsigned flags;
> >
> >         mkdir_if_missing(dest->buf, 0777);
> >
> > +       flags = DIR_ITERATOR_PEDANTIC | DIR_ITERATOR_FOLLOW_SYMLINKS;
> > +       iter = dir_iterator_begin(src->buf, flags);
> > +
> >         strbuf_addch(src, '/');
> >         src_len = src->len;
> >         strbuf_addch(dest, '/');
> >         dest_len = dest->len;
> >
> > -       while ((de = readdir(dir)) != NULL) {
> > +       while ((iter_status = dir_iterator_advance(iter)) == ITER_OK) {
> >                 strbuf_setlen(src, src_len);
> > -               strbuf_addstr(src, de->d_name);
> > +               strbuf_addstr(src, iter->relative_path);
> >                 strbuf_setlen(dest, dest_len);
> > -               strbuf_addstr(dest, de->d_name);
> > -               if (stat(src->buf, &buf)) {
> > -                       warning (_("failed to stat %s\n"), src->buf);
> > -                       continue;
> > -               }
> > -               if (S_ISDIR(buf.st_mode)) {
> > -                       if (!is_dot_or_dotdot(de->d_name))
> > -                               copy_or_link_directory(src, dest,
> > -                                                      src_repo, src_baselen);
> > +               strbuf_addstr(dest, iter->relative_path);
> > +
> > +               if (S_ISDIR(iter->st.st_mode)) {
> > +                       mkdir_if_missing(dest->buf, 0777);
>
> I wonder if this mkdir_if_missing is sufficient. What if you have to
> create multiple directories?
>
> Let's say the first advance, we hit "a". The the second advance we hit
> directory "b/b/b/b", we would need to mkdir recursively and something
> like safe_create_leading_directories() would be a better fit.
>
> I'm not sure if it can happen though. I haven't re-read dir-iterator
> code carefully.
>
> >                         continue;
> >                 }
> >
> >                 /* Files that cannot be copied bit-for-bit... */
> > -               if (!strcmp(src->buf + src_baselen, "/info/alternates")) {
> > +               if (!strcmp(iter->relative_path, "info/alternates")) {
>
> While we're here, this should be fspathcmp to be friendlier to
> case-insensitive filesystems. You probably should fix it in a separate
> patch though.
>

Nice! I will make this change in a separate patch in the series. Thanks!

> >                         copy_alternates(src, dest, src_repo);
> >                         continue;
> >                 }
> > @@ -463,7 +460,11 @@ static void copy_or_link_directory(struct strbuf *src, struct strbuf *dest,
> >                 if (copy_file_with_time(dest->buf, src->buf, 0666))
> >                         die_errno(_("failed to copy file to '%s'"), dest->buf);
> >         }
> > -       closedir(dir);
> > +
> > +       if (iter_status != ITER_DONE) {
> > +               strbuf_setlen(src, src_len);
> > +               die(_("failed to iterate over '%s'"), src->buf);
> > +       }
>
> I think you need to abort the iterator even when it returns ITER_DONE.
> At least that's how the first caller in files-backend.c does it.
>

Hm, I don't think so, since dir_iterator_advance() already frees the
resources before returning ITER_DONE. Also, I may be wrong, but it
doesn't seem to me, that files-backend.c does it. The function
files_reflog_iterator_advance() that calls dir_iterator_advance() even
sets the dir-iterator pointer to NULL as soon as ITER_DONE is
returned.


> >  }
> >
> >  static void clone_local(const char *src_repo, const char *dest_repo)
> > @@ -481,7 +482,7 @@ static void clone_local(const char *src_repo, const char *dest_repo)
> >                 get_common_dir(&dest, dest_repo);
> >                 strbuf_addstr(&src, "/objects");
> >                 strbuf_addstr(&dest, "/objects");
> > -               copy_or_link_directory(&src, &dest, src_repo, src.len);
> > +               copy_or_link_directory(&src, &dest, src_repo);
> >                 strbuf_release(&src);
> >                 strbuf_release(&dest);
> >         }
> > --
> > 2.20.1
> >
>
>
> --
> Duy
Duy Nguyen Feb. 28, 2019, 7:13 a.m. UTC | #5
On Thu, Feb 28, 2019 at 12:40 AM Matheus Tavares Bernardino
<matheus.bernardino@usp.br> wrote:
> > > @@ -463,7 +460,11 @@ static void copy_or_link_directory(struct strbuf *src, struct strbuf *dest,
> > >                 if (copy_file_with_time(dest->buf, src->buf, 0666))
> > >                         die_errno(_("failed to copy file to '%s'"), dest->buf);
> > >         }
> > > -       closedir(dir);
> > > +
> > > +       if (iter_status != ITER_DONE) {
> > > +               strbuf_setlen(src, src_len);
> > > +               die(_("failed to iterate over '%s'"), src->buf);
> > > +       }
> >
> > I think you need to abort the iterator even when it returns ITER_DONE.
> > At least that's how the first caller in files-backend.c does it.
> >
>
> Hm, I don't think so, since dir_iterator_advance() already frees the
> resources before returning ITER_DONE. Also, I may be wrong, but it
> doesn't seem to me, that files-backend.c does it. The function
> files_reflog_iterator_advance() that calls dir_iterator_advance() even
> sets the dir-iterator pointer to NULL as soon as ITER_DONE is
> returned.

Arghhh.. I read the ref_iterator_abort and thought it was
dir_iterator_abort! Sorry for the noise, too many iterators.
Ævar Arnfjörð Bjarmason Feb. 28, 2019, 7:53 a.m. UTC | #6
On Wed, Feb 27 2019, Matheus Tavares Bernardino wrote:

> On Tue, Feb 26, 2019 at 9:32 AM Duy Nguyen <pclouds@gmail.com> wrote:
>>
>> On Tue, Feb 26, 2019 at 12:18 PM Matheus Tavares
>> <matheus.bernardino@usp.br> wrote:
>> >
>> > Replace usage of opendir/readdir/closedir API to traverse directories
>> > recursively, at copy_or_link_directory function, by the dir-iterator
>> > API. This simplifies the code and avoid recursive calls to
>> > copy_or_link_directory.
>> >
>> > This process also makes copy_or_link_directory call die() in case of an
>> > error on readdir or stat, inside dir_iterator_advance. Previously it
>> > would just print a warning for errors on stat and ignore errors on
>> > readdir, which isn't nice because a local git clone would end up
>> > successfully even though the .git/objects copy didn't fully succeeded.
>> >
>> > Signed-off-by: Matheus Tavares <matheus.bernardino@usp.br>
>> > ---
>> > I can also make the change described in the last paragraph in a separate
>> > patch before this one, but I would have to undo it in this patch because
>> > dir-iterator already implements it. So, IMHO, it would be just noise
>> > and not worthy.
>> >
>> >  builtin/clone.c | 45 +++++++++++++++++++++++----------------------
>> >  1 file changed, 23 insertions(+), 22 deletions(-)
>> >
>> > diff --git a/builtin/clone.c b/builtin/clone.c
>> > index fd580fa98d..b23ba64c94 100644
>> > --- a/builtin/clone.c
>> > +++ b/builtin/clone.c
>> > @@ -23,6 +23,8 @@
>> >  #include "transport.h"
>> >  #include "strbuf.h"
>> >  #include "dir.h"
>> > +#include "dir-iterator.h"
>> > +#include "iterator.h"
>> >  #include "sigchain.h"
>> >  #include "branch.h"
>> >  #include "remote.h"
>> > @@ -411,42 +413,37 @@ static void mkdir_if_missing(const char *pathname, mode_t mode)
>> >  }
>> >
>> >  static void copy_or_link_directory(struct strbuf *src, struct strbuf *dest,
>> > -                                  const char *src_repo, int src_baselen)
>> > +                                  const char *src_repo)
>> >  {
>> > -       struct dirent *de;
>> > -       struct stat buf;
>> >         int src_len, dest_len;
>> > -       DIR *dir;
>> > -
>> > -       dir = opendir(src->buf);
>> > -       if (!dir)
>> > -               die_errno(_("failed to open '%s'"), src->buf);
>> > +       struct dir_iterator *iter;
>> > +       int iter_status;
>> > +       struct stat st;
>> > +       unsigned flags;
>> >
>> >         mkdir_if_missing(dest->buf, 0777);
>> >
>> > +       flags = DIR_ITERATOR_PEDANTIC | DIR_ITERATOR_FOLLOW_SYMLINKS;
>> > +       iter = dir_iterator_begin(src->buf, flags);
>> > +
>> >         strbuf_addch(src, '/');
>> >         src_len = src->len;
>> >         strbuf_addch(dest, '/');
>> >         dest_len = dest->len;
>> >
>> > -       while ((de = readdir(dir)) != NULL) {
>> > +       while ((iter_status = dir_iterator_advance(iter)) == ITER_OK) {
>> >                 strbuf_setlen(src, src_len);
>> > -               strbuf_addstr(src, de->d_name);
>> > +               strbuf_addstr(src, iter->relative_path);
>> >                 strbuf_setlen(dest, dest_len);
>> > -               strbuf_addstr(dest, de->d_name);
>> > -               if (stat(src->buf, &buf)) {
>> > -                       warning (_("failed to stat %s\n"), src->buf);
>> > -                       continue;
>> > -               }
>> > -               if (S_ISDIR(buf.st_mode)) {
>> > -                       if (!is_dot_or_dotdot(de->d_name))
>> > -                               copy_or_link_directory(src, dest,
>> > -                                                      src_repo, src_baselen);
>> > +               strbuf_addstr(dest, iter->relative_path);
>> > +
>> > +               if (S_ISDIR(iter->st.st_mode)) {
>> > +                       mkdir_if_missing(dest->buf, 0777);
>>
>> I wonder if this mkdir_if_missing is sufficient. What if you have to
>> create multiple directories?
>>
>> Let's say the first advance, we hit "a". The the second advance we hit
>> directory "b/b/b/b", we would need to mkdir recursively and something
>> like safe_create_leading_directories() would be a better fit.
>>
>> I'm not sure if it can happen though. I haven't re-read dir-iterator
>> code carefully.
>>
>> >                         continue;
>> >                 }
>> >
>> >                 /* Files that cannot be copied bit-for-bit... */
>> > -               if (!strcmp(src->buf + src_baselen, "/info/alternates")) {
>> > +               if (!strcmp(iter->relative_path, "info/alternates")) {
>>
>> While we're here, this should be fspathcmp to be friendlier to
>> case-insensitive filesystems. You probably should fix it in a separate
>> patch though.
>>
>
> Nice! I will make this change in a separate patch in the series. Thanks!
>
>> >                         copy_alternates(src, dest, src_repo);
>> >                         continue;
>> >                 }
>> > @@ -463,7 +460,11 @@ static void copy_or_link_directory(struct strbuf *src, struct strbuf *dest,
>> >                 if (copy_file_with_time(dest->buf, src->buf, 0666))
>> >                         die_errno(_("failed to copy file to '%s'"), dest->buf);
>> >         }
>> > -       closedir(dir);
>> > +
>> > +       if (iter_status != ITER_DONE) {
>> > +               strbuf_setlen(src, src_len);
>> > +               die(_("failed to iterate over '%s'"), src->buf);
>> > +       }
>>
>> I think you need to abort the iterator even when it returns ITER_DONE.
>> At least that's how the first caller in files-backend.c does it.
>>
>
> Hm, I don't think so, since dir_iterator_advance() already frees the
> resources before returning ITER_DONE. Also, I may be wrong, but it
> doesn't seem to me, that files-backend.c does it. The function
> files_reflog_iterator_advance() that calls dir_iterator_advance() even
> sets the dir-iterator pointer to NULL as soon as ITER_DONE is
> returned.

As Duy notes you're right about this. Just to add: This pattern is
usually something we avoid in the git codebase, i.e. we try not to make
it an error to call the whatever_utility_free() function twice.

See e.g. stop_progress_msg for such an implementation, i.e. we'll check
if it's NULL already and exit early, and maybe use FREE_AND_NULL()
instead of NULL.

It means that for the cost of trivial overhead you don't need to worry
about double freeing or maintaining a "was this freed?" state machine.

Now, whether you want to fix that while you're at it is another matter,
just pointing out that we usually try to avoid this problem entirely...

Patch
diff mbox series

diff --git a/builtin/clone.c b/builtin/clone.c
index fd580fa98d..b23ba64c94 100644
--- a/builtin/clone.c
+++ b/builtin/clone.c
@@ -23,6 +23,8 @@ 
 #include "transport.h"
 #include "strbuf.h"
 #include "dir.h"
+#include "dir-iterator.h"
+#include "iterator.h"
 #include "sigchain.h"
 #include "branch.h"
 #include "remote.h"
@@ -411,42 +413,37 @@  static void mkdir_if_missing(const char *pathname, mode_t mode)
 }
 
 static void copy_or_link_directory(struct strbuf *src, struct strbuf *dest,
-				   const char *src_repo, int src_baselen)
+				   const char *src_repo)
 {
-	struct dirent *de;
-	struct stat buf;
 	int src_len, dest_len;
-	DIR *dir;
-
-	dir = opendir(src->buf);
-	if (!dir)
-		die_errno(_("failed to open '%s'"), src->buf);
+	struct dir_iterator *iter;
+	int iter_status;
+	struct stat st;
+	unsigned flags;
 
 	mkdir_if_missing(dest->buf, 0777);
 
+	flags = DIR_ITERATOR_PEDANTIC | DIR_ITERATOR_FOLLOW_SYMLINKS;
+	iter = dir_iterator_begin(src->buf, flags);
+
 	strbuf_addch(src, '/');
 	src_len = src->len;
 	strbuf_addch(dest, '/');
 	dest_len = dest->len;
 
-	while ((de = readdir(dir)) != NULL) {
+	while ((iter_status = dir_iterator_advance(iter)) == ITER_OK) {
 		strbuf_setlen(src, src_len);
-		strbuf_addstr(src, de->d_name);
+		strbuf_addstr(src, iter->relative_path);
 		strbuf_setlen(dest, dest_len);
-		strbuf_addstr(dest, de->d_name);
-		if (stat(src->buf, &buf)) {
-			warning (_("failed to stat %s\n"), src->buf);
-			continue;
-		}
-		if (S_ISDIR(buf.st_mode)) {
-			if (!is_dot_or_dotdot(de->d_name))
-				copy_or_link_directory(src, dest,
-						       src_repo, src_baselen);
+		strbuf_addstr(dest, iter->relative_path);
+
+		if (S_ISDIR(iter->st.st_mode)) {
+			mkdir_if_missing(dest->buf, 0777);
 			continue;
 		}
 
 		/* Files that cannot be copied bit-for-bit... */
-		if (!strcmp(src->buf + src_baselen, "/info/alternates")) {
+		if (!strcmp(iter->relative_path, "info/alternates")) {
 			copy_alternates(src, dest, src_repo);
 			continue;
 		}
@@ -463,7 +460,11 @@  static void copy_or_link_directory(struct strbuf *src, struct strbuf *dest,
 		if (copy_file_with_time(dest->buf, src->buf, 0666))
 			die_errno(_("failed to copy file to '%s'"), dest->buf);
 	}
-	closedir(dir);
+
+	if (iter_status != ITER_DONE) {
+		strbuf_setlen(src, src_len);
+		die(_("failed to iterate over '%s'"), src->buf);
+	}
 }
 
 static void clone_local(const char *src_repo, const char *dest_repo)
@@ -481,7 +482,7 @@  static void clone_local(const char *src_repo, const char *dest_repo)
 		get_common_dir(&dest, dest_repo);
 		strbuf_addstr(&src, "/objects");
 		strbuf_addstr(&dest, "/objects");
-		copy_or_link_directory(&src, &dest, src_repo, src.len);
+		copy_or_link_directory(&src, &dest, src_repo);
 		strbuf_release(&src);
 		strbuf_release(&dest);
 	}