diff mbox series

entry.c: use dir-iterator to avoid explicit dir traversal

Message ID 20191208180439.19018-1-otalpster@gmail.com (mailing list archive)
State New, archived
Headers show
Series entry.c: use dir-iterator to avoid explicit dir traversal | expand

Commit Message

otalpster Dec. 8, 2019, 6:04 p.m. UTC
From: Plato <otalpster@gmail.com>

Replace usage of opendir/readdir/closedir API to traverse directories
recursively, at remove_subtree() function, by the dir-iterator API. This
simplifies the code and avoids recursive calls to remove_subtree().

Signed-off-by: Plato <otalpster@gmail.com>
---
Hello,

This is my first patch.
I hope I cc'd the correct people and didn't mess up.

The changes pass the test suite t/ and Travis CI.
Please point out any mistakes.

Thanks for your time! :)

 entry.c | 32 +++++++++++++++-----------------
 1 file changed, 15 insertions(+), 17 deletions(-)

Comments

Derrick Stolee Dec. 9, 2019, 3:21 p.m. UTC | #1
On 12/8/2019 1:04 PM, otalpster@gmail.com wrote:
> From: Plato <otalpster@gmail.com>
> 
> Replace usage of opendir/readdir/closedir API to traverse directories
> recursively, at remove_subtree() function, by the dir-iterator API. This
> simplifies the code and avoids recursive calls to remove_subtree().
> 
> Signed-off-by: Plato <otalpster@gmail.com>
> ---
> Hello,
> 
> This is my first patch.
> I hope I cc'd the correct people and didn't mess up.

Welcome, Plato!

> The changes pass the test suite t/ and Travis CI.
> Please point out any mistakes.
> 
> Thanks for your time! :)
> 
>  entry.c | 32 +++++++++++++++-----------------
>  1 file changed, 15 insertions(+), 17 deletions(-)
> 
> diff --git a/entry.c b/entry.c
> index 53380bb614..e7f4881d3b 100644
> --- a/entry.c
> +++ b/entry.c
> @@ -2,6 +2,8 @@
>  #include "blob.h"
>  #include "object-store.h"
>  #include "dir.h"
> +#include "iterator.h"
> +#include "dir-iterator.h"
>  #include "streaming.h"
>  #include "submodule.h"
>  #include "progress.h"
> @@ -50,29 +52,25 @@ static void create_directories(const char *path, int path_len,
>  
>  static void remove_subtree(struct strbuf *path)
>  {
> -	DIR *dir = opendir(path->buf);
> -	struct dirent *de;
> -	int origlen = path->len;
> +	int ok;
> +	unsigned int flags = DIR_ITERATOR_PEDANTIC;
> +	struct dir_iterator *iter = dir_iterator_begin(path->buf, flags);
>  
> -	if (!dir)
> +	if (!iter)
>  		die_errno("cannot opendir '%s'", path->buf);
> -	while ((de = readdir(dir)) != NULL) {
> -		struct stat st;
>  
> -		if (is_dot_or_dotdot(de->d_name))
> +	while ((ok = dir_iterator_advance(iter)) == ITER_OK) {
> +		if (is_dot_or_dotdot(iter->path.buf))
>  			continue;

The documentation in dir-iterator.h seems to skip . and .. for you.
This 'if' shouldn't be needed.

> -		strbuf_addch(path, '/');
> -		strbuf_addstr(path, de->d_name);
> -		if (lstat(path->buf, &st))
> -			die_errno("cannot lstat '%s'", path->buf);
> -		if (S_ISDIR(st.st_mode))
> -			remove_subtree(path);

It took me reading dir-iterator.h to realize that the iterator
is recursive, because I'm new to this API. This is a nice cleanup!

> -		else if (unlink(path->buf))
> -			die_errno("cannot unlink '%s'", path->buf);
> -		strbuf_setlen(path, origlen);
> +		if (unlink(iter->path.buf)) {
> +			die_errno("cannot unlink '%s'", iter->path.buf);
> +		}

nit: don't use braces for a single-line block.

>  	}
> -	closedir(dir);
> +
> +	if (ok != ITER_DONE)
> +		die(_("failed to iterate over '%s'"), path->buf);
> +
>  	if (rmdir(path->buf))
>  		die_errno("cannot rmdir '%s'", path->buf);
>  }

Thanks,
-Stolee
Matheus Tavares Dec. 9, 2019, 5:41 p.m. UTC | #2
On Sun, Dec 8, 2019 at 3:06 PM <otalpster@gmail.com> wrote:
>
> From: Plato <otalpster@gmail.com>
>
> Replace usage of opendir/readdir/closedir API to traverse directories
> recursively, at remove_subtree() function, by the dir-iterator API. This
> simplifies the code and avoids recursive calls to remove_subtree().
>
> Signed-off-by: Plato <otalpster@gmail.com>
> ---
> Hello,
>
> This is my first patch.

Hello Plato, and welcome!
Thanks for working on this.

> I hope I cc'd the correct people and didn't mess up.
>
> The changes pass the test suite t/ and Travis CI.
> Please point out any mistakes.
>
> Thanks for your time! :)
>
>  entry.c | 32 +++++++++++++++-----------------
>  1 file changed, 15 insertions(+), 17 deletions(-)
>
> diff --git a/entry.c b/entry.c
> index 53380bb614..e7f4881d3b 100644
> --- a/entry.c
> +++ b/entry.c
> @@ -2,6 +2,8 @@
>  #include "blob.h"
>  #include "object-store.h"
>  #include "dir.h"
> +#include "iterator.h"
> +#include "dir-iterator.h"
>  #include "streaming.h"
>  #include "submodule.h"
>  #include "progress.h"
> @@ -50,29 +52,25 @@ static void create_directories(const char *path, int path_len,
>
>  static void remove_subtree(struct strbuf *path)
>  {
> -       DIR *dir = opendir(path->buf);
> -       struct dirent *de;
> -       int origlen = path->len;
> +       int ok;
> +       unsigned int flags = DIR_ITERATOR_PEDANTIC;
> +       struct dir_iterator *iter = dir_iterator_begin(path->buf, flags);
>
> -       if (!dir)
> +       if (!iter)
>                 die_errno("cannot opendir '%s'", path->buf);

Nitpick: since dir_iterator_begin() might fail for reasons other than
an opendir() error, I think the error message here could be more
generic. Maybe "failed to start iterator over %s"?

> -       while ((de = readdir(dir)) != NULL) {
> -               struct stat st;
>
> -               if (is_dot_or_dotdot(de->d_name))
> +       while ((ok = dir_iterator_advance(iter)) == ITER_OK) {
> +               if (is_dot_or_dotdot(iter->path.buf))

This check is already done by dir-iterator internally, so you may
remove it here.

>                         continue;
>
> -               strbuf_addch(path, '/');
> -               strbuf_addstr(path, de->d_name);
> -               if (lstat(path->buf, &st))
> -                       die_errno("cannot lstat '%s'", path->buf);
> -               if (S_ISDIR(st.st_mode))
> -                       remove_subtree(path);
> -               else if (unlink(path->buf))
> -                       die_errno("cannot unlink '%s'", path->buf);
> -               strbuf_setlen(path, origlen);
> +               if (unlink(iter->path.buf)) {

unlink()-ing a directory in Linux will return a EISDIR error. So I
think you still need to use S_ISDIR() to check if iter->path.buf is a
directory and call rmdir(), in this case.

However, note that the dir-iterator API gives entries in pre-order.
I.e. a directory appears before its subentries. In the use case of
remove_subtree(), though, we need to traverse in post-order, since we
have to remove the subentries before removing the directory where they
reside. My suggestion is that you add a preliminary patch,
implementing a new DIR_ITERATOR_POST_ORDER flag to dir-iterator.h, and
then use it in this patch.

You may also want to check this[1] series, which worked towards the
same goal of converting remove_subtree(). It ended up not getting
merged, back them, but some of the patches were re-used in this[2]
series which got merged. I think you could also re-use some of the
code from [1] that implements the post-order traversing and a test[3]
for remove_subtree().

Thanks,
Matheus

[1]: https://public-inbox.org/git/1493226219-33423-1-git-send-email-bnmvco@gmail.com/
[2]: https://public-inbox.org/git/cover.1562801254.git.matheus.bernardino@usp.br/
[3]: https://public-inbox.org/git/1493226219-33423-3-git-send-email-bnmvco@gmail.com/
Junio C Hamano Dec. 9, 2019, 9:18 p.m. UTC | #3
Matheus Tavares Bernardino <matheus.bernardino@usp.br> writes:

>> The changes pass the test suite t/ and Travis CI.
>> Please point out any mistakes.
>> ...
>> -               strbuf_addch(path, '/');
>> -               strbuf_addstr(path, de->d_name);
>> -               if (lstat(path->buf, &st))
>> -                       die_errno("cannot lstat '%s'", path->buf);
>> -               if (S_ISDIR(st.st_mode))
>> -                       remove_subtree(path);
>> -               else if (unlink(path->buf))
>> -                       die_errno("cannot unlink '%s'", path->buf);
>> -               strbuf_setlen(path, origlen);
>> +               if (unlink(iter->path.buf)) {
>
> unlink()-ing a directory in Linux will return a EISDIR error. So I
> think you still need to use S_ISDIR() to check if iter->path.buf is a
> directory and call rmdir(), in this case.
>
> However, note that the dir-iterator API gives entries in pre-order.
> I.e. a directory appears before its subentries. In the use case of
> remove_subtree(), though, we need to traverse in post-order, since we
> have to remove the subentries before removing the directory where they
> reside. My suggestion is that you add a preliminary patch,
> implementing a new DIR_ITERATOR_POST_ORDER flag to dir-iterator.h, and
> then use it in this patch.

Thanks for a review and a few hints to nudge a new contributor in
the right direction.  Very much appreciated.

I wonder why the bugs in this patch weren't caught by self test we
already have, by the way.  We need a bit better test coverage,
perhaps?
Matheus Tavares Dec. 10, 2019, 4:38 a.m. UTC | #4
On Mon, Dec 9, 2019 at 6:18 PM Junio C Hamano <gitster@pobox.com> wrote:
>
> Matheus Tavares Bernardino <matheus.bernardino@usp.br> writes:
>
> >> The changes pass the test suite t/ and Travis CI.
> >> Please point out any mistakes.
> >> ...
> >> -               strbuf_addch(path, '/');
> >> -               strbuf_addstr(path, de->d_name);
> >> -               if (lstat(path->buf, &st))
> >> -                       die_errno("cannot lstat '%s'", path->buf);
> >> -               if (S_ISDIR(st.st_mode))
> >> -                       remove_subtree(path);
> >> -               else if (unlink(path->buf))
> >> -                       die_errno("cannot unlink '%s'", path->buf);
> >> -               strbuf_setlen(path, origlen);
> >> +               if (unlink(iter->path.buf)) {
> >
> > unlink()-ing a directory in Linux will return a EISDIR error. So I
> > think you still need to use S_ISDIR() to check if iter->path.buf is a
> > directory and call rmdir(), in this case.
> >
> > However, note that the dir-iterator API gives entries in pre-order.
> > I.e. a directory appears before its subentries. In the use case of
> > remove_subtree(), though, we need to traverse in post-order, since we
> > have to remove the subentries before removing the directory where they
> > reside. My suggestion is that you add a preliminary patch,
> > implementing a new DIR_ITERATOR_POST_ORDER flag to dir-iterator.h, and
> > then use it in this patch.
>
> Thanks for a review and a few hints to nudge a new contributor in
> the right direction.  Very much appreciated.
>
> I wonder why the bugs in this patch weren't caught by self test we
> already have, by the way.  We need a bit better test coverage,
> perhaps?

I think there is no current test that covers remove_subtree() being
called with nested directories. But we could use the test proposed by
Daniel[1], which does fail when this current patch is applied. So,
maybe, Plato could also include this test in v2, before performing the
dir-iterator convertion.

[1]: https://public-inbox.org/git/1493226219-33423-3-git-send-email-bnmvco@gmail.com/
otalpster Dec. 16, 2019, 4:12 p.m. UTC | #5
On Tue, Dec 10, 2019 at 6:38 AM Matheus Tavares Bernardino
<matheus.bernardino@usp.br> wrote:
>
> On Mon, Dec 9, 2019 at 6:18 PM Junio C Hamano <gitster@pobox.com> wrote:
> >
> > Matheus Tavares Bernardino <matheus.bernardino@usp.br> writes:
> >
> > >> The changes pass the test suite t/ and Travis CI.
> > >> Please point out any mistakes.
> > >> ...
> > >> -               strbuf_addch(path, '/');
> > >> -               strbuf_addstr(path, de->d_name);
> > >> -               if (lstat(path->buf, &st))
> > >> -                       die_errno("cannot lstat '%s'", path->buf);
> > >> -               if (S_ISDIR(st.st_mode))
> > >> -                       remove_subtree(path);
> > >> -               else if (unlink(path->buf))
> > >> -                       die_errno("cannot unlink '%s'", path->buf);
> > >> -               strbuf_setlen(path, origlen);
> > >> +               if (unlink(iter->path.buf)) {
> > >
> > > unlink()-ing a directory in Linux will return a EISDIR error. So I
> > > think you still need to use S_ISDIR() to check if iter->path.buf is a
> > > directory and call rmdir(), in this case.
> > >
> > > However, note that the dir-iterator API gives entries in pre-order.
> > > I.e. a directory appears before its subentries. In the use case of
> > > remove_subtree(), though, we need to traverse in post-order, since we
> > > have to remove the subentries before removing the directory where they
> > > reside. My suggestion is that you add a preliminary patch,
> > > implementing a new DIR_ITERATOR_POST_ORDER flag to dir-iterator.h, and
> > > then use it in this patch.
> >
> > Thanks for a review and a few hints to nudge a new contributor in
> > the right direction.  Very much appreciated.
> >
> > I wonder why the bugs in this patch weren't caught by self test we
> > already have, by the way.  We need a bit better test coverage,
> > perhaps?
>
> I think there is no current test that covers remove_subtree() being
> called with nested directories. But we could use the test proposed by
> Daniel[1], which does fail when this current patch is applied. So,
> maybe, Plato could also include this test in v2, before performing the
> dir-iterator convertion.
>
> [1]: https://public-inbox.org/git/1493226219-33423-3-git-send-email-bnmvco@gmail.com/

Hello!
Thanks for the warm welcome and sorry for the late reply.
Your review and hints you provided were very helpful and appreciated.

Yes, I can work and include the test in v2, as well as performing the
dir-iterator conversion. The required process and the necessary
changes I need to make were very clear.

This week I'm very busy. I'll work on this in great detail from next week.

Thanks,
Plato
diff mbox series

Patch

diff --git a/entry.c b/entry.c
index 53380bb614..e7f4881d3b 100644
--- a/entry.c
+++ b/entry.c
@@ -2,6 +2,8 @@ 
 #include "blob.h"
 #include "object-store.h"
 #include "dir.h"
+#include "iterator.h"
+#include "dir-iterator.h"
 #include "streaming.h"
 #include "submodule.h"
 #include "progress.h"
@@ -50,29 +52,25 @@  static void create_directories(const char *path, int path_len,
 
 static void remove_subtree(struct strbuf *path)
 {
-	DIR *dir = opendir(path->buf);
-	struct dirent *de;
-	int origlen = path->len;
+	int ok;
+	unsigned int flags = DIR_ITERATOR_PEDANTIC;
+	struct dir_iterator *iter = dir_iterator_begin(path->buf, flags);
 
-	if (!dir)
+	if (!iter)
 		die_errno("cannot opendir '%s'", path->buf);
-	while ((de = readdir(dir)) != NULL) {
-		struct stat st;
 
-		if (is_dot_or_dotdot(de->d_name))
+	while ((ok = dir_iterator_advance(iter)) == ITER_OK) {
+		if (is_dot_or_dotdot(iter->path.buf))
 			continue;
 
-		strbuf_addch(path, '/');
-		strbuf_addstr(path, de->d_name);
-		if (lstat(path->buf, &st))
-			die_errno("cannot lstat '%s'", path->buf);
-		if (S_ISDIR(st.st_mode))
-			remove_subtree(path);
-		else if (unlink(path->buf))
-			die_errno("cannot unlink '%s'", path->buf);
-		strbuf_setlen(path, origlen);
+		if (unlink(iter->path.buf)) {
+			die_errno("cannot unlink '%s'", iter->path.buf);
+		}
 	}
-	closedir(dir);
+
+	if (ok != ITER_DONE)
+		die(_("failed to iterate over '%s'"), path->buf);
+
 	if (rmdir(path->buf))
 		die_errno("cannot rmdir '%s'", path->buf);
 }