[GSoC,v5,3/7] dir-iterator: add flags parameter to dir_iterator_begin
diff mbox series

Message ID 20190330224907.3277-4-matheus.bernardino@usp.br
State New
Headers show
Series
  • clone: dir-iterator refactoring with tests
Related show

Commit Message

Matheus Tavares Bernardino March 30, 2019, 10:49 p.m. UTC
Add the possibility of giving flags to dir_iterator_begin to initialize
a dir-iterator with special options.

Currently possible flags are DIR_ITERATOR_PEDANTIC, which makes
dir_iterator_advance abort immediately in the case of an error while
trying to fetch next entry; and DIR_ITERATOR_FOLLOW_SYMLINKS, which
makes the iteration follow symlinks to directories and include its
contents in the iteration. These new flags will be used in a subsequent
patch.

Also adjust refs/files-backend.c to the new dir_iterator_begin
signature.

Signed-off-by: Matheus Tavares <matheus.bernardino@usp.br>
---
 dir-iterator.c       | 28 +++++++++++++++++++++++++---
 dir-iterator.h       | 39 +++++++++++++++++++++++++++++++++------
 refs/files-backend.c |  2 +-
 3 files changed, 59 insertions(+), 10 deletions(-)

Comments

Thomas Gummerer March 31, 2019, 6:12 p.m. UTC | #1
On 03/30, Matheus Tavares wrote:
> Add the possibility of giving flags to dir_iterator_begin to initialize
> a dir-iterator with special options.
> 
> Currently possible flags are DIR_ITERATOR_PEDANTIC, which makes
> dir_iterator_advance abort immediately in the case of an error while
> trying to fetch next entry; and DIR_ITERATOR_FOLLOW_SYMLINKS, which
> makes the iteration follow symlinks to directories and include its
> contents in the iteration. These new flags will be used in a subsequent
> patch.
> 
> Also adjust refs/files-backend.c to the new dir_iterator_begin
> signature.
> 
> Signed-off-by: Matheus Tavares <matheus.bernardino@usp.br>
> ---
>  dir-iterator.c       | 28 +++++++++++++++++++++++++---
>  dir-iterator.h       | 39 +++++++++++++++++++++++++++++++++------
>  refs/files-backend.c |  2 +-
>  3 files changed, 59 insertions(+), 10 deletions(-)
> 
> diff --git a/dir-iterator.c b/dir-iterator.c
> index f2dcd82fde..17aca8ea41 100644
> --- a/dir-iterator.c
> +++ b/dir-iterator.c
> @@ -48,12 +48,16 @@ struct dir_iterator_int {
>  	 * that will be included in this iteration.
>  	 */
>  	struct dir_iterator_level *levels;
> +
> +	/* Combination of flags for this dir-iterator */
> +	unsigned flags;
>  };
>  
>  int dir_iterator_advance(struct dir_iterator *dir_iterator)
>  {
>  	struct dir_iterator_int *iter =
>  		(struct dir_iterator_int *)dir_iterator;
> +	int ret;

Minor nit: I'd define this variable closer to where it is actually
used, inside the second 'while(1)' loop in this function.  That would
make it clearer that it's only used there and not in other places in
the function as well, which I had first expected when I read this.

>  	while (1) {
>  		struct dir_iterator_level *level =
> @@ -71,6 +75,8 @@ int dir_iterator_advance(struct dir_iterator *dir_iterator)
>  
>  			level->dir = opendir(iter->base.path.buf);
>  			if (!level->dir && errno != ENOENT) {
> +				if (iter->flags & DIR_ITERATOR_PEDANTIC)
> +					goto error_out;
>  				warning("error opening directory %s: %s",
>  					iter->base.path.buf, strerror(errno));
>  				/* Popping the level is handled below */
> @@ -122,6 +128,8 @@ int dir_iterator_advance(struct dir_iterator *dir_iterator)
>  			if (!de) {
>  				/* This level is exhausted; pop up a level. */
>  				if (errno) {
> +					if (iter->flags & DIR_ITERATOR_PEDANTIC)
> +						goto error_out;
>  					warning("error reading directory %s: %s",
>  						iter->base.path.buf, strerror(errno));
>  				} else if (closedir(level->dir))
> @@ -138,11 +146,20 @@ int dir_iterator_advance(struct dir_iterator *dir_iterator)
>  				continue;
>  
>  			strbuf_addstr(&iter->base.path, de->d_name);
> -			if (lstat(iter->base.path.buf, &iter->base.st) < 0) {
> -				if (errno != ENOENT)
> +
> +			if (iter->flags & DIR_ITERATOR_FOLLOW_SYMLINKS)
> +				ret = stat(iter->base.path.buf, &iter->base.st);
> +			else
> +				ret = lstat(iter->base.path.buf, &iter->base.st);
> +
> +			if (ret < 0) {
> +				if (errno != ENOENT) {
> +					if (iter->flags & DIR_ITERATOR_PEDANTIC)
> +						goto error_out;
>  					warning("error reading path '%s': %s",
>  						iter->base.path.buf,
>  						strerror(errno));
> +				}
>  				continue;
>  			}
>  
> @@ -159,6 +176,10 @@ int dir_iterator_advance(struct dir_iterator *dir_iterator)
>  			return ITER_OK;
>  		}
>  	}
> +
> +error_out:
> +	dir_iterator_abort(dir_iterator);
> +	return ITER_ERROR;
>  }
>  
>  int dir_iterator_abort(struct dir_iterator *dir_iterator)
> @@ -182,7 +203,7 @@ int dir_iterator_abort(struct dir_iterator *dir_iterator)
>  	return ITER_DONE;
>  }
>  
> -struct dir_iterator *dir_iterator_begin(const char *path)
> +struct dir_iterator *dir_iterator_begin(const char *path, unsigned flags)
>  {
>  	struct dir_iterator_int *iter = xcalloc(1, sizeof(*iter));
>  	struct dir_iterator *dir_iterator = &iter->base;
> @@ -195,6 +216,7 @@ struct dir_iterator *dir_iterator_begin(const char *path)
>  
>  	ALLOC_GROW(iter->levels, 10, iter->levels_alloc);
>  
> +	iter->flags = flags;
>  	iter->levels_nr = 1;
>  	iter->levels[0].initialized = 0;
>  
> diff --git a/dir-iterator.h b/dir-iterator.h
> index 970793d07a..93646c3bea 100644
> --- a/dir-iterator.h
> +++ b/dir-iterator.h
> @@ -19,7 +19,7 @@
>   * A typical iteration looks like this:
>   *
>   *     int ok;
> - *     struct iterator *iter = dir_iterator_begin(path);
> + *     struct iterator *iter = dir_iterator_begin(path, 0);

Outside of this context, we already mentione errorhandling when
'ok != ITER_DONE' in his example.  This still can't happen with the
way the dir iterator is used here, but it serves as a reminder if
people are using the DIR_ITERATOR_PEDANTIC flag.  Good.

>   *
>   *     while ((ok = dir_iterator_advance(iter)) == ITER_OK) {
>   *             if (want_to_stop_iteration()) {
> @@ -40,6 +40,20 @@
>   * dir_iterator_advance() again.
>   */
>  
> +/*
> + * Flags for dir_iterator_begin:
> + *
> + * - DIR_ITERATOR_PEDANTIC: override dir-iterator's default behavior
> + *   in case of an error while trying to fetch the next entry, which is
> + *   to emit a warning and keep going. With this flag, resouces are
> + *   freed and ITER_ERROR is return immediately.
> + *
> + * - DIR_ITERATOR_FOLLOW_SYMLINKS: make dir-iterator follow symlinks to
> + *   directories, i.e., iterate over linked directories' contents.
> + */
> +#define DIR_ITERATOR_PEDANTIC (1 << 0)
> +#define DIR_ITERATOR_FOLLOW_SYMLINKS (1 << 1)
> +
>  struct dir_iterator {
>  	/* The current path: */
>  	struct strbuf path;
> @@ -54,20 +68,28 @@ struct dir_iterator {
>  	/* The current basename: */
>  	const char *basename;
>  
> -	/* The result of calling lstat() on path: */
> +	/*
> +	 * The result of calling lstat() on path or stat(), if the
> +	 * DIR_ITERATOR_FOLLOW_SYMLINKS flag was set at
> +	 * dir_iterator's initialization.
> +	 */
>  	struct stat st;
>  };
>  
>  /*
> - * Start a directory iteration over path. Return a dir_iterator that
> - * holds the internal state of the iteration.
> + * Start a directory iteration over path with the combination of
> + * options specified by flags. Return a dir_iterator that holds the
> + * internal state of the iteration.
>   *
>   * The iteration includes all paths under path, not including path
>   * itself and not including "." or ".." entries.
>   *
> - * path is the starting directory. An internal copy will be made.
> + * Parameters are:
> + *  - path is the starting directory. An internal copy will be made.
> + *  - flags is a combination of the possible flags to initialize a
> + *    dir-iterator or 0 for default behaviour.
>   */
> -struct dir_iterator *dir_iterator_begin(const char *path);
> +struct dir_iterator *dir_iterator_begin(const char *path, unsigned flags);
>  
>  /*
>   * Advance the iterator to the first or next item and return ITER_OK.
> @@ -76,6 +98,11 @@ struct dir_iterator *dir_iterator_begin(const char *path);
>   * dir_iterator and associated resources and return ITER_ERROR. It is
>   * a bug to use iterator or call this function again after it has
>   * returned ITER_DONE or ITER_ERROR.
> + *
> + * Note that whether dir-iterator will return ITER_ERROR when failing
> + * to fetch the next entry or just emit a warning and try to fetch the
> + * next is defined by the 'pedantic' option at dir-iterator's
> + * initialization.

I feel like at this point we are repeating documentation that already
exists for the flags.  Should we ever find a reason to return
ITER_ERROR without the pedantic flag, this comment is likely to become
out of date.  I think not adding this note is probably better in this
case.

>   */
>  int dir_iterator_advance(struct dir_iterator *iterator);
>  
> diff --git a/refs/files-backend.c b/refs/files-backend.c
> index ef053f716c..2ce9783097 100644
> --- a/refs/files-backend.c
> +++ b/refs/files-backend.c
> @@ -2143,7 +2143,7 @@ static struct ref_iterator *reflog_iterator_begin(struct ref_store *ref_store,
>  
>  	base_ref_iterator_init(ref_iterator, &files_reflog_iterator_vtable, 0);
>  	strbuf_addf(&sb, "%s/logs", gitdir);
> -	iter->dir_iterator = dir_iterator_begin(sb.buf);
> +	iter->dir_iterator = dir_iterator_begin(sb.buf, 0);
>  	iter->ref_store = ref_store;
>  	strbuf_release(&sb);
>  
> -- 
> 2.20.1
>
Matheus Tavares Bernardino April 10, 2019, 8:24 p.m. UTC | #2
Hi, Thomas

Sorry for the late reply, but now that I submitted my GSoC proposal I
can finally come back to this series.

On Sun, Mar 31, 2019 at 3:12 PM Thomas Gummerer <t.gummerer@gmail.com> wrote:
>
> On 03/30, Matheus Tavares wrote:
> > Add the possibility of giving flags to dir_iterator_begin to initialize
> > a dir-iterator with special options.
> >
> > Currently possible flags are DIR_ITERATOR_PEDANTIC, which makes
> > dir_iterator_advance abort immediately in the case of an error while
> > trying to fetch next entry; and DIR_ITERATOR_FOLLOW_SYMLINKS, which
> > makes the iteration follow symlinks to directories and include its
> > contents in the iteration. These new flags will be used in a subsequent
> > patch.
> >
> > Also adjust refs/files-backend.c to the new dir_iterator_begin
> > signature.
> >
> > Signed-off-by: Matheus Tavares <matheus.bernardino@usp.br>
> > ---
> >  dir-iterator.c       | 28 +++++++++++++++++++++++++---
> >  dir-iterator.h       | 39 +++++++++++++++++++++++++++++++++------
> >  refs/files-backend.c |  2 +-
> >  3 files changed, 59 insertions(+), 10 deletions(-)
> >
> > diff --git a/dir-iterator.c b/dir-iterator.c
> > index f2dcd82fde..17aca8ea41 100644
> > --- a/dir-iterator.c
> > +++ b/dir-iterator.c
> > @@ -48,12 +48,16 @@ struct dir_iterator_int {
> >        * that will be included in this iteration.
> >        */
> >       struct dir_iterator_level *levels;
> > +
> > +     /* Combination of flags for this dir-iterator */
> > +     unsigned flags;
> >  };
> >
> >  int dir_iterator_advance(struct dir_iterator *dir_iterator)
> >  {
> >       struct dir_iterator_int *iter =
> >               (struct dir_iterator_int *)dir_iterator;
> > +     int ret;
>
> Minor nit: I'd define this variable closer to where it is actually
> used, inside the second 'while(1)' loop in this function.  That would
> make it clearer that it's only used there and not in other places in
> the function as well, which I had first expected when I read this.

Right, thanks.

> > diff --git a/dir-iterator.h b/dir-iterator.h
> > index 970793d07a..93646c3bea 100644
> > --- a/dir-iterator.h
> > +++ b/dir-iterator.h
> > @@ -19,7 +19,7 @@
> >   * A typical iteration looks like this:
> >   *
> >   *     int ok;
> > - *     struct iterator *iter = dir_iterator_begin(path);
> > + *     struct iterator *iter = dir_iterator_begin(path, 0);
>
> Outside of this context, we already mentione errorhandling when
> 'ok != ITER_DONE' in his example.  This still can't happen with the
> way the dir iterator is used here, but it serves as a reminder if
> people are using the DIR_ITERATOR_PEDANTIC flag.  Good.

This made me think again about the documentation saying that
dir_iterator_abort() and dir_iterator_advance() may return ITER_ERROR,
but the implementation does not containing these possibilities.
(Besides when the pedantic flag is used). Maybe the idea was to make
API-users implement the check for an ITER_ERROR in case dir-iterator
needs to start returning it in the future.

But do you think such a change in dir-iterator is likely to happen?
Maybe we could just make dir_iterator_abort() be void and remove this
section from documentation. Then, for dir_iterator_advance() users
would only need to check for ITER_ERROR if the pedantic flag was given
at dir-iterator creation...

Also CC-ed Michael in case he has some input
Thomas Gummerer April 11, 2019, 9:09 p.m. UTC | #3
On 04/10, Matheus Tavares Bernardino wrote:
> > > diff --git a/dir-iterator.h b/dir-iterator.h
> > > index 970793d07a..93646c3bea 100644
> > > --- a/dir-iterator.h
> > > +++ b/dir-iterator.h
> > > @@ -19,7 +19,7 @@
> > >   * A typical iteration looks like this:
> > >   *
> > >   *     int ok;
> > > - *     struct iterator *iter = dir_iterator_begin(path);
> > > + *     struct iterator *iter = dir_iterator_begin(path, 0);
> >
> > Outside of this context, we already mentione errorhandling when
> > 'ok != ITER_DONE' in his example.  This still can't happen with the
> > way the dir iterator is used here, but it serves as a reminder if
> > people are using the DIR_ITERATOR_PEDANTIC flag.  Good.
> 
> This made me think again about the documentation saying that
> dir_iterator_abort() and dir_iterator_advance() may return ITER_ERROR,
> but the implementation does not containing these possibilities.
> (Besides when the pedantic flag is used). Maybe the idea was to make
> API-users implement the check for an ITER_ERROR in case dir-iterator
> needs to start returning it in the future.

Yeah, I think that was the intention.

> But do you think such a change in dir-iterator is likely to happen?
> Maybe we could just make dir_iterator_abort() be void and remove this
> section from documentation. Then, for dir_iterator_advance() users
> would only need to check for ITER_ERROR if the pedantic flag was given
> at dir-iterator creation...

Dunno.  In a world where we have the pedantic flag, I think only
returning ITER_ERROR if that flag is given might be what we want to
do.  I can't think of a reason why we would want to return ITER_ERROR
without the pedantic flag in that case.

Though I think I would change the example the other way in that case,
and pass DIR_ITERATOR_PEDANTIC to 'dir_iterator_begin()', as it would
be easy to forget error handling otherwise, even when it is
necessary.  I'd rather err on the side of showing too much error
handling, than having people forget it and having users run into some
odd edge cases in the wild that the tests don't cover.

> Also CC-ed Michael in case he has some input
Matheus Tavares Bernardino April 23, 2019, 5:07 p.m. UTC | #4
On Thu, Apr 11, 2019 at 6:09 PM Thomas Gummerer <t.gummerer@gmail.com> wrote:
>
> On 04/10, Matheus Tavares Bernardino wrote:
> > > > diff --git a/dir-iterator.h b/dir-iterator.h
> > > > index 970793d07a..93646c3bea 100644
> > > > --- a/dir-iterator.h
> > > > +++ b/dir-iterator.h
> > > > @@ -19,7 +19,7 @@
> > > >   * A typical iteration looks like this:
> > > >   *
> > > >   *     int ok;
> > > > - *     struct iterator *iter = dir_iterator_begin(path);
> > > > + *     struct iterator *iter = dir_iterator_begin(path, 0);
> > >
> > > Outside of this context, we already mentione errorhandling when
> > > 'ok != ITER_DONE' in his example.  This still can't happen with the
> > > way the dir iterator is used here, but it serves as a reminder if
> > > people are using the DIR_ITERATOR_PEDANTIC flag.  Good.
> >
> > This made me think again about the documentation saying that
> > dir_iterator_abort() and dir_iterator_advance() may return ITER_ERROR,
> > but the implementation does not containing these possibilities.
> > (Besides when the pedantic flag is used). Maybe the idea was to make
> > API-users implement the check for an ITER_ERROR in case dir-iterator
> > needs to start returning it in the future.
>
> Yeah, I think that was the intention.
>
> > But do you think such a change in dir-iterator is likely to happen?
> > Maybe we could just make dir_iterator_abort() be void and remove this
> > section from documentation. Then, for dir_iterator_advance() users
> > would only need to check for ITER_ERROR if the pedantic flag was given
> > at dir-iterator creation...
>
> Dunno.  In a world where we have the pedantic flag, I think only
> returning ITER_ERROR if that flag is given might be what we want to
> do.  I can't think of a reason why we would want to return ITER_ERROR
> without the pedantic flag in that case.

Ok. I began doing the change, but got stuck in a specific decision.
What I was trying to do is:

1) Make dir_iterator_advance() return ITER_ERROR only when the
pedantic flag is given;
2) Make dir_iterator_abort() be void.

The first change is trivial. But the second is not so easy: Since the
[only] current API user defines other iterators on top of
dir-iterator, it would require a somehow big surgery on refs/* to make
this change. Should I proceed and make the changes at refs/* or should
I keep dir_iterator_abort() returning int, although it can never fail?

There's also a third option: The only operation that may fail during
dir_iterator_abort() is closedir(). But even on
dir_iterator_advance(), I'm treating this error as "non-fatal" in the
sense that it's not caught by the pedantic flag (although a warning is
emitted). I did it like this because it doesn't seem like a major
error during dir iteration... But I could change this and make
DIR_ITERATOR_PEDANTIC return ITER_ERROR upon closedir() errors for
both dir-iterator advance() and abort() functions. What do you think?

> Though I think I would change the example the other way in that case,
> and pass DIR_ITERATOR_PEDANTIC to 'dir_iterator_begin()', as it would
> be easy to forget error handling otherwise, even when it is
> necessary.  I'd rather err on the side of showing too much error
> handling, than having people forget it and having users run into some
> odd edge cases in the wild that the tests don't cover.

Yes, I agree.

> > Also CC-ed Michael in case he has some input
Thomas Gummerer April 24, 2019, 6:36 p.m. UTC | #5
On 04/23, Matheus Tavares Bernardino wrote:
> On Thu, Apr 11, 2019 at 6:09 PM Thomas Gummerer <t.gummerer@gmail.com> wrote:
> >
> > On 04/10, Matheus Tavares Bernardino wrote:
> > > > > diff --git a/dir-iterator.h b/dir-iterator.h
> > > > > index 970793d07a..93646c3bea 100644
> > > > > --- a/dir-iterator.h
> > > > > +++ b/dir-iterator.h
> > > > > @@ -19,7 +19,7 @@
> > > > >   * A typical iteration looks like this:
> > > > >   *
> > > > >   *     int ok;
> > > > > - *     struct iterator *iter = dir_iterator_begin(path);
> > > > > + *     struct iterator *iter = dir_iterator_begin(path, 0);
> > > >
> > > > Outside of this context, we already mentione errorhandling when
> > > > 'ok != ITER_DONE' in his example.  This still can't happen with the
> > > > way the dir iterator is used here, but it serves as a reminder if
> > > > people are using the DIR_ITERATOR_PEDANTIC flag.  Good.
> > >
> > > This made me think again about the documentation saying that
> > > dir_iterator_abort() and dir_iterator_advance() may return ITER_ERROR,
> > > but the implementation does not containing these possibilities.
> > > (Besides when the pedantic flag is used). Maybe the idea was to make
> > > API-users implement the check for an ITER_ERROR in case dir-iterator
> > > needs to start returning it in the future.
> >
> > Yeah, I think that was the intention.
> >
> > > But do you think such a change in dir-iterator is likely to happen?
> > > Maybe we could just make dir_iterator_abort() be void and remove this
> > > section from documentation. Then, for dir_iterator_advance() users
> > > would only need to check for ITER_ERROR if the pedantic flag was given
> > > at dir-iterator creation...
> >
> > Dunno.  In a world where we have the pedantic flag, I think only
> > returning ITER_ERROR if that flag is given might be what we want to
> > do.  I can't think of a reason why we would want to return ITER_ERROR
> > without the pedantic flag in that case.
> 
> Ok. I began doing the change, but got stuck in a specific decision.
> What I was trying to do is:
> 
> 1) Make dir_iterator_advance() return ITER_ERROR only when the
> pedantic flag is given;
> 2) Make dir_iterator_abort() be void.
> 
> The first change is trivial. But the second is not so easy: Since the
> [only] current API user defines other iterators on top of
> dir-iterator, it would require a somehow big surgery on refs/* to make
> this change. Should I proceed and make the changes at refs/* or should
> I keep dir_iterator_abort() returning int, although it can never fail?

Maybe I'm missing something, but wouldn't this change in refs.c be
enough? (Other than actually making dir_iterator_abort not return
anything)

	diff --git a/refs/files-backend.c b/refs/files-backend.c
	index 5848f32ef8..81863c3ee0 100644
	--- a/refs/files-backend.c
	+++ b/refs/files-backend.c
	@@ -2125,13 +2125,12 @@ static int files_reflog_iterator_abort(struct ref_iterator *ref_iterator)
	 {
	        struct files_reflog_iterator *iter =
	                (struct files_reflog_iterator *)ref_iterator;
	-       int ok = ITER_DONE;
	 
	        if (iter->dir_iterator)
	-               ok = dir_iterator_abort(iter->dir_iterator);
	+               dir_iterator_abort(iter->dir_iterator);
	 
	        base_ref_iterator_free(ref_iterator);
	-       return ok;
	+       return ITER_DONE;
	 }
	 
	 static struct ref_iterator_vtable files_reflog_iterator_vtable = {

Currently the only thing calling dir_iterator_abort() is
files_reflog_iterator_abort() from what I can see, and
dir_iterator_abort() always returns ITER_DONE.

That said, I don't know if this is actually worth pursuing.  Having it
return some value and having the caller check that makes it more
future proof, as we won't have to change all the callers in the future
if we want to start returning anything other than ITER_DONE.   Just
leaving it as it is now doesn't actually hurt anybody I think, but may
help in the future.

> There's also a third option: The only operation that may fail during
> dir_iterator_abort() is closedir(). But even on
> dir_iterator_advance(), I'm treating this error as "non-fatal" in the
> sense that it's not caught by the pedantic flag (although a warning is
> emitted). I did it like this because it doesn't seem like a major
> error during dir iteration... But I could change this and make
> DIR_ITERATOR_PEDANTIC return ITER_ERROR upon closedir() errors for
> both dir-iterator advance() and abort() functions. What do you think?

I think this might be the right way to go.  We don't really need an
error from closedir, but at the same time if we are being pedantic,
maybe it should be an error.  I don't have a strong opinion here
either way, other than I think it should probably keep returning an
int.

> > Though I think I would change the example the other way in that case,
> > and pass DIR_ITERATOR_PEDANTIC to 'dir_iterator_begin()', as it would
> > be easy to forget error handling otherwise, even when it is
> > necessary.  I'd rather err on the side of showing too much error
> > handling, than having people forget it and having users run into some
> > odd edge cases in the wild that the tests don't cover.
> 
> Yes, I agree.
> 
> > > Also CC-ed Michael in case he has some input
Matheus Tavares Bernardino April 26, 2019, 4:13 a.m. UTC | #6
On Wed, Apr 24, 2019 at 3:36 PM Thomas Gummerer <t.gummerer@gmail.com> wrote:
>
> On 04/23, Matheus Tavares Bernardino wrote:
> > On Thu, Apr 11, 2019 at 6:09 PM Thomas Gummerer <t.gummerer@gmail.com> wrote:
> > >
> > > On 04/10, Matheus Tavares Bernardino wrote:
> > > > > > diff --git a/dir-iterator.h b/dir-iterator.h
> > > > > > index 970793d07a..93646c3bea 100644
> > > > > > --- a/dir-iterator.h
> > > > > > +++ b/dir-iterator.h
> > > > > > @@ -19,7 +19,7 @@
> > > > > >   * A typical iteration looks like this:
> > > > > >   *
> > > > > >   *     int ok;
> > > > > > - *     struct iterator *iter = dir_iterator_begin(path);
> > > > > > + *     struct iterator *iter = dir_iterator_begin(path, 0);
> > > > >
> > > > > Outside of this context, we already mentione errorhandling when
> > > > > 'ok != ITER_DONE' in his example.  This still can't happen with the
> > > > > way the dir iterator is used here, but it serves as a reminder if
> > > > > people are using the DIR_ITERATOR_PEDANTIC flag.  Good.
> > > >
> > > > This made me think again about the documentation saying that
> > > > dir_iterator_abort() and dir_iterator_advance() may return ITER_ERROR,
> > > > but the implementation does not containing these possibilities.
> > > > (Besides when the pedantic flag is used). Maybe the idea was to make
> > > > API-users implement the check for an ITER_ERROR in case dir-iterator
> > > > needs to start returning it in the future.
> > >
> > > Yeah, I think that was the intention.
> > >
> > > > But do you think such a change in dir-iterator is likely to happen?
> > > > Maybe we could just make dir_iterator_abort() be void and remove this
> > > > section from documentation. Then, for dir_iterator_advance() users
> > > > would only need to check for ITER_ERROR if the pedantic flag was given
> > > > at dir-iterator creation...
> > >
> > > Dunno.  In a world where we have the pedantic flag, I think only
> > > returning ITER_ERROR if that flag is given might be what we want to
> > > do.  I can't think of a reason why we would want to return ITER_ERROR
> > > without the pedantic flag in that case.
> >
> > Ok. I began doing the change, but got stuck in a specific decision.
> > What I was trying to do is:
> >
> > 1) Make dir_iterator_advance() return ITER_ERROR only when the
> > pedantic flag is given;
> > 2) Make dir_iterator_abort() be void.
> >
> > The first change is trivial. But the second is not so easy: Since the
> > [only] current API user defines other iterators on top of
> > dir-iterator, it would require a somehow big surgery on refs/* to make
> > this change. Should I proceed and make the changes at refs/* or should
> > I keep dir_iterator_abort() returning int, although it can never fail?
>
> Maybe I'm missing something, but wouldn't this change in refs.c be
> enough? (Other than actually making dir_iterator_abort not return
> anything)
>
>         diff --git a/refs/files-backend.c b/refs/files-backend.c
>         index 5848f32ef8..81863c3ee0 100644
>         --- a/refs/files-backend.c
>         +++ b/refs/files-backend.c
>         @@ -2125,13 +2125,12 @@ static int files_reflog_iterator_abort(struct ref_iterator *ref_iterator)
>          {
>                 struct files_reflog_iterator *iter =
>                         (struct files_reflog_iterator *)ref_iterator;
>         -       int ok = ITER_DONE;
>
>                 if (iter->dir_iterator)
>         -               ok = dir_iterator_abort(iter->dir_iterator);
>         +               dir_iterator_abort(iter->dir_iterator);
>
>                 base_ref_iterator_free(ref_iterator);
>         -       return ok;
>         +       return ITER_DONE;
>          }
>
>          static struct ref_iterator_vtable files_reflog_iterator_vtable = {

Yes, indeed. But I thought that since the reason for making
dir_iterator_abort() be void is that it always returns ITER_DONE, the
same change should be applied to files_reflog_iterator_abort() as it
would fall into the same case. And this, in turn, would require
changes to ref_iterator_abort() and many other functions at
refs/iterator.c and refs/files-backend.c

> Currently the only thing calling dir_iterator_abort() is
> files_reflog_iterator_abort() from what I can see, and
> dir_iterator_abort() always returns ITER_DONE.
>
> That said, I don't know if this is actually worth pursuing.  Having it
> return some value and having the caller check that makes it more
> future proof, as we won't have to change all the callers in the future
> if we want to start returning anything other than ITER_DONE.   Just
> leaving it as it is now doesn't actually hurt anybody I think, but may
> help in the future.

Ok, I understand.

> > There's also a third option: The only operation that may fail during
> > dir_iterator_abort() is closedir(). But even on
> > dir_iterator_advance(), I'm treating this error as "non-fatal" in the
> > sense that it's not caught by the pedantic flag (although a warning is
> > emitted). I did it like this because it doesn't seem like a major
> > error during dir iteration... But I could change this and make
> > DIR_ITERATOR_PEDANTIC return ITER_ERROR upon closedir() errors for
> > both dir-iterator advance() and abort() functions. What do you think?
>
> I think this might be the right way to go.  We don't really need an
> error from closedir, but at the same time if we are being pedantic,
> maybe it should be an error.  I don't have a strong opinion here
> either way, other than I think it should probably keep returning an
> int.

I know I suggested this option, but searching the code base I saw no
other place that checks closedir()'s return besides dir-iterator. So
maybe the best option would be to keep dir_iterator_abort() always
returning ITER_DONE, even upon closedir() errors. Them, I can document
that the pedantic flag only affects dir_iterator_advance() behavior
(but closedir() errors wouldn't be considered here as well).

I got stuck in this for a while, but finally this option seems good to me now...

> > > Though I think I would change the example the other way in that case,
> > > and pass DIR_ITERATOR_PEDANTIC to 'dir_iterator_begin()', as it would
> > > be easy to forget error handling otherwise, even when it is
> > > necessary.  I'd rather err on the side of showing too much error
> > > handling, than having people forget it and having users run into some
> > > odd edge cases in the wild that the tests don't cover.
> >
> > Yes, I agree.
> >
> > > > Also CC-ed Michael in case he has some input

Patch
diff mbox series

diff --git a/dir-iterator.c b/dir-iterator.c
index f2dcd82fde..17aca8ea41 100644
--- a/dir-iterator.c
+++ b/dir-iterator.c
@@ -48,12 +48,16 @@  struct dir_iterator_int {
 	 * that will be included in this iteration.
 	 */
 	struct dir_iterator_level *levels;
+
+	/* Combination of flags for this dir-iterator */
+	unsigned flags;
 };
 
 int dir_iterator_advance(struct dir_iterator *dir_iterator)
 {
 	struct dir_iterator_int *iter =
 		(struct dir_iterator_int *)dir_iterator;
+	int ret;
 
 	while (1) {
 		struct dir_iterator_level *level =
@@ -71,6 +75,8 @@  int dir_iterator_advance(struct dir_iterator *dir_iterator)
 
 			level->dir = opendir(iter->base.path.buf);
 			if (!level->dir && errno != ENOENT) {
+				if (iter->flags & DIR_ITERATOR_PEDANTIC)
+					goto error_out;
 				warning("error opening directory %s: %s",
 					iter->base.path.buf, strerror(errno));
 				/* Popping the level is handled below */
@@ -122,6 +128,8 @@  int dir_iterator_advance(struct dir_iterator *dir_iterator)
 			if (!de) {
 				/* This level is exhausted; pop up a level. */
 				if (errno) {
+					if (iter->flags & DIR_ITERATOR_PEDANTIC)
+						goto error_out;
 					warning("error reading directory %s: %s",
 						iter->base.path.buf, strerror(errno));
 				} else if (closedir(level->dir))
@@ -138,11 +146,20 @@  int dir_iterator_advance(struct dir_iterator *dir_iterator)
 				continue;
 
 			strbuf_addstr(&iter->base.path, de->d_name);
-			if (lstat(iter->base.path.buf, &iter->base.st) < 0) {
-				if (errno != ENOENT)
+
+			if (iter->flags & DIR_ITERATOR_FOLLOW_SYMLINKS)
+				ret = stat(iter->base.path.buf, &iter->base.st);
+			else
+				ret = lstat(iter->base.path.buf, &iter->base.st);
+
+			if (ret < 0) {
+				if (errno != ENOENT) {
+					if (iter->flags & DIR_ITERATOR_PEDANTIC)
+						goto error_out;
 					warning("error reading path '%s': %s",
 						iter->base.path.buf,
 						strerror(errno));
+				}
 				continue;
 			}
 
@@ -159,6 +176,10 @@  int dir_iterator_advance(struct dir_iterator *dir_iterator)
 			return ITER_OK;
 		}
 	}
+
+error_out:
+	dir_iterator_abort(dir_iterator);
+	return ITER_ERROR;
 }
 
 int dir_iterator_abort(struct dir_iterator *dir_iterator)
@@ -182,7 +203,7 @@  int dir_iterator_abort(struct dir_iterator *dir_iterator)
 	return ITER_DONE;
 }
 
-struct dir_iterator *dir_iterator_begin(const char *path)
+struct dir_iterator *dir_iterator_begin(const char *path, unsigned flags)
 {
 	struct dir_iterator_int *iter = xcalloc(1, sizeof(*iter));
 	struct dir_iterator *dir_iterator = &iter->base;
@@ -195,6 +216,7 @@  struct dir_iterator *dir_iterator_begin(const char *path)
 
 	ALLOC_GROW(iter->levels, 10, iter->levels_alloc);
 
+	iter->flags = flags;
 	iter->levels_nr = 1;
 	iter->levels[0].initialized = 0;
 
diff --git a/dir-iterator.h b/dir-iterator.h
index 970793d07a..93646c3bea 100644
--- a/dir-iterator.h
+++ b/dir-iterator.h
@@ -19,7 +19,7 @@ 
  * A typical iteration looks like this:
  *
  *     int ok;
- *     struct iterator *iter = dir_iterator_begin(path);
+ *     struct iterator *iter = dir_iterator_begin(path, 0);
  *
  *     while ((ok = dir_iterator_advance(iter)) == ITER_OK) {
  *             if (want_to_stop_iteration()) {
@@ -40,6 +40,20 @@ 
  * dir_iterator_advance() again.
  */
 
+/*
+ * Flags for dir_iterator_begin:
+ *
+ * - DIR_ITERATOR_PEDANTIC: override dir-iterator's default behavior
+ *   in case of an error while trying to fetch the next entry, which is
+ *   to emit a warning and keep going. With this flag, resouces are
+ *   freed and ITER_ERROR is return immediately.
+ *
+ * - DIR_ITERATOR_FOLLOW_SYMLINKS: make dir-iterator follow symlinks to
+ *   directories, i.e., iterate over linked directories' contents.
+ */
+#define DIR_ITERATOR_PEDANTIC (1 << 0)
+#define DIR_ITERATOR_FOLLOW_SYMLINKS (1 << 1)
+
 struct dir_iterator {
 	/* The current path: */
 	struct strbuf path;
@@ -54,20 +68,28 @@  struct dir_iterator {
 	/* The current basename: */
 	const char *basename;
 
-	/* The result of calling lstat() on path: */
+	/*
+	 * The result of calling lstat() on path or stat(), if the
+	 * DIR_ITERATOR_FOLLOW_SYMLINKS flag was set at
+	 * dir_iterator's initialization.
+	 */
 	struct stat st;
 };
 
 /*
- * Start a directory iteration over path. Return a dir_iterator that
- * holds the internal state of the iteration.
+ * Start a directory iteration over path with the combination of
+ * options specified by flags. Return a dir_iterator that holds the
+ * internal state of the iteration.
  *
  * The iteration includes all paths under path, not including path
  * itself and not including "." or ".." entries.
  *
- * path is the starting directory. An internal copy will be made.
+ * Parameters are:
+ *  - path is the starting directory. An internal copy will be made.
+ *  - flags is a combination of the possible flags to initialize a
+ *    dir-iterator or 0 for default behaviour.
  */
-struct dir_iterator *dir_iterator_begin(const char *path);
+struct dir_iterator *dir_iterator_begin(const char *path, unsigned flags);
 
 /*
  * Advance the iterator to the first or next item and return ITER_OK.
@@ -76,6 +98,11 @@  struct dir_iterator *dir_iterator_begin(const char *path);
  * dir_iterator and associated resources and return ITER_ERROR. It is
  * a bug to use iterator or call this function again after it has
  * returned ITER_DONE or ITER_ERROR.
+ *
+ * Note that whether dir-iterator will return ITER_ERROR when failing
+ * to fetch the next entry or just emit a warning and try to fetch the
+ * next is defined by the 'pedantic' option at dir-iterator's
+ * initialization.
  */
 int dir_iterator_advance(struct dir_iterator *iterator);
 
diff --git a/refs/files-backend.c b/refs/files-backend.c
index ef053f716c..2ce9783097 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -2143,7 +2143,7 @@  static struct ref_iterator *reflog_iterator_begin(struct ref_store *ref_store,
 
 	base_ref_iterator_init(ref_iterator, &files_reflog_iterator_vtable, 0);
 	strbuf_addf(&sb, "%s/logs", gitdir);
-	iter->dir_iterator = dir_iterator_begin(sb.buf);
+	iter->dir_iterator = dir_iterator_begin(sb.buf, 0);
 	iter->ref_store = ref_store;
 	strbuf_release(&sb);