Message ID | 20201127231916.609852-2-sandals@crustytoothpaste.net (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | rev-parse options for absolute or relative paths | expand |
Am 28.11.20 um 00:19 schrieb brian m. carlson: > We'd like to canonicalize paths such that we can preserve any number of > trailing components that may be missing. Let's add a function to do > that, taking the number of components to canonicalize, and make > strbuf_realpath a wrapper around it that allows just one component. > > Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net> > --- > abspath.c | 33 ++++++++++++++++++++++++++++++++- > cache.h | 2 ++ > 2 files changed, 34 insertions(+), 1 deletion(-) > > diff --git a/abspath.c b/abspath.c > index 6f15a418bb..1d8f3d007c 100644 > --- a/abspath.c > +++ b/abspath.c > @@ -20,6 +20,7 @@ static void strip_last_component(struct strbuf *path) > /* Find start of the last component */ > while (offset < len && !is_dir_sep(path->buf[len - 1])) > len--; > + > /* Skip sequences of multiple path-separators */ > while (offset < len && is_dir_sep(path->buf[len - 1])) > len--; Stray change? > @@ -66,6 +67,22 @@ static void get_root_part(struct strbuf *resolved, struct strbuf *remaining) > #define MAXSYMLINKS 32 > #endif > > +/* Count non-contiguous directory separators, not including a trailing one. */ > +static int count_dir_separators(const char *s) > +{ > + int count = 0; > + int last_sep = 0; > + const char *p = s; > + while (*p) { > + int is_sep = is_dir_sep(*p++); > + if (is_sep && !last_sep) > + count++; > + last_sep = is_sep; > + } > + return count; > +} count_dir_separators("a/") returns 1; is this intended? It seems to contradict the "not including a trailing one" comment. > + > + Nitpicking: Do we need both empty lines? > /* > * Return the real path (i.e., absolute path, with symlinks resolved > * and extra slashes removed) equivalent to the specified path. 
(If > @@ -80,6 +97,16 @@ static void get_root_part(struct strbuf *resolved, struct strbuf *remaining) > */ > char *strbuf_realpath(struct strbuf *resolved, const char *path, > int die_on_error) > +{ > + return strbuf_realpath_missing(resolved, path, 1, die_on_error); > +} > + > +/* > + * Just like strbuf_realpath, but allows specifying how many missing components > + * are permitted. -1 may be specified to allow an unlimited number. > + */ > +char *strbuf_realpath_missing(struct strbuf *resolved, const char *path, > + int missing_components, int die_on_error) > { > struct strbuf remaining = STRBUF_INIT; > struct strbuf next = STRBUF_INIT; > @@ -128,8 +155,12 @@ char *strbuf_realpath(struct strbuf *resolved, const char *path, > strbuf_addbuf(resolved, &next); > > if (lstat(resolved->buf, &st)) { > + int trailing_components = count_dir_separators(remaining.buf) + > + (remaining.len != 0); Hmm, so you actually want to count path components, not separators. Perhaps like this? static size_t count_components(const char *p) { size_t n = 0; while (*p) { while (*p && !is_dir_sep(*p)) p++; while (is_dir_sep(*p)) p++; n++; } return n; } > /* error out unless this was the last component */ > - if (errno != ENOENT || remaining.len) { > + if (errno != ENOENT || > + !(missing_components == -1 || > + trailing_components < missing_components)) { > if (die_on_error) > die_errno("Invalid path '%s'", > resolved->buf); > diff --git a/cache.h b/cache.h > index c0072d43b1..ee4bc5ec04 100644 > --- a/cache.h > +++ b/cache.h > @@ -1320,6 +1320,8 @@ static inline int is_absolute_path(const char *path) > int is_directory(const char *); > char *strbuf_realpath(struct strbuf *resolved, const char *path, > int die_on_error); > +char *strbuf_realpath_missing(struct strbuf *resolved, const char *path, > + int missing_components, int die_on_error); > char *real_pathdup(const char *path, int die_on_error); > const char *absolute_path(const char *path); > char *absolute_pathdup(const char *path); >
On 2020-11-28 at 10:08:09, René Scharfe wrote: > Am 28.11.20 um 00:19 schrieb brian m. carlson: > > We'd like to canonicalize paths such that we can preserve any number of > > trailing components that may be missing. Let's add a function to do > > that, taking the number of components to canonicalize, and make > > strbuf_realpath a wrapper around it that allows just one component. > > > > Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net> > > --- > > abspath.c | 33 ++++++++++++++++++++++++++++++++- > > cache.h | 2 ++ > > 2 files changed, 34 insertions(+), 1 deletion(-) > > > > diff --git a/abspath.c b/abspath.c > > index 6f15a418bb..1d8f3d007c 100644 > > --- a/abspath.c > > +++ b/abspath.c > > @@ -20,6 +20,7 @@ static void strip_last_component(struct strbuf *path) > > /* Find start of the last component */ > > while (offset < len && !is_dir_sep(path->buf[len - 1])) > > len--; > > + > > /* Skip sequences of multiple path-separators */ > > while (offset < len && is_dir_sep(path->buf[len - 1])) > > len--; > > Stray change? Ah, yes. I pulled out the old code from v2, but left the whitespace. Will fix. > Nitpicking: Do we need both empty lines? No, we don't. > > /* > > * Return the real path (i.e., absolute path, with symlinks resolved > > * and extra slashes removed) equivalent to the specified path. (If > > @@ -80,6 +97,16 @@ static void get_root_part(struct strbuf *resolved, struct strbuf *remaining) > > */ > > char *strbuf_realpath(struct strbuf *resolved, const char *path, > > int die_on_error) > > +{ > > + return strbuf_realpath_missing(resolved, path, 1, die_on_error); > > +} > > + > > +/* > > + * Just like strbuf_realpath, but allows specifying how many missing components > > + * are permitted. -1 may be specified to allow an unlimited number. 
> > + */ > > +char *strbuf_realpath_missing(struct strbuf *resolved, const char *path, > > + int missing_components, int die_on_error) > > { > > struct strbuf remaining = STRBUF_INIT; > > struct strbuf next = STRBUF_INIT; > > @@ -128,8 +155,12 @@ char *strbuf_realpath(struct strbuf *resolved, const char *path, > > strbuf_addbuf(resolved, &next); > > > > if (lstat(resolved->buf, &st)) { > > + int trailing_components = count_dir_separators(remaining.buf) + > > + (remaining.len != 0); > > Hmm, so you actually want to count path components, not separators. > Perhaps like this? > > static size_t count_components(const char *p) > { > size_t n = 0; > while (*p) { > while (*p && !is_dir_sep(*p)) > p++; > while (is_dir_sep(*p)) > p++; > n++; > } > return n; > } Yeah, I think that's nicer, and simpler, too. Will reroll with that fix.
Hi brian, On Fri, 27 Nov 2020, brian m. carlson wrote: > @@ -80,6 +97,16 @@ static void get_root_part(struct strbuf *resolved, struct strbuf *remaining) > */ > char *strbuf_realpath(struct strbuf *resolved, const char *path, > int die_on_error) > +{ > + return strbuf_realpath_missing(resolved, path, 1, die_on_error); > +} > + > +/* > + * Just like strbuf_realpath, but allows specifying how many missing components > + * are permitted. -1 may be specified to allow an unlimited number. > + */ > +char *strbuf_realpath_missing(struct strbuf *resolved, const char *path, > + int missing_components, int die_on_error) I am having a hard time with the name `missing_components`. Taking a step back, I think that we are interested in essentially two modes: allow one missing component, or an arbitrary amount of missing components. If this assumption is correct, we may not even need to count components at all. We only need a flag indicating whether we allow only the last component to be missing, or any number of components. Maybe something like `error_on_missing_parent_directory` or some such? 
Ciao, Dscho > { > struct strbuf remaining = STRBUF_INIT; > struct strbuf next = STRBUF_INIT; > @@ -128,8 +155,12 @@ char *strbuf_realpath(struct strbuf *resolved, const char *path, > strbuf_addbuf(resolved, &next); > > if (lstat(resolved->buf, &st)) { > + int trailing_components = count_dir_separators(remaining.buf) + > + (remaining.len != 0); > /* error out unless this was the last component */ > - if (errno != ENOENT || remaining.len) { > + if (errno != ENOENT || > + !(missing_components == -1 || > + trailing_components < missing_components)) { > if (die_on_error) > die_errno("Invalid path '%s'", > resolved->buf); > diff --git a/cache.h b/cache.h > index c0072d43b1..ee4bc5ec04 100644 > --- a/cache.h > +++ b/cache.h > @@ -1320,6 +1320,8 @@ static inline int is_absolute_path(const char *path) > int is_directory(const char *); > char *strbuf_realpath(struct strbuf *resolved, const char *path, > int die_on_error); > +char *strbuf_realpath_missing(struct strbuf *resolved, const char *path, > + int missing_components, int die_on_error); > char *real_pathdup(const char *path, int die_on_error); > const char *absolute_path(const char *path); > char *absolute_pathdup(const char *path); >
On 2020-12-02 at 13:09:49, Johannes Schindelin wrote: > I am having a hard time with the name `missing_components`. Taking a step > back, I think that we are interested in essentially two modes: allow one > missing component, or an arbitrary amount of missing components. > > If this assumption is correct, we may not even need to count components at > all. We only need a flag indicating whether we allow only the last > component to be missing, or any number of components. Maybe something like > `error_on_missing_parent_directory` or some such? That's certainly easier. I hope to get a reroll out this week, and I'll simplify when I do that.
diff --git a/abspath.c b/abspath.c index 6f15a418bb..1d8f3d007c 100644 --- a/abspath.c +++ b/abspath.c @@ -20,6 +20,7 @@ static void strip_last_component(struct strbuf *path) /* Find start of the last component */ while (offset < len && !is_dir_sep(path->buf[len - 1])) len--; + /* Skip sequences of multiple path-separators */ while (offset < len && is_dir_sep(path->buf[len - 1])) len--; @@ -66,6 +67,22 @@ static void get_root_part(struct strbuf *resolved, struct strbuf *remaining) #define MAXSYMLINKS 32 #endif +/* Count non-contiguous directory separators, not including a trailing one. */ +static int count_dir_separators(const char *s) +{ + int count = 0; + int last_sep = 0; + const char *p = s; + while (*p) { + int is_sep = is_dir_sep(*p++); + if (is_sep && !last_sep) + count++; + last_sep = is_sep; + } + return count; +} + + /* * Return the real path (i.e., absolute path, with symlinks resolved * and extra slashes removed) equivalent to the specified path. (If @@ -80,6 +97,16 @@ static void get_root_part(struct strbuf *resolved, struct strbuf *remaining) */ char *strbuf_realpath(struct strbuf *resolved, const char *path, int die_on_error) +{ + return strbuf_realpath_missing(resolved, path, 1, die_on_error); +} + +/* + * Just like strbuf_realpath, but allows specifying how many missing components + * are permitted. -1 may be specified to allow an unlimited number. 
+ */ +char *strbuf_realpath_missing(struct strbuf *resolved, const char *path, + int missing_components, int die_on_error) { struct strbuf remaining = STRBUF_INIT; struct strbuf next = STRBUF_INIT; @@ -128,8 +155,12 @@ char *strbuf_realpath(struct strbuf *resolved, const char *path, strbuf_addbuf(resolved, &next); if (lstat(resolved->buf, &st)) { + int trailing_components = count_dir_separators(remaining.buf) + + (remaining.len != 0); /* error out unless this was the last component */ - if (errno != ENOENT || remaining.len) { + if (errno != ENOENT || + !(missing_components == -1 || + trailing_components < missing_components)) { if (die_on_error) die_errno("Invalid path '%s'", resolved->buf); diff --git a/cache.h b/cache.h index c0072d43b1..ee4bc5ec04 100644 --- a/cache.h +++ b/cache.h @@ -1320,6 +1320,8 @@ static inline int is_absolute_path(const char *path) int is_directory(const char *); char *strbuf_realpath(struct strbuf *resolved, const char *path, int die_on_error); +char *strbuf_realpath_missing(struct strbuf *resolved, const char *path, + int missing_components, int die_on_error); char *real_pathdup(const char *path, int die_on_error); const char *absolute_path(const char *path); char *absolute_pathdup(const char *path);
We'd like to canonicalize paths such that we can preserve any number of trailing components that may be missing. Let's add a function to do that, taking the number of trailing components that are permitted to be missing (or -1 for no limit), and make strbuf_realpath a wrapper around it that allows just one missing component. Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net> --- abspath.c | 33 ++++++++++++++++++++++++++++++++- cache.h | 2 ++ 2 files changed, 34 insertions(+), 1 deletion(-)