diff mbox series

[v3,1/2] abspath: add a function to resolve paths with missing components

Message ID 20201127231916.609852-2-sandals@crustytoothpaste.net (mailing list archive)
State Superseded
Headers show
Series rev-parse options for absolute or relative paths | expand

Commit Message

brian m. carlson Nov. 27, 2020, 11:19 p.m. UTC
We'd like to canonicalize paths such that we can preserve any number of
trailing components that may be missing.  Let's add a function to do
that, taking the number of components to canonicalize, and make
strbuf_realpath a wrapper around it that allows just one component.

Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
---
 abspath.c | 33 ++++++++++++++++++++++++++++++++-
 cache.h   |  2 ++
 2 files changed, 34 insertions(+), 1 deletion(-)

Comments

René Scharfe Nov. 28, 2020, 10:08 a.m. UTC | #1
Am 28.11.20 um 00:19 schrieb brian m. carlson:
> We'd like to canonicalize paths such that we can preserve any number of
> trailing components that may be missing.  Let's add a function to do
> that, taking the number of components to canonicalize, and make
> strbuf_realpath a wrapper around it that allows just one component.
>
> Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
> ---
>  abspath.c | 33 ++++++++++++++++++++++++++++++++-
>  cache.h   |  2 ++
>  2 files changed, 34 insertions(+), 1 deletion(-)
>
> diff --git a/abspath.c b/abspath.c
> index 6f15a418bb..1d8f3d007c 100644
> --- a/abspath.c
> +++ b/abspath.c
> @@ -20,6 +20,7 @@ static void strip_last_component(struct strbuf *path)
>  	/* Find start of the last component */
>  	while (offset < len && !is_dir_sep(path->buf[len - 1]))
>  		len--;
> +
>  	/* Skip sequences of multiple path-separators */
>  	while (offset < len && is_dir_sep(path->buf[len - 1]))
>  		len--;

Stray change?

> @@ -66,6 +67,22 @@ static void get_root_part(struct strbuf *resolved, struct strbuf *remaining)
>  #define MAXSYMLINKS 32
>  #endif
>
> +/* Count non-contiguous directory separators, not including a trailing one. */
> +static int count_dir_separators(const char *s)
> +{
> +	int count = 0;
> +	int last_sep = 0;
> +	const char *p = s;
> +	while (*p) {
> +		int is_sep = is_dir_sep(*p++);
> +		if (is_sep && !last_sep)
> +			count++;
> +		last_sep = is_sep;
> +	}
> +	return count;
> +}

count_dir_separators("a/") returns 1; is this intended?  It seems to
contradict the "not including a trailing one" comment.

> +
> +

Nitpicking: Do we need both empty lines?

>  /*
>   * Return the real path (i.e., absolute path, with symlinks resolved
>   * and extra slashes removed) equivalent to the specified path.  (If
> @@ -80,6 +97,16 @@ static void get_root_part(struct strbuf *resolved, struct strbuf *remaining)
>   */
>  char *strbuf_realpath(struct strbuf *resolved, const char *path,
>  		      int die_on_error)
> +{
> +	return strbuf_realpath_missing(resolved, path, 1, die_on_error);
> +}
> +
> +/*
> + * Just like strbuf_realpath, but allows specifying how many missing components
> + * are permitted.  -1 may be specified to allow an unlimited number.
> + */
> +char *strbuf_realpath_missing(struct strbuf *resolved, const char *path,
> +			      int missing_components, int die_on_error)
>  {
>  	struct strbuf remaining = STRBUF_INIT;
>  	struct strbuf next = STRBUF_INIT;
> @@ -128,8 +155,12 @@ char *strbuf_realpath(struct strbuf *resolved, const char *path,
>  		strbuf_addbuf(resolved, &next);
>
>  		if (lstat(resolved->buf, &st)) {
> +			int trailing_components = count_dir_separators(remaining.buf) +
> +						  (remaining.len != 0);

Hmm, so you actually want to count path components, not separators.
Perhaps like this?

	static size_t count_components(const char *p)
	{
		size_t n = 0;
		while (*p) {
			while (*p && !is_dir_sep(*p))
				p++;
			while (is_dir_sep(*p))
				p++;
			n++;
		}
		return n;
	}

>  			/* error out unless this was the last component */
> -			if (errno != ENOENT || remaining.len) {
> +			if (errno != ENOENT ||
> +			    !(missing_components == -1 ||
> +			      trailing_components < missing_components)) {
>  				if (die_on_error)
>  					die_errno("Invalid path '%s'",
>  						  resolved->buf);
> diff --git a/cache.h b/cache.h
> index c0072d43b1..ee4bc5ec04 100644
> --- a/cache.h
> +++ b/cache.h
> @@ -1320,6 +1320,8 @@ static inline int is_absolute_path(const char *path)
>  int is_directory(const char *);
>  char *strbuf_realpath(struct strbuf *resolved, const char *path,
>  		      int die_on_error);
> +char *strbuf_realpath_missing(struct strbuf *resolved, const char *path,
> +			      int missing_components, int die_on_error);
>  char *real_pathdup(const char *path, int die_on_error);
>  const char *absolute_path(const char *path);
>  char *absolute_pathdup(const char *path);
>

brian m. carlson Nov. 28, 2020, 6:41 p.m. UTC | #2
On 2020-11-28 at 10:08:09, René Scharfe wrote:
> Am 28.11.20 um 00:19 schrieb brian m. carlson:
> > We'd like to canonicalize paths such that we can preserve any number of
> > trailing components that may be missing.  Let's add a function to do
> > that, taking the number of components to canonicalize, and make
> > strbuf_realpath a wrapper around it that allows just one component.
> >
> > Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
> > ---
> >  abspath.c | 33 ++++++++++++++++++++++++++++++++-
> >  cache.h   |  2 ++
> >  2 files changed, 34 insertions(+), 1 deletion(-)
> >
> > diff --git a/abspath.c b/abspath.c
> > index 6f15a418bb..1d8f3d007c 100644
> > --- a/abspath.c
> > +++ b/abspath.c
> > @@ -20,6 +20,7 @@ static void strip_last_component(struct strbuf *path)
> >  	/* Find start of the last component */
> >  	while (offset < len && !is_dir_sep(path->buf[len - 1]))
> >  		len--;
> > +
> >  	/* Skip sequences of multiple path-separators */
> >  	while (offset < len && is_dir_sep(path->buf[len - 1]))
> >  		len--;
> 
> Stray change?

Ah, yes.  I pulled out the old code from v2, but left the whitespace.
Will fix.

> Nitpicking: Do we need both empty lines?

No, we don't.

> >  /*
> >   * Return the real path (i.e., absolute path, with symlinks resolved
> >   * and extra slashes removed) equivalent to the specified path.  (If
> > @@ -80,6 +97,16 @@ static void get_root_part(struct strbuf *resolved, struct strbuf *remaining)
> >   */
> >  char *strbuf_realpath(struct strbuf *resolved, const char *path,
> >  		      int die_on_error)
> > +{
> > +	return strbuf_realpath_missing(resolved, path, 1, die_on_error);
> > +}
> > +
> > +/*
> > + * Just like strbuf_realpath, but allows specifying how many missing components
> > + * are permitted.  -1 may be specified to allow an unlimited number.
> > + */
> > +char *strbuf_realpath_missing(struct strbuf *resolved, const char *path,
> > +			      int missing_components, int die_on_error)
> >  {
> >  	struct strbuf remaining = STRBUF_INIT;
> >  	struct strbuf next = STRBUF_INIT;
> > @@ -128,8 +155,12 @@ char *strbuf_realpath(struct strbuf *resolved, const char *path,
> >  		strbuf_addbuf(resolved, &next);
> >
> >  		if (lstat(resolved->buf, &st)) {
> > +			int trailing_components = count_dir_separators(remaining.buf) +
> > +						  (remaining.len != 0);
> 
> Hmm, so you actually want to count path components, not separators.
> Perhaps like this?
> 
> 	static size_t count_components(const char *p)
> 	{
> 		size_t n = 0;
> 		while (*p) {
> 			while (*p && !is_dir_sep(*p))
> 				p++;
> 			while (is_dir_sep(*p))
> 				p++;
> 			n++;
> 		}
> 		return n;
> 	}

Yeah, I think that's nicer, and simpler, too.  Will reroll with that
fix.

Johannes Schindelin Dec. 2, 2020, 1:09 p.m. UTC | #3
Hi brian,

On Fri, 27 Nov 2020, brian m. carlson wrote:

> @@ -80,6 +97,16 @@ static void get_root_part(struct strbuf *resolved, struct strbuf *remaining)
>   */
>  char *strbuf_realpath(struct strbuf *resolved, const char *path,
>  		      int die_on_error)
> +{
> +	return strbuf_realpath_missing(resolved, path, 1, die_on_error);
> +}
> +
> +/*
> + * Just like strbuf_realpath, but allows specifying how many missing components
> + * are permitted.  -1 may be specified to allow an unlimited number.
> + */
> +char *strbuf_realpath_missing(struct strbuf *resolved, const char *path,
> +			      int missing_components, int die_on_error)

I am having a hard time with the name `missing_components`. Taking a step
back, I think that we are interested in essentially two modes: allow one
missing component, or an arbitrary amount of missing components.

If this assumption is correct, we may not even need to count components at
all. We only need a flag indicating whether we allow only the last
component to be missing, or any number of components. Maybe something like
`error_on_missing_parent_directory` or some such?

Ciao,
Dscho

>  {
>  	struct strbuf remaining = STRBUF_INIT;
>  	struct strbuf next = STRBUF_INIT;
> @@ -128,8 +155,12 @@ char *strbuf_realpath(struct strbuf *resolved, const char *path,
>  		strbuf_addbuf(resolved, &next);
>
>  		if (lstat(resolved->buf, &st)) {
> +			int trailing_components = count_dir_separators(remaining.buf) +
> +						  (remaining.len != 0);
>  			/* error out unless this was the last component */
> -			if (errno != ENOENT || remaining.len) {
> +			if (errno != ENOENT ||
> +			    !(missing_components == -1 ||
> +			      trailing_components < missing_components)) {
>  				if (die_on_error)
>  					die_errno("Invalid path '%s'",
>  						  resolved->buf);
> diff --git a/cache.h b/cache.h
> index c0072d43b1..ee4bc5ec04 100644
> --- a/cache.h
> +++ b/cache.h
> @@ -1320,6 +1320,8 @@ static inline int is_absolute_path(const char *path)
>  int is_directory(const char *);
>  char *strbuf_realpath(struct strbuf *resolved, const char *path,
>  		      int die_on_error);
> +char *strbuf_realpath_missing(struct strbuf *resolved, const char *path,
> +			      int missing_components, int die_on_error);
>  char *real_pathdup(const char *path, int die_on_error);
>  const char *absolute_path(const char *path);
>  char *absolute_pathdup(const char *path);
>

brian m. carlson Dec. 2, 2020, 11:54 p.m. UTC | #4
On 2020-12-02 at 13:09:49, Johannes Schindelin wrote:
> I am having a hard time with the name `missing_components`. Taking a step
> back, I think that we are interested in essentially two modes: allow one
> missing component, or an arbitrary amount of missing components.
> 
> If this assumption is correct, we may not even need to count components at
> all. We only need a flag indicating whether we allow only the last
> component to be missing, or any number of components. Maybe something like
> `error_on_missing_parent_directory` or some such?

That's certainly easier.  I hope to get a reroll out this week, and I'll
simplify when I do that.

diff mbox series

Patch

diff --git a/abspath.c b/abspath.c
index 6f15a418bb..1d8f3d007c 100644
--- a/abspath.c
+++ b/abspath.c
@@ -20,6 +20,7 @@  static void strip_last_component(struct strbuf *path)
 	/* Find start of the last component */
 	while (offset < len && !is_dir_sep(path->buf[len - 1]))
 		len--;
+
 	/* Skip sequences of multiple path-separators */
 	while (offset < len && is_dir_sep(path->buf[len - 1]))
 		len--;
@@ -66,6 +67,22 @@  static void get_root_part(struct strbuf *resolved, struct strbuf *remaining)
 #define MAXSYMLINKS 32
 #endif
 
+/* Count non-contiguous directory separators, not including a trailing one. */
+static int count_dir_separators(const char *s)
+{
+	int count = 0;
+	int last_sep = 0;
+	const char *p = s;
+	while (*p) {
+		int is_sep = is_dir_sep(*p++);
+		if (is_sep && !last_sep)
+			count++;
+		last_sep = is_sep;
+	}
+	return count;
+}
+
+
 /*
  * Return the real path (i.e., absolute path, with symlinks resolved
  * and extra slashes removed) equivalent to the specified path.  (If
@@ -80,6 +97,16 @@  static void get_root_part(struct strbuf *resolved, struct strbuf *remaining)
  */
 char *strbuf_realpath(struct strbuf *resolved, const char *path,
 		      int die_on_error)
+{
+	return strbuf_realpath_missing(resolved, path, 1, die_on_error);
+}
+
+/*
+ * Just like strbuf_realpath, but allows specifying how many missing components
+ * are permitted.  -1 may be specified to allow an unlimited number.
+ */
+char *strbuf_realpath_missing(struct strbuf *resolved, const char *path,
+			      int missing_components, int die_on_error)
 {
 	struct strbuf remaining = STRBUF_INIT;
 	struct strbuf next = STRBUF_INIT;
@@ -128,8 +155,12 @@  char *strbuf_realpath(struct strbuf *resolved, const char *path,
 		strbuf_addbuf(resolved, &next);
 
 		if (lstat(resolved->buf, &st)) {
+			int trailing_components = count_dir_separators(remaining.buf) +
+						  (remaining.len != 0);
 			/* error out unless this was the last component */
-			if (errno != ENOENT || remaining.len) {
+			if (errno != ENOENT ||
+			    !(missing_components == -1 ||
+			      trailing_components < missing_components)) {
 				if (die_on_error)
 					die_errno("Invalid path '%s'",
 						  resolved->buf);
diff --git a/cache.h b/cache.h
index c0072d43b1..ee4bc5ec04 100644
--- a/cache.h
+++ b/cache.h
@@ -1320,6 +1320,8 @@  static inline int is_absolute_path(const char *path)
 int is_directory(const char *);
 char *strbuf_realpath(struct strbuf *resolved, const char *path,
 		      int die_on_error);
+char *strbuf_realpath_missing(struct strbuf *resolved, const char *path,
+			      int missing_components, int die_on_error);
 char *real_pathdup(const char *path, int die_on_error);
 const char *absolute_path(const char *path);
 char *absolute_pathdup(const char *path);