worktree add: sanitize worktree names
diff mbox series

Message ID 20190221110026.23135-1-pclouds@gmail.com
State New
Headers show
Series
  • worktree add: sanitize worktree names
Related show

Commit Message

Duy Nguyen Feb. 21, 2019, 11 a.m. UTC
Worktree names are based on $(basename $GIT_WORK_TREE). They aren't
significant until 3a3b9d8cde (refs: new ref types to make per-worktree
refs visible to all worktrees - 2018-10-21), where worktree name could
be part of a refname and must follow refname rules.

Update 'worktree add' code to remove special characters to follow
these rules. The code could replace chars with '-' more than
necessary, but it keeps the code simple. In the future the user will
be able to specify the worktree name by themselves if they're not
happy with this dumb character substitution.

Reported-by: hi-angel@yandex.ru
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 builtin/worktree.c      | 47 ++++++++++++++++++++++++++++++++++++++++-
 t/t2025-worktree-add.sh |  5 +++++
 2 files changed, 51 insertions(+), 1 deletion(-)

Comments

Konstantin Kharlamov Feb. 21, 2019, 11:28 a.m. UTC | #1
On Чт, Feb 21, 2019 at 2:00 PM, 
=?UTF-8?b?Tmd1eeG7hW4gVGjDoWkgTmfhu41j?= Duy <pclouds@gmail.com> wrote:
> Worktree names are based on $(basename $GIT_WORK_TREE). They aren't
> significant until 3a3b9d8cde (refs: new ref types to make per-worktree
> refs visible to all worktrees - 2018-10-21), where worktree name could
> be part of a refname and must follow refname rules.
> 
> Update 'worktree add' code to remove special characters to follow
> these rules. The code could replace chars with '-' more than
> necessary, but it keeps the code simple. In the future the user will
> be able to specify the worktree name by themselves if they're not
> happy with this dumb character substitution.
> 
> Reported-by: hi-angel@yandex.ru
> Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
> ---
>  builtin/worktree.c      | 47 
> ++++++++++++++++++++++++++++++++++++++++-
>  t/t2025-worktree-add.sh |  5 +++++
>  2 files changed, 51 insertions(+), 1 deletion(-)
> 
> diff --git a/builtin/worktree.c b/builtin/worktree.c
> index 3f9907fcc9..ff36838a33 100644
> --- a/builtin/worktree.c
> +++ b/builtin/worktree.c
> @@ -262,6 +262,46 @@ static void validate_worktree_add(const char 
> *path, const struct add_opts *opts)
>  	free_worktrees(worktrees);
>  }
> 
> +/*
> + * worktree name is part of refname and has to pass
> + * check_refname_component(). Remove unallowed characters to make it
> + * valid.
> + */
> +static void sanitize_worktree_name(struct strbuf *name)
> +{
> +	int i;
> +
> +	/* no ending with .lock */
> +	if (ends_with(name->buf, ".lock"))
> +		strbuf_remove(name, name->len - strlen(".lock"),
> +			      strlen(".lock"));
> +
> +	/*
> +	 * All special chars replaced with dashes. See
> +	 * check_refname_component() for reference.
> +	 */
> +	for (i = 0; i < name->len; i++) {
> +		if (strchr(":?[]\\~ \t@{}*/.", name->buf[i]))
> +			name->buf[i] = '-';
> +	}
> +
> +	/* remove consecutive dashes, leading or trailing dashes */
> +	for (i = 0; i < name->len; i++) {
> +		while (name->buf[i] == '-' &&
> +		       (i == 0 ||
> +			i == name->len - 1 ||
> +			(i < name->len - 1 && name->buf[i + 1] == '-')))
> +			strbuf_remove(name, i, 1);
> +	}
> +
> +	/* last resort, should never ever happen in practice */
> +	if (name->len == 0)
> +		strbuf_addstr(name, "worktree");

I assume this means a user have passed a zero-sized worktree name? But 
zero-sized file/directory names are not possible anyway, would it make 
sense to just return an error in this case?

> +
> +	if (check_refname_format(name->buf, REFNAME_ALLOW_ONELEVEL))
> +		BUG("worktree name '%s' is not a valid refname", name->buf);
> +}
> +
>  static int add_worktree(const char *path, const char *refname,
>  			const struct add_opts *opts)
>  {
> @@ -275,6 +315,7 @@ static int add_worktree(const char *path, const 
> char *refname,
>  	struct strbuf symref = STRBUF_INIT;
>  	struct commit *commit = NULL;
>  	int is_branch = 0;
> +	struct strbuf sb_name = STRBUF_INIT;
> 
>  	validate_worktree_add(path, opts);
> 
> @@ -290,7 +331,10 @@ static int add_worktree(const char *path, const 
> char *refname,
>  		die(_("invalid reference: %s"), refname);
> 
>  	name = worktree_basename(path, &len);
> -	git_path_buf(&sb_repo, "worktrees/%.*s", (int)(path + len - name), 
> name);
> +	strbuf_add(&sb_name, name, path + len - name);
> +	sanitize_worktree_name(&sb_name);
> +	name = sb_name.buf;
> +	git_path_buf(&sb_repo, "worktrees/%s", name);
>  	len = sb_repo.len;
>  	if (safe_create_leading_directories_const(sb_repo.buf))
>  		die_errno(_("could not create leading directories of '%s'"),
> @@ -415,6 +459,7 @@ static int add_worktree(const char *path, const 
> char *refname,
>  	strbuf_release(&symref);
>  	strbuf_release(&sb_repo);
>  	strbuf_release(&sb_git);
> +	strbuf_release(&sb_name);
>  	return ret;
>  }
> 
> diff --git a/t/t2025-worktree-add.sh b/t/t2025-worktree-add.sh
> index 286bba35d8..0d465adb54 100755
> --- a/t/t2025-worktree-add.sh
> +++ b/t/t2025-worktree-add.sh
> @@ -570,4 +570,9 @@ test_expect_success '"add" an existing locked but 
> missing worktree' '
>  	git worktree add --force --force --detach gnoo
>  '
> 
> +test_expect_success 'sanitize generated worktree name' '
> +	git worktree add --detach ".  weird*..?.lock" &&
> +	test -d .git/worktrees/weird
> +'
> +
>  test_done
> --
> 2.21.0.rc1.337.gdf7f8d0522
>
Duy Nguyen Feb. 21, 2019, 11:38 a.m. UTC | #2
On Thu, Feb 21, 2019 at 6:28 PM Konstantin Kharlamov <hi-angel@yandex.ru> wrote:
>
>
>
> On Чт, Feb 21, 2019 at 2:00 PM,
> =?UTF-8?b?Tmd1eeG7hW4gVGjDoWkgTmfhu41j?= Duy <pclouds@gmail.com> wrote:
> > Worktree names are based on $(basename $GIT_WORK_TREE). They aren't
> > significant until 3a3b9d8cde (refs: new ref types to make per-worktree
> > refs visible to all worktrees - 2018-10-21), where worktree name could
> > be part of a refname and must follow refname rules.
> >
> > Update 'worktree add' code to remove special characters to follow
> > these rules. The code could replace chars with '-' more than
> > necessary, but it keeps the code simple. In the future the user will
> > be able to specify the worktree name by themselves if they're not
> > happy with this dumb character substitution.
> >
> > Reported-by: hi-angel@yandex.ru
> > Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
> > ---
> >  builtin/worktree.c      | 47
> > ++++++++++++++++++++++++++++++++++++++++-
> >  t/t2025-worktree-add.sh |  5 +++++
> >  2 files changed, 51 insertions(+), 1 deletion(-)
> >
> > diff --git a/builtin/worktree.c b/builtin/worktree.c
> > index 3f9907fcc9..ff36838a33 100644
> > --- a/builtin/worktree.c
> > +++ b/builtin/worktree.c
> > @@ -262,6 +262,46 @@ static void validate_worktree_add(const char
> > *path, const struct add_opts *opts)
> >       free_worktrees(worktrees);
> >  }
> >
> > +/*
> > + * worktree name is part of refname and has to pass
> > + * check_refname_component(). Remove unallowed characters to make it
> > + * valid.
> > + */
> > +static void sanitize_worktree_name(struct strbuf *name)
> > +{
> > +     int i;
> > +
> > +     /* no ending with .lock */
> > +     if (ends_with(name->buf, ".lock"))
> > +             strbuf_remove(name, name->len - strlen(".lock"),
> > +                           strlen(".lock"));
> > +
> > +     /*
> > +      * All special chars replaced with dashes. See
> > +      * check_refname_component() for reference.
> > +      */
> > +     for (i = 0; i < name->len; i++) {
> > +             if (strchr(":?[]\\~ \t@{}*/.", name->buf[i]))
> > +                     name->buf[i] = '-';
> > +     }
> > +
> > +     /* remove consecutive dashes, leading or trailing dashes */
> > +     for (i = 0; i < name->len; i++) {
> > +             while (name->buf[i] == '-' &&
> > +                    (i == 0 ||
> > +                     i == name->len - 1 ||
> > +                     (i < name->len - 1 && name->buf[i + 1] == '-')))
> > +                     strbuf_remove(name, i, 1);
> > +     }
> > +
> > +     /* last resort, should never ever happen in practice */
> > +     if (name->len == 0)
> > +             strbuf_addstr(name, "worktree");
>
> I assume this means a user have passed a zero-sized worktree name? But
> zero-sized file/directory names are not possible anyway, would it make
> sense to just return an error in this case?

It could happen if you do "git worktree add .lock". The ".lock" part
will be stripped out, leaving us with an empty string.
Konstantin Kharlamov Feb. 21, 2019, 11:44 a.m. UTC | #3
On Чт, Feb 21, 2019 at 2:38 PM, Duy Nguyen <pclouds@gmail.com> wrote:
> On Thu, Feb 21, 2019 at 6:28 PM Konstantin Kharlamov 
> <hi-angel@yandex.ru> wrote:
>> 
>> 
>> 
>>  On Чт, Feb 21, 2019 at 2:00 PM,
>>  =?UTF-8?b?Tmd1eeG7hW4gVGjDoWkgTmfhu41j?= Duy <pclouds@gmail.com> 
>> wrote:
>>  > Worktree names are based on $(basename $GIT_WORK_TREE). They 
>> aren't
>>  > significant until 3a3b9d8cde (refs: new ref types to make 
>> per-worktree
>>  > refs visible to all worktrees - 2018-10-21), where worktree name 
>> could
>>  > be part of a refname and must follow refname rules.
>>  >
>>  > Update 'worktree add' code to remove special characters to follow
>>  > these rules. The code could replace chars with '-' more than
>>  > necessary, but it keeps the code simple. In the future the user 
>> will
>>  > be able to specify the worktree name by themselves if they're not
>>  > happy with this dumb character substitution.
>>  >
>>  > Reported-by: hi-angel@yandex.ru
>>  > Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
>>  > ---
>>  >  builtin/worktree.c      | 47
>>  > ++++++++++++++++++++++++++++++++++++++++-
>>  >  t/t2025-worktree-add.sh |  5 +++++
>>  >  2 files changed, 51 insertions(+), 1 deletion(-)
>>  >
>>  > diff --git a/builtin/worktree.c b/builtin/worktree.c
>>  > index 3f9907fcc9..ff36838a33 100644
>>  > --- a/builtin/worktree.c
>>  > +++ b/builtin/worktree.c
>>  > @@ -262,6 +262,46 @@ static void validate_worktree_add(const char
>>  > *path, const struct add_opts *opts)
>>  >       free_worktrees(worktrees);
>>  >  }
>>  >
>>  > +/*
>>  > + * worktree name is part of refname and has to pass
>>  > + * check_refname_component(). Remove unallowed characters to 
>> make it
>>  > + * valid.
>>  > + */
>>  > +static void sanitize_worktree_name(struct strbuf *name)
>>  > +{
>>  > +     int i;
>>  > +
>>  > +     /* no ending with .lock */
>>  > +     if (ends_with(name->buf, ".lock"))
>>  > +             strbuf_remove(name, name->len - strlen(".lock"),
>>  > +                           strlen(".lock"));
>>  > +
>>  > +     /*
>>  > +      * All special chars replaced with dashes. See
>>  > +      * check_refname_component() for reference.
>>  > +      */
>>  > +     for (i = 0; i < name->len; i++) {
>>  > +             if (strchr(":?[]\\~ \t@{}*/.", name->buf[i]))
>>  > +                     name->buf[i] = '-';
>>  > +     }
>>  > +
>>  > +     /* remove consecutive dashes, leading or trailing dashes */
>>  > +     for (i = 0; i < name->len; i++) {
>>  > +             while (name->buf[i] == '-' &&
>>  > +                    (i == 0 ||
>>  > +                     i == name->len - 1 ||
>>  > +                     (i < name->len - 1 && name->buf[i + 1] == 
>> '-')))
>>  > +                     strbuf_remove(name, i, 1);
>>  > +     }
>>  > +
>>  > +     /* last resort, should never ever happen in practice */
>>  > +     if (name->len == 0)
>>  > +             strbuf_addstr(name, "worktree");
>> 
>>  I assume this means a user have passed a zero-sized worktree name? 
>> But
>>  zero-sized file/directory names are not possible anyway, would it 
>> make
>>  sense to just return an error in this case?
> 
> It could happen if you do "git worktree add .lock". The ".lock" part
> will be stripped out, leaving us with an empty string.

Ah, I see. Then, would it maybe make sense to just sanitize the ".lock" 
out the same way as you did with special symbols, i.e. with dashes?

(I am not a git developer, so not sure if that's a good question, but I 
would also question why ".lock" needs to be deleted. I guess git uses 
the postfix internally, but why can't it be okay with "name.lock.lock")
Duy Nguyen Feb. 21, 2019, 11:52 a.m. UTC | #4
On Thu, Feb 21, 2019 at 6:44 PM Konstantin Kharlamov <hi-angel@yandex.ru> wrote:
> >>  > +
> >>  > +     /* last resort, should never ever happen in practice */
> >>  > +     if (name->len == 0)
> >>  > +             strbuf_addstr(name, "worktree");
> >>
> >>  I assume this means a user have passed a zero-sized worktree name?
> >> But
> >>  zero-sized file/directory names are not possible anyway, would it
> >> make
> >>  sense to just return an error in this case?
> >
> > It could happen if you do "git worktree add .lock". The ".lock" part
> > will be stripped out, leaving us with an empty string.
>
> Ah, I see. Then, would it maybe make sense to just sanitize the ".lock"
> out the same way as you did with special symbols, i.e. with dashes?

Hmm.. I actually did not think of that. Then we could return the error
if "name" is empty.

> (I am not a git developer, so not sure if that's a good question, but I
> would also question why ".lock" needs to be deleted. I guess git uses

It's because "foo.lock" is usually a temporary file to prepare things
before we do an atomic update to "foo". And the "refs guys" were just
being careful when they designed reference names so they reject any
reference names that end with .lock. You can try to create a branch
named something.lock, it will not go through. This is actually
documented in "git help check-ref-format".

> the postfix internally, but why can't it be okay with "name.lock.lock")

Uh oh I miss this case. I only delete ".lock" once, "name.lock" would
still be rejected. Thanks for noticing.
Jeff King Feb. 21, 2019, 1:23 p.m. UTC | #5
On Thu, Feb 21, 2019 at 06:52:05PM +0700, Duy Nguyen wrote:

> > the postfix internally, but why can't it be okay with "name.lock.lock")
> 
> Uh oh I miss this case. I only delete ".lock" once, "name.lock" would
> still be rejected. Thanks for noticing.

Another tricky case is "refs/heads/foo.lock/bar.lock", which would need
both ".lock"s removed. I think your v2 handles this correctly, though
(because it disallows "." entirely).

-Peff

Patch
diff mbox series

diff --git a/builtin/worktree.c b/builtin/worktree.c
index 3f9907fcc9..ff36838a33 100644
--- a/builtin/worktree.c
+++ b/builtin/worktree.c
@@ -262,6 +262,46 @@  static void validate_worktree_add(const char *path, const struct add_opts *opts)
 	free_worktrees(worktrees);
 }
 
+/*
+ * worktree name is part of refname and has to pass
+ * check_refname_component(). Remove unallowed characters to make it
+ * valid.
+ */
+static void sanitize_worktree_name(struct strbuf *name)
+{
+	int i;
+
+	/* no ending with .lock */
+	if (ends_with(name->buf, ".lock"))
+		strbuf_remove(name, name->len - strlen(".lock"),
+			      strlen(".lock"));
+
+	/*
+	 * All special chars replaced with dashes. See
+	 * check_refname_component() for reference.
+	 */
+	for (i = 0; i < name->len; i++) {
+		if (strchr(":?[]\\~ \t@{}*/.", name->buf[i]))
+			name->buf[i] = '-';
+	}
+
+	/* remove consecutive dashes, leading or trailing dashes */
+	for (i = 0; i < name->len; i++) {
+		while (name->buf[i] == '-' &&
+		       (i == 0 ||
+			i == name->len - 1 ||
+			(i < name->len - 1 && name->buf[i + 1] == '-')))
+			strbuf_remove(name, i, 1);
+	}
+
+	/* last resort, should never ever happen in practice */
+	if (name->len == 0)
+		strbuf_addstr(name, "worktree");
+
+	if (check_refname_format(name->buf, REFNAME_ALLOW_ONELEVEL))
+		BUG("worktree name '%s' is not a valid refname", name->buf);
+}
+
 static int add_worktree(const char *path, const char *refname,
 			const struct add_opts *opts)
 {
@@ -275,6 +315,7 @@  static int add_worktree(const char *path, const char *refname,
 	struct strbuf symref = STRBUF_INIT;
 	struct commit *commit = NULL;
 	int is_branch = 0;
+	struct strbuf sb_name = STRBUF_INIT;
 
 	validate_worktree_add(path, opts);
 
@@ -290,7 +331,10 @@  static int add_worktree(const char *path, const char *refname,
 		die(_("invalid reference: %s"), refname);
 
 	name = worktree_basename(path, &len);
-	git_path_buf(&sb_repo, "worktrees/%.*s", (int)(path + len - name), name);
+	strbuf_add(&sb_name, name, path + len - name);
+	sanitize_worktree_name(&sb_name);
+	name = sb_name.buf;
+	git_path_buf(&sb_repo, "worktrees/%s", name);
 	len = sb_repo.len;
 	if (safe_create_leading_directories_const(sb_repo.buf))
 		die_errno(_("could not create leading directories of '%s'"),
@@ -415,6 +459,7 @@  static int add_worktree(const char *path, const char *refname,
 	strbuf_release(&symref);
 	strbuf_release(&sb_repo);
 	strbuf_release(&sb_git);
+	strbuf_release(&sb_name);
 	return ret;
 }
 
diff --git a/t/t2025-worktree-add.sh b/t/t2025-worktree-add.sh
index 286bba35d8..0d465adb54 100755
--- a/t/t2025-worktree-add.sh
+++ b/t/t2025-worktree-add.sh
@@ -570,4 +570,9 @@  test_expect_success '"add" an existing locked but missing worktree' '
 	git worktree add --force --force --detach gnoo
 '
 
+test_expect_success 'sanitize generated worktree name' '
+	git worktree add --detach ".  weird*..?.lock" &&
+	test -d .git/worktrees/weird
+'
+
 test_done