diff mbox series

[4/4] remote: check branch names

Message ID dba31245607f85c48947da60fe0955a6ed3e2c43.1726067917.git.gitgitgadget@gmail.com (mailing list archive)
State New
Headers show
Series remote: branch setting fixes | expand

Commit Message

Phillip Wood Sept. 11, 2024, 3:18 p.m. UTC
From: Phillip Wood <phillip.wood@dunelm.org.uk>

Make sure the names passed to "git remote add -t <branch>" and "git
remote set-branches <branch>" are syntactically valid so that we do not
create invalid refspecs. This check needs to be performed before
creating the remote or modifying the existing configuration so a new
function is added rather than modifying add_branch().

Tests are added for both commands to ensure (i) we report all the
invalid branch names passed on the command line, (ii) the branch names
are validated before modifying the configuration and (iii) wildcards
are accepted.

Signed-off-by: Phillip Wood <phillip.wood@dunelm.org.uk>
---
 builtin/remote.c  | 19 +++++++++++++++++++
 t/t5505-remote.sh | 28 ++++++++++++++++++++++++++++
 2 files changed, 47 insertions(+)

Comments

Junio C Hamano Sept. 11, 2024, 5:03 p.m. UTC | #1
"Phillip Wood via GitGitGadget" <gitgitgadget@gmail.com> writes:

> +static int check_branch_names(const char **branches)
> +{
> +	int ret = 0;
> +
> +	for (const char **b = branches; *b; b++) {
> +		if (check_refname_format(*b, REFNAME_ALLOW_ONELEVEL |
> +						REFNAME_REFSPEC_PATTERN))
> +			ret = error(_("invalid branch name '%s'"), *b);
> +	}
> +
> +	return ret;
> +}

This implementation is inconsistent with what "git branch new HEAD"
uses to check the validity of "new", which is in this call chain:

    builtin/branch.c:cmd_branch()
    -> branch.c:create_branch()
       -> branch.c:validate_new_branchname()
          -> branch.c:validate_branchname()
             -> object-name.c:strbuf_check_branch_ref()

At least, we should prepend "refs/heads/" to *b, so that we can
reject "refs/heads/HEAD".  The authoritative logic in the above
however may further evolve, and we need to keep these two
checks from drifting away from each other over time.  We probably
should refactor the leaf function in the above call chain so that
both places can use it (the main difference is that you allow '*' in
yours when calling check_refname_format()).

    Side note: we *should* lose "strbuf_" from its name, as it is
               not about string manipulation but the "strbuf"-ness
               of the function is merely that as the side effect of
               checking it computes a full refname and it happens to
               use strbuf as a mechanism to return it.

Something like the patch attached at the end.

>  static const char mirror_advice[] =
>  N_("--mirror is dangerous and deprecated; please\n"
>     "\t use --mirror=fetch or --mirror=push instead");
> @@ -203,6 +216,9 @@ static int add(int argc, const char **argv, const char *prefix)
>  	if (!valid_remote_name(name))
>  		die(_("'%s' is not a valid remote name"), name);
>  
> +	if (check_branch_names(track.v))
> +		exit(128);
> +

Seeing that the loop in check_branch_names() is brand new and you
could have iterated over a string-list just as easily, I somehow
doubt that step [3/4] was fully warranted.

> @@ -1601,6 +1617,9 @@ static int set_remote_branches(const char *remotename, const char **branches,
>  		exit(2);
>  	}
>  
> +	if (check_branch_names(branches))
> +		exit(128);

But here you are already passed "const char *branches[]" to this caller,
and it would be a hassle to turn it into string_list, so [3/4] is fine
after all.



 object-name.h |  2 ++
 object-name.c | 23 +++++++++++++++++++++--
 2 files changed, 23 insertions(+), 2 deletions(-)

diff --git i/object-name.h w/object-name.h
index 8dba4a47a4..fa70d42044 100644
--- i/object-name.h
+++ w/object-name.h
@@ -130,4 +130,6 @@ struct object *repo_peel_to_type(struct repository *r,
 /* used when the code does not know or care what the default abbrev is */
 #define FALLBACK_DEFAULT_ABBREV 7
 
+/* Check if "name" is allowed as a branch */ 
+int valid_branch_name(const char *name, int allow_wildcard);
 #endif /* OBJECT_NAME_H */
diff --git i/object-name.c w/object-name.c
index 09c1bd93a3..e3bed5a664 100644
--- i/object-name.c
+++ w/object-name.c
@@ -1747,7 +1747,8 @@ void strbuf_branchname(struct strbuf *sb, const char *name, unsigned allowed)
 	strbuf_add(sb, name + used, len - used);
 }
 
-int strbuf_check_branch_ref(struct strbuf *sb, const char *name)
+static int full_ref_from_branch_name_internal(struct strbuf *sb, const char *name,
+					      int crf_flags)
 {
 	if (startup_info->have_repository)
 		strbuf_branchname(sb, name, INTERPRET_BRANCH_LOCAL);
@@ -1766,7 +1767,25 @@ int strbuf_check_branch_ref(struct strbuf *sb, const char *name)
 	    !strcmp(sb->buf, "refs/heads/HEAD"))
 		return -1;
 
-	return check_refname_format(sb->buf, 0);
+	return check_refname_format(sb->buf, crf_flags);
+}
+
+/* NEEDSWORK: rename this to full_ref_from_branch_name */
+int strbuf_check_branch_ref(struct strbuf *sb, const char *name)
+{
+	return full_ref_from_branch_name_internal(sb, name, 0);
+}
+
+int valid_branch_name(const char *name, int allow_wildcard)
+{
+	struct strbuf sb = STRBUF_INIT;
+	int ret;
+	int flags;
+
+	flags = allow_wildcard ? REFNAME_REFSPEC_PATTERN : 0;
+	ret = full_ref_from_branch_name_internal(&sb, name, flags);
+	strbuf_release(&sb);
+	return ret;
 }
 
 void object_context_release(struct object_context *ctx)
Patrick Steinhardt Sept. 12, 2024, 10:05 a.m. UTC | #2
On Wed, Sep 11, 2024 at 10:03:26AM -0700, Junio C Hamano wrote:
> "Phillip Wood via GitGitGadget" <gitgitgadget@gmail.com> writes:
> 
> > +static int check_branch_names(const char **branches)
> > +{
> > +	int ret = 0;
> > +
> > +	for (const char **b = branches; *b; b++) {
> > +		if (check_refname_format(*b, REFNAME_ALLOW_ONELEVEL |
> > +						REFNAME_REFSPEC_PATTERN))
> > +			ret = error(_("invalid branch name '%s'"), *b);
> > +	}
> > +
> > +	return ret;
> > +}
> 
> This implementation is inconsistent with what "git branch new HEAD"
> uses to check the validity of "new", which is in this call chain:
> 
>     builtin/branch.c:cmd_branch()
>     -> branch.c:create_branch()
>        -> branch.c:validate_new_branchname()
>           -> branch.c:validate_branchname()
>              -> object-name.c:strbuf_check_branch_ref()
> 
> At least, we should prepend "refs/heads/" to *b, so that we can
> reject "refs/heads/HEAD".  The authoritative logic in the above
> however may further evolve, and we need to make sure that these two
> checks from drifting away from each other over time.  We probably
> should refactor the leaf function in the above call chain so that
> both places can use it (the main difference is that you allow '*' in
> yours when calling check_refname_format()).
> 
>     Side note: we *should* lose "strbuf_" from its name, as it is
>                not about string manipulation but the "strbuf'-ness
>                of the function is merely that as the side effect of
>                checking it computes a full refname and it happens to
>                use strbuf as a mechanism to return it.
> 
> Something like the patch attached at the end.

Agreed. It's also kind of curious that the function lives in
"object-name.c" and not in "refs.c".

Patrick
Junio C Hamano Sept. 12, 2024, 4:32 p.m. UTC | #3
Patrick Steinhardt <ps@pks.im> writes:

> Agreed. It's also kind of curious that the function lives in
> "object-name.c" and not in "refs.c".

Because the helper groks things like "-" (aka "@{-1}"), it does a
bit more than "is this a reasonable name for a ref" and "please give
me the current value of this ref".  Also "refs/remotes/origin/HEAD"
may be valid as a refname, but forbidding "refs/heads/HEAD" is done
conceptually one level closer to the end-users.  Eventually, I think
it should move next to branch.c:validate_branchname() as a common
helper between "git branch" and "git remote" (possibly also with
"git switch/checkout", if they need to do validation themselves, but
I suspect they just call into branch.c at a bit higher "here is a
name, create it and you are free to complain---I do not care about
the details of why you decide the name is bad" interface).

Thanks.
Phillip Wood Sept. 13, 2024, 3:09 p.m. UTC | #4
On 11/09/2024 18:03, Junio C Hamano wrote:
> "Phillip Wood via GitGitGadget" <gitgitgadget@gmail.com> writes:
> 
> The authoritative logic in the above
> however may further evolve, and we need to make sure that these two
> checks from drifting away from each other over time.  We probably
> should refactor the leaf function in the above call chain so that
> both places can use it (the main difference is that you allow '*' in
> yours when calling check_refname_format()).
> 
>      Side note: we *should* lose "strbuf_" from its name, as it is
>                 not about string manipulation but the "strbuf'-ness
>                 of the function is merely that as the side effect of
>                 checking it computes a full refname and it happens to
>                 use strbuf as a mechanism to return it.
> 
> Something like the patch attached at the end.

Thanks for the patch, I'll re-roll based on that. I wonder if we really 
want to support "@{-N}" when setting remote tracking branches though - 
should we be using INTERPRET_BRANCH_REMOTE instead when calling 
strbuf_branchname()?

Best Wishes

Phillip

>>   static const char mirror_advice[] =
>>   N_("--mirror is dangerous and deprecated; please\n"
>>      "\t use --mirror=fetch or --mirror=push instead");
>> @@ -203,6 +216,9 @@ static int add(int argc, const char **argv, const char *prefix)
>>   	if (!valid_remote_name(name))
>>   		die(_("'%s' is not a valid remote name"), name);
>>   
>> +	if (check_branch_names(track.v))
>> +		exit(128);
>> +
> 
> Seeing that the loop in check_branch_names() is brand new and you
> could have iterated over a string-list just as easily, I somehow
> doubt that step [3/4] was fully warranted.
> 
>> @@ -1601,6 +1617,9 @@ static int set_remote_branches(const char *remotename, const char **branches,
>>   		exit(2);
>>   	}
>>   
>> +	if (check_branch_names(branches))
>> +		exit(128);
> 
> But here you are already passed "const char *branches[]" to this caller,
> and it would be hassle to turn it into string_list, so [3/4] is fine
> after all.
> 
> 
> 
>   object-name.h |  2 ++
>   object-name.c | 23 +++++++++++++++++++++--
>   2 files changed, 23 insertions(+), 2 deletions(-)
> 
> diff --git i/object-name.h w/object-name.h
> index 8dba4a47a4..fa70d42044 100644
> --- i/object-name.h
> +++ w/object-name.h
> @@ -130,4 +130,6 @@ struct object *repo_peel_to_type(struct repository *r,
>   /* used when the code does not know or care what the default abbrev is */
>   #define FALLBACK_DEFAULT_ABBREV 7
>   
> +/* Check if "name" is allowed as a branch */
> +int valid_branch_name(const char *name, int allow_wildcard);
>   #endif /* OBJECT_NAME_H */
> diff --git i/object-name.c w/object-name.c
> index 09c1bd93a3..e3bed5a664 100644
> --- i/object-name.c
> +++ w/object-name.c
> @@ -1747,7 +1747,8 @@ void strbuf_branchname(struct strbuf *sb, const char *name, unsigned allowed)
>   	strbuf_add(sb, name + used, len - used);
>   }
>   
> -int strbuf_check_branch_ref(struct strbuf *sb, const char *name)
> +static int full_ref_from_branch_name_internal(struct strbuf *sb, const char *name,
> +					      int crf_flags)
>   {
>   	if (startup_info->have_repository)
>   		strbuf_branchname(sb, name, INTERPRET_BRANCH_LOCAL);
> @@ -1766,7 +1767,25 @@ int strbuf_check_branch_ref(struct strbuf *sb, const char *name)
>   	    !strcmp(sb->buf, "refs/heads/HEAD"))
>   		return -1;
>   
> -	return check_refname_format(sb->buf, 0);
> +	return check_refname_format(sb->buf, crf_flags);
> +}
> +
> +/* NEEDSWORK: rename this to full_ref_from_branch_name */
> +int strbuf_check_branch_ref(struct strbuf *sb, const char *name)
> +{
> +	return full_ref_from_branch_name_internal(sb, name, 0);
> +}
> +
> +int valid_branch_name(const char *name, int allow_wildcard)
> +{
> +	struct strbuf sb = STRBUF_INIT;
> +	int ret;
> +	int flags;
> +
> +	flags = allow_wildcard ? REFNAME_REFSPEC_PATTERN : 0;
> +	ret = full_ref_from_branch_name_internal(&sb, name, flags);
> +	strbuf_release(&sb);
> +	return ret;
>   }
>   
>   void object_context_release(struct object_context *ctx)
Junio C Hamano Sept. 13, 2024, 5:49 p.m. UTC | #5
phillip.wood123@gmail.com writes:

> Thanks for the patch, I'll re-roll based on that. I wonder if we
> really want to support "@{-N}" when setting remote tracking branches
> though - should we be using INTERPRET_BRANCH_REMOTE instead when
> calling strbuf_branchname()?

Perhaps.  Users try to use "-" in surprising places, though ;-)
Phillip Wood Sept. 18, 2024, 1:18 p.m. UTC | #6
On 13/09/2024 18:49, Junio C Hamano wrote:
> phillip.wood123@gmail.com writes:
> 
>> Thanks for the patch, I'll re-roll based on that. I wonder if we
>> really want to support "@{-N}" when setting remote tracking branches
>> though - should we be using INTERPRET_BRANCH_REMOTE instead when
>> calling strbuf_branchname()?
> 
> Perhaps.  Users try to use "-" in surprising places, though ;-)

strbuf_check_branch_ref() already rejects "-".

INTERPRET_BRANCH_REMOTE supports @{upstream} which might be useful but 
then we will need to check it refers to the correct remote and expand it 
when setting the fetch refspec so a boolean function to check if a name 
is acceptable is insufficient. Given that "git remote set-branches" has 
only ever supported "real" branch names and patterns on the command line 
and no-one has complained I wonder if we're better off doing something like

	if (strbuf_check_branch_ref(&buf, branch_name) ||
	    strcmp(buf.buf + 11, branch_name))
		error(_("invalid branch name '%s'", branch_name));

where the "buf.buf + 11" skips "refs/heads/"

Best Wishes

Phillip
Junio C Hamano Sept. 18, 2024, 8:24 p.m. UTC | #7
phillip.wood123@gmail.com writes:

> ... Given that "git remote
> set-branches" has only ever supported "real" branch names and patterns
> on the command line and no-one has complained I wonder if we're better
> off doing something like
>
> 	if (strbuf_check_branch_ref(&buf, branch_name) ||
> 	    strcmp(buf.buf + 11, branch_name))
> 		error(_("invalid branch name '%s'", branch_name));
>
> where the "buf.buf + 11" skips "refs/heads/"

Yeah, replacing +11 with skip_prefix() or something for readability,
such a check might be good enough in practice.

Thanks.
diff mbox series

Patch

diff --git a/builtin/remote.c b/builtin/remote.c
index 318701496ed..fd84bfbfe7a 100644
--- a/builtin/remote.c
+++ b/builtin/remote.c
@@ -132,6 +132,19 @@  static void add_branch(const char *key, const char *branchname,
 	git_config_set_multivar(key, tmp->buf, "^$", 0);
 }
 
+static int check_branch_names(const char **branches)
+{
+	int ret = 0;
+
+	for (const char **b = branches; *b; b++) {
+		if (check_refname_format(*b, REFNAME_ALLOW_ONELEVEL |
+						REFNAME_REFSPEC_PATTERN))
+			ret = error(_("invalid branch name '%s'"), *b);
+	}
+
+	return ret;
+}
+
 static const char mirror_advice[] =
 N_("--mirror is dangerous and deprecated; please\n"
    "\t use --mirror=fetch or --mirror=push instead");
@@ -203,6 +216,9 @@  static int add(int argc, const char **argv, const char *prefix)
 	if (!valid_remote_name(name))
 		die(_("'%s' is not a valid remote name"), name);
 
+	if (check_branch_names(track.v))
+		exit(128);
+
 	strbuf_addf(&buf, "remote.%s.url", name);
 	git_config_set(buf.buf, url);
 
@@ -1601,6 +1617,9 @@  static int set_remote_branches(const char *remotename, const char **branches,
 		exit(2);
 	}
 
+	if (check_branch_names(branches))
+		exit(128);
+
 	if (!add_mode && remove_all_fetch_refspecs(key.buf)) {
 		error(_("could not remove existing fetch refspec"));
 		strbuf_release(&key);
diff --git a/t/t5505-remote.sh b/t/t5505-remote.sh
index cfbd6139e00..709cbe65924 100755
--- a/t/t5505-remote.sh
+++ b/t/t5505-remote.sh
@@ -1195,6 +1195,34 @@  test_expect_success 'remote set-branches with --mirror' '
 	test_cmp expect.replace actual.replace
 '
 
+test_expect_success 'remote set-branches rejects invalid branch name' '
+	git remote add test https://git.example.com/repo &&
+	test_when_finished "git config --unset-all remote.test.fetch; \
+			    git config --unset remote.test.url" &&
+	test_must_fail git remote set-branches test "topic/*" in..valid \
+				feature "b a d" 2>err &&
+	cat >expect <<-EOF &&
+	error: invalid branch name ${SQ}in..valid${SQ}
+	error: invalid branch name ${SQ}b a d${SQ}
+	EOF
+	test_cmp expect err &&
+	git config --get-all remote.test.fetch >actual &&
+	echo "+refs/heads/*:refs/remotes/test/*" >expect &&
+	test_cmp expect actual
+'
+
+test_expect_success 'remote add -t rejects invalid branch name' '
+	test_must_fail git remote add test -t .bad -t "topic/*" -t in:valid \
+				 https://git.example.com/repo 2>err &&
+	cat >expect <<-EOF &&
+	error: invalid branch name ${SQ}.bad${SQ}
+	error: invalid branch name ${SQ}in:valid${SQ}
+	EOF
+	test_cmp expect err &&
+	test_expect_code 1 git config --get-regexp ^remote\\.test\\. >actual &&
+	test_must_be_empty actual
+'
+
 test_expect_success 'new remote' '
 	git remote add someremote foo &&
 	echo foo >expect &&