diff mbox series

Cloning empty repository uses locally configured default branch name

Message ID 20201208013121.677494-1-jonathantanmy@google.com (mailing list archive)
State New, archived
Headers show
Series Cloning empty repository uses locally configured default branch name | expand

Commit Message

Jonathan Tan Dec. 8, 2020, 1:31 a.m. UTC
When cloning an empty repository, a local branch is created. But its
name is not the name of the branch that the remote HEAD points to - it
is the locally configured default branch name. This issue arose at
$DAYJOB and, from my memory, it is also not an uncommon workflow to
configure things online on a repo host and then use "git clone" so that
things like remotes are automatically configured.

Has anyone looked into solutions for this? Both protocol v0 and v2 do
not send symref information about unborn branches (v0 because, as
protocol-capabilities.txt says, "servers SHOULD include this capability
for the HEAD symref if it is one of the refs being sent"; v2 because
a symref is included only if it refers to one of the refs being sent).
In protocol v2, this could be done by adding a capability to ls-remote
(maybe, "unborn"), and in protocol v0, this could be done either by
updating the existing "symref" capability to be written even when the
target branch is unborn (which is potentially backwards incompatible) or
introducing a new capability which is like "symref".

A small issue is that upload-pack protocol v0 doesn't even write the
blank ref line ("000...000 capabilities^{}") if HEAD points to an unborn
branch, but that can be fixed as in the patch below.

Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
---
 upload-pack.c | 40 +++++++++++++++++++++++++++-------------
 1 file changed, 27 insertions(+), 13 deletions(-)

Comments

Junio C Hamano Dec. 8, 2020, 2:16 a.m. UTC | #1
Jonathan Tan <jonathantanmy@google.com> writes:

> Has anyone looked into solutions for this? Both protocol v0 and v2 do
> not send symref information about unborn branches (v0 because, as
> protocol-capabilities.txt says, "servers SHOULD include this capability
> for the HEAD symref if it is one of the refs being sent"; v2 because
> a symref is included only if it refers to one of the refs being sent).
> In protocol v2, this could be done by adding a capability to ls-remote
> (maybe, "unborn"), and in protocol v0, this could be done either by
> updating the existing "symref" capability to be written even when the
> target branch is unborn (which is potentially backwards incompatible) or
> introducing a new capability which is like "symref".

Thanks for looking into this (I think this came up again today
during my reviews of some topic).

It would be a backward incompatible change to add to v0, but at this
point shouldn't we be leaving v0 as-is and move everybody to v2?

If it is a simple and safe enough change, though, saying "why not"
is very tempting, though.

> A small issue is that upload-pack protocol v0 doesn't even write the
> blank ref line ("000...000 capabilities^{}") if HEAD points to an unborn
> branch, but that can be fixed as in the patch below.

I think the codepaths we have today in process_capabilities() and
process_dummy_ref() (both in connect.c) would do the right thing
when it sees a blank ref line even when nothing gets transported,
but I smell that the rewrite of this state machine is fairly recent
(say in the past few years) and I do not offhand know if clients
before the rewrite of the state machine (say in v2.18.0) would be OK
with the change.  It should be easy to check, though.

> Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
> ---
>  upload-pack.c | 40 +++++++++++++++++++++++++++-------------
>  1 file changed, 27 insertions(+), 13 deletions(-)
>
> diff --git a/upload-pack.c b/upload-pack.c
> index 1006bebd50..d2359a8560 100644
> --- a/upload-pack.c
> +++ b/upload-pack.c
> @@ -1179,18 +1179,15 @@ static void format_symref_info(struct strbuf *buf, struct string_list *symref)
>  		strbuf_addf(buf, " symref=%s:%s", item->string, (char *)item->util);
>  }
>  
> -static int send_ref(const char *refname, const struct object_id *oid,
> -		    int flag, void *cb_data)
> +static const char *capabilities = "multi_ack thin-pack side-band"
> +	" side-band-64k ofs-delta shallow deepen-since deepen-not"
> +	" deepen-relative no-progress include-tag multi_ack_detailed";
> +
> +static void write_ref_lines(const char *refname_nons,
> +			    const struct object_id *oid,
> +			    const struct object_id *peeled,
> +			    struct upload_pack_data *data)
>  {
> -	static const char *capabilities = "multi_ack thin-pack side-band"
> -		" side-band-64k ofs-delta shallow deepen-since deepen-not"
> -		" deepen-relative no-progress include-tag multi_ack_detailed";
> -	const char *refname_nons = strip_namespace(refname);
> -	struct object_id peeled;
> -	struct upload_pack_data *data = cb_data;
> -
> -	if (mark_our_ref(refname_nons, refname, oid))
> -		return 0;
>  
>  	if (capabilities) {
>  		struct strbuf symref_info = STRBUF_INIT;
> @@ -1213,8 +1210,23 @@ static int send_ref(const char *refname, const struct object_id *oid,
>  		packet_write_fmt(1, "%s %s\n", oid_to_hex(oid), refname_nons);
>  	}
>  	capabilities = NULL;
> -	if (!peel_ref(refname, &peeled))
> -		packet_write_fmt(1, "%s %s^{}\n", oid_to_hex(&peeled), refname_nons);
> +	if (peeled)
> +		packet_write_fmt(1, "%s %s^{}\n", oid_to_hex(peeled), refname_nons);
> +}
> +
> +static int send_ref(const char *refname, const struct object_id *oid,
> +		    int flag, void *cb_data)
> +{
> +	const char *refname_nons = strip_namespace(refname);
> +	struct object_id peeled;
> +	struct upload_pack_data *data = cb_data;
> +
> +	if (mark_our_ref(refname_nons, refname, oid))
> +		return 0;
> +	write_ref_lines(refname_nons,
> +			oid,
> +			peel_ref(refname, &peeled) ? NULL : &peeled,
> +			data);
>  	return 0;
>  }
>  
> @@ -1332,6 +1344,8 @@ void upload_pack(struct upload_pack_options *options)
>  		reset_timeout(data.timeout);
>  		head_ref_namespaced(send_ref, &data);
>  		for_each_namespaced_ref(send_ref, &data);
> +		if (capabilities)
> +			write_ref_lines("capabilities^{}", &null_oid, NULL, &data);
>  		advertise_shallow_grafts(1);
>  		packet_flush(1);
>  	} else {
brian m. carlson Dec. 8, 2020, 2:32 a.m. UTC | #2
On 2020-12-08 at 02:16:07, Junio C Hamano wrote:
> Jonathan Tan <jonathantanmy@google.com> writes:
> 
> > Has anyone looked into solutions for this? Both protocol v0 and v2 do
> > not send symref information about unborn branches (v0 because, as
> > protocol-capabilities.txt says, "servers SHOULD include this capability
> > for the HEAD symref if it is one of the refs being sent"; v2 because
> > a symref is included only if it refers to one of the refs being sent).
> > In protocol v2, this could be done by adding a capability to ls-remote
> > (maybe, "unborn"), and in protocol v0, this could be done either by
> > updating the existing "symref" capability to be written even when the
> > target branch is unborn (which is potentially backwards incompatible) or
> > introducing a new capability which is like "symref".
> 
> Thanks for looking into this (I think this came up again today
> during my reviews of some topic).
> 
> It would be a backward incompatible change to add to v0, but at this
> point shouldn't we be leaving v0 as-is and move everybody to v2?
> 
> If it is a simple and safe enough change, though, saying "why not"
> is very tempting, though.

Yeah, I think this would be a nice thing to add to v2.  I've considered
adding a way to push symrefs (that is, update the head on the remote
side), but that would be a bit trickier.  Still, there's no reason the
fetch side couldn't learn a "symref" capability in the meantime.

I don't see a need for this in v0, since all versions of Git that
support this will also support v2.  I think it's okay if other clients
have to add support for v2 before they get the cool new features.
Jeff King Dec. 8, 2020, 3:58 p.m. UTC | #3
On Mon, Dec 07, 2020 at 05:31:20PM -0800, Jonathan Tan wrote:

> When cloning an empty repository, a local branch is created. But its
> name is not the name of the branch that the remote HEAD points to - it
> is the locally configured default branch name. This issue arose at
> $DAYJOB and, from my memory, it is also not an uncommon workflow to
> configure things online on a repo host and then use "git clone" so that
> things like remotes are automatically configured.
> 
> Has anyone looked into solutions for this? Both protocol v0 and v2 do
> not send symref information about unborn branches (v0 because, as
> protocol-capabilities.txt says, "servers SHOULD include this capability
> for the HEAD symref if it is one of the refs being sent"; v2 because
> a symref is included only if it refers to one of the refs being sent).
> In protocol v2, this could be done by adding a capability to ls-remote
> (maybe, "unborn"), and in protocol v0, this could be done either by
> updating the existing "symref" capability to be written even when the
> target branch is unborn (which is potentially backwards incompatible) or
> introducing a new capability which is like "symref".

We discussed this a few years ago, and I even wrote a small patch (for
v0 at the time, of course):

  https://lore.kernel.org/git/20170525155924.hk5jskennph6tta3@sigill.intra.peff.net/

A rebased version of that patch is below (it needed updating to handle
some namespacing stuff). Coupled with your patch here for the truly
empty repo case, it makes the server side of v0 do what you'd want.

But the client side needs to handle it, too. See the linked thread for
some discussion.

I wouldn't be too worried about the backwards incompatibility of sending
a symref line in the capabilities that doesn't point to a ref we're
sending. Old clients are quite likely to ignore it. But...

> A small issue is that upload-pack protocol v0 doesn't even write the
> blank ref line ("000...000 capabilities^{}") if HEAD points to an unborn
> branch, but that can be fixed as in the patch below.

I would worry how clients handle this bogus entry in the ref
advertisement. It looks like the actual Git client is OK, but what about
jgit, libgit2, etc? That's not necessarily a deal-breaker, but it would
be nice to know how they react.

It also only helps with v0 (and I agree with the sentiment that it would
be OK to ignore v0 at this point). For v2, we'd have to issue a HEAD
line like:

  0000000000000000000000000000000000000000 HEAD symref=refs/heads/foo

That probably would break clients, but the unborn capability should take
care of that.

Patch below (I think it only helps v0, but it could serve as a model for
doing the same thing in v2).

---
 upload-pack.c | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/upload-pack.c b/upload-pack.c
index 1006bebd50..b0cc337dcb 100644
--- a/upload-pack.c
+++ b/upload-pack.c
@@ -1218,20 +1218,21 @@ static int send_ref(const char *refname, const struct object_id *oid,
 	return 0;
 }
 
-static int find_symref(const char *refname, const struct object_id *oid,
-		       int flag, void *cb_data)
+static void find_symref(const char *refname, struct string_list *out)
 {
 	const char *symref_target;
 	struct string_list_item *item;
+	struct strbuf namespaced = STRBUF_INIT;
+	int flag;
+
+	strbuf_addf(&namespaced, "%s%s", get_git_namespace(), refname);
+	symref_target = resolve_ref_unsafe(namespaced.buf, 0, NULL, &flag);
+	strbuf_release(&namespaced);
 
-	if ((flag & REF_ISSYMREF) == 0)
-		return 0;
-	symref_target = resolve_ref_unsafe(refname, 0, NULL, &flag);
 	if (!symref_target || (flag & REF_ISSYMREF) == 0)
-		die("'%s' is a symref but it is not?", refname);
-	item = string_list_append(cb_data, strip_namespace(refname));
+		return;
+	item = string_list_append(out, refname);
 	item->util = xstrdup(strip_namespace(symref_target));
-	return 0;
 }
 
 static int parse_object_filter_config(const char *var, const char *value,
@@ -1326,7 +1327,7 @@ void upload_pack(struct upload_pack_options *options)
 	data.daemon_mode = options->daemon_mode;
 	data.timeout = options->timeout;
 
-	head_ref_namespaced(find_symref, &data.symref);
+	find_symref("HEAD", &data.symref);
 
 	if (options->advertise_refs || !data.stateless_rpc) {
 		reset_timeout(data.timeout);
Jonathan Tan Dec. 8, 2020, 6:55 p.m. UTC | #4
> Jonathan Tan <jonathantanmy@google.com> writes:
> 
> > Has anyone looked into solutions for this? Both protocol v0 and v2 do
> > not send symref information about unborn branches (v0 because, as
> > protocol-capabilities.txt says, "servers SHOULD include this capability
> > for the HEAD symref if it is one of the refs being sent"; v2 because
> > a symref is included only if it refers to one of the refs being sent).
> > In protocol v2, this could be done by adding a capability to ls-remote
> > (maybe, "unborn"), and in protocol v0, this could be done either by
> > updating the existing "symref" capability to be written even when the
> > target branch is unborn (which is potentially backwards incompatible) or
> > introducing a new capability which is like "symref".
> 
> Thanks for looking into this (I think this came up again today
> during my reviews of some topic).
> 
> It would be a backward incompatible change to add to v0, but at this
> point shouldn't we be leaving v0 as-is and move everybody to v2?

That makes sense.

> If it is a simple and safe enough change, though, saying "why not"
> is very tempting, though.

I'll look into how simple and safe it is.

> > A small issue is that upload-pack protocol v0 doesn't even write the
> > blank ref line ("000...000 capabilities^{}") if HEAD points to an unborn
> > branch, but that can be fixed as in the patch below.
> 
> I think the codepaths we have today in process_capabilities() and
> process_dummy_ref() (both in connect.c) would do the right thing
> when it sees a blank ref line even when nothing gets transported,
> but I smell that the rewrite of this state machine is fairly recent
> (say in the past few years) and I do not offhand know if clients
> before the rewrite of the state machine (say in v2.18.0) would be OK
> with the change.  It should be easy to check, though.

Yes - I backported my patch to v2.17.0 and it works:

  $ GIT_TRACE_PACKET=1 ~/git/bin-wrappers/git ls-remote "file://$(pwd)/empty"
  10:49:33.474111 pkt-line.c:80           packet:  upload-pack> 0000000000000000000000000000000000000000 capabilities^{}\0multi_ack thin-pack side-band side-band-64k ofs-delta shallow deepen-since deepen-not deepen-relative no-progress include-tag multi_ack_detailed agent=git/2.17.0.dirty
  10:49:33.474182 pkt-line.c:80           packet:  upload-pack> 0000
  10:49:33.474243 pkt-line.c:80           packet:          git< 0000000000000000000000000000000000000000 capabilities^{}\0multi_ack thin-pack side-band side-band-64k ofs-delta shallow deepen-since deepen-not deepen-relative no-progress include-tag multi_ack_detailed agent=git/2.17.0.dirty
  10:49:33.474315 pkt-line.c:80           packet:          git< 0000
  10:49:33.474320 pkt-line.c:80           packet:          git> 0000
  10:49:33.474358 pkt-line.c:80           packet:  upload-pack< 0000
Jonathan Tan Dec. 8, 2020, 8:06 p.m. UTC | #5
> On Mon, Dec 07, 2020 at 05:31:20PM -0800, Jonathan Tan wrote:
> 
> > When cloning an empty repository, a local branch is created. But its
> > name is not the name of the branch that the remote HEAD points to - it
> > is the locally configured default branch name. This issue arose at
> > $DAYJOB and, from my memory, it is also not an uncommon workflow to
> > configure things online on a repo host and then use "git clone" so that
> > things like remotes are automatically configured.
> > 
> > Has anyone looked into solutions for this? Both protocol v0 and v2 do
> > not send symref information about unborn branches (v0 because, as
> > protocol-capabilities.txt says, "servers SHOULD include this capability
> > for the HEAD symref if it is one of the refs being sent"; v2 because
> > a symref is included only if it refers to one of the refs being sent).
> > In protocol v2, this could be done by adding a capability to ls-remote
> > (maybe, "unborn"), and in protocol v0, this could be done either by
> > updating the existing "symref" capability to be written even when the
> > target branch is unborn (which is potentially backwards incompatible) or
> > introducing a new capability which is like "symref".
> 
> We discussed this a few years ago, and I even wrote a small patch (for
> v0 at the time, of course):
> 
>   https://lore.kernel.org/git/20170525155924.hk5jskennph6tta3@sigill.intra.peff.net/
> 
> A rebased version of that patch is below (it needed updating to handle
> some namespacing stuff). Coupled with your patch here for the truly
> empty repo case, it makes the server side of v0 do what you'd want.
> 
> But the client side needs to handle it, too. See the linked thread for
> some discussion.

Thanks for the pointer.

> I wouldn't be too worried about the backwards incompatibility of sending
> a symref line in the capabilities that doesn't point to a ref we're
> sending. Old clients are quite likely to ignore it. But...
> 
> > A small issue is that upload-pack protocol v0 doesn't even write the
> > blank ref line ("000...000 capabilities^{}") if HEAD points to an unborn
> > branch, but that can be fixed as in the patch below.
> 
> I would worry how clients handle this bogus entry in the ref
> advertisement. It looks like the actual Git client is OK, but what about
> jgit, libgit2, etc? That's not necessarily a deal-breaker, but it would
> be nice to know how they react.

That bogus entry is defined in the protocol and JGit both produces and
consumes that line. Consumption was verified by patching Git with my
patch and running the following commands in separate terminals:

~/git/bin-wrappers/git daemon --port=9425 --base-path=. .
sudo tcpdump -i any port 9425 -w -
~/jgit/bazel-bin/org.eclipse.jgit.pgm/jgit ls-remote git://localhost:9425/empty

And production:

~/jgit/bazel-bin/org.eclipse.jgit.pgm/jgit daemon --port=9426 .
GIT_TRACE_PACKET=1 git ls-remote git://localhost:9426/empty

(Note that the JGit CLI does not have a separate --base-path parameter.)

I have not checked libgit2, but quite a few servers use JGit out there,
so it presumably should be able to interoperate with them and hence
support the bogus entry.

> It also only helps with v0 (and I agree with the sentiment that it would
> be OK to ignore v0 at this point). For v2, we'd have to issue a HEAD
> line like:
> 
>   0000000000000000000000000000000000000000 HEAD symref=refs/heads/foo
> 
> That probably would break clients, but the unborn capability should take
> care of that.

Yes - or a special string like "unborn" in place of the 000.000.
Junio C Hamano Dec. 8, 2020, 9 p.m. UTC | #6
Jonathan Tan <jonathantanmy@google.com> writes:

>> > A small issue is that upload-pack protocol v0 doesn't even write the
>> > blank ref line ("000...000 capabilities^{}") if HEAD points to an unborn
>> > branch, but that can be fixed as in the patch below.
>> 
>> I think the codepaths we have today in process_capabilities() and
>> process_dummy_ref() (both in connect.c) would do the right thing
>> when it sees a blank ref line even when nothing gets transported,
>> but I smell that the rewrite of this state machine is fairly recent
>> (say in the past few years) and I do not offhand know if clients
>> before the rewrite of the state machine (say in v2.18.0) would be OK
>> with the change.  It should be easy to check, though.
>
> Yes - I backported my patch to v2.17.0 and it works:

I wouldn't be surprised if other reimplementations of Git (like
jgit, libgit2 and Go or Python or whatever your favorite language)
barfs, though.
Jeff King Dec. 8, 2020, 9:15 p.m. UTC | #7
On Tue, Dec 08, 2020 at 12:06:49PM -0800, Jonathan Tan wrote:

> > I would worry how clients handle this bogus entry in the ref
> > advertisement. It looks like the actual Git client is OK, but what about
> > jgit, libgit2, etc? That's not necessarily a deal-breaker, but it would
> > be nice to know how they react.
> 
> That bogus entry is defined in the protocol and JGit both produces and
> consumes that line. Consumption was verified by patching Git with my
> patch and running the following commands in separate terminals:

Ah, indeed.

I forgot that we went through all of this a few years ago for your
eb398797cd (connect: advertized capability is not a ref, 2016-09-09). I
stand behind the "it was probably originally an error in the protocol
documentation" from [1], but at this point I think we can say it's a
supported part of the protocol.

All of this is moot, of course, if we only do the v2 solution. :)

-Peff

[1] https://lore.kernel.org/git/20160902233547.mzgluioc7hhabalw@sigill.intra.peff.net/
diff mbox series

Patch

diff --git a/upload-pack.c b/upload-pack.c
index 1006bebd50..d2359a8560 100644
--- a/upload-pack.c
+++ b/upload-pack.c
@@ -1179,18 +1179,15 @@  static void format_symref_info(struct strbuf *buf, struct string_list *symref)
 		strbuf_addf(buf, " symref=%s:%s", item->string, (char *)item->util);
 }
 
-static int send_ref(const char *refname, const struct object_id *oid,
-		    int flag, void *cb_data)
+static const char *capabilities = "multi_ack thin-pack side-band"
+	" side-band-64k ofs-delta shallow deepen-since deepen-not"
+	" deepen-relative no-progress include-tag multi_ack_detailed";
+
+static void write_ref_lines(const char *refname_nons,
+			    const struct object_id *oid,
+			    const struct object_id *peeled,
+			    struct upload_pack_data *data)
 {
-	static const char *capabilities = "multi_ack thin-pack side-band"
-		" side-band-64k ofs-delta shallow deepen-since deepen-not"
-		" deepen-relative no-progress include-tag multi_ack_detailed";
-	const char *refname_nons = strip_namespace(refname);
-	struct object_id peeled;
-	struct upload_pack_data *data = cb_data;
-
-	if (mark_our_ref(refname_nons, refname, oid))
-		return 0;
 
 	if (capabilities) {
 		struct strbuf symref_info = STRBUF_INIT;
@@ -1213,8 +1210,23 @@  static int send_ref(const char *refname, const struct object_id *oid,
 		packet_write_fmt(1, "%s %s\n", oid_to_hex(oid), refname_nons);
 	}
 	capabilities = NULL;
-	if (!peel_ref(refname, &peeled))
-		packet_write_fmt(1, "%s %s^{}\n", oid_to_hex(&peeled), refname_nons);
+	if (peeled)
+		packet_write_fmt(1, "%s %s^{}\n", oid_to_hex(peeled), refname_nons);
+}
+
+static int send_ref(const char *refname, const struct object_id *oid,
+		    int flag, void *cb_data)
+{
+	const char *refname_nons = strip_namespace(refname);
+	struct object_id peeled;
+	struct upload_pack_data *data = cb_data;
+
+	if (mark_our_ref(refname_nons, refname, oid))
+		return 0;
+	write_ref_lines(refname_nons,
+			oid,
+			peel_ref(refname, &peeled) ? NULL : &peeled,
+			data);
 	return 0;
 }
 
@@ -1332,6 +1344,8 @@  void upload_pack(struct upload_pack_options *options)
 		reset_timeout(data.timeout);
 		head_ref_namespaced(send_ref, &data);
 		for_each_namespaced_ref(send_ref, &data);
+		if (capabilities)
+			write_ref_lines("capabilities^{}", &null_oid, NULL, &data);
 		advertise_shallow_grafts(1);
 		packet_flush(1);
 	} else {