diff mbox series

[3/3] archive: allow archive over HTTP(S) with proto v2

Message ID 20180912053519.31085-4-steadmon@google.com (mailing list archive)
State Superseded
Headers show
Series [1/3] archive: use packet_reader for communications | expand

Commit Message

Josh Steadmon Sept. 12, 2018, 5:35 a.m. UTC
Signed-off-by: Josh Steadmon <steadmon@google.com>
---
 builtin/archive.c  |  8 +++++++-
 http-backend.c     | 10 +++++++++-
 transport-helper.c |  5 +++--
 3 files changed, 19 insertions(+), 4 deletions(-)

Comments

Stefan Beller Sept. 12, 2018, 10:38 p.m. UTC | #1
On Tue, Sep 11, 2018 at 10:36 PM Josh Steadmon <steadmon@google.com> wrote:
>
> Signed-off-by: Josh Steadmon <steadmon@google.com>
> ---
>  builtin/archive.c  |  8 +++++++-
>  http-backend.c     | 10 +++++++++-
>  transport-helper.c |  5 +++--
>  3 files changed, 19 insertions(+), 4 deletions(-)
>
> diff --git a/builtin/archive.c b/builtin/archive.c
> index 73831887d..5fa75b3f7 100644
> --- a/builtin/archive.c
> +++ b/builtin/archive.c
> @@ -87,7 +87,13 @@ static int run_remote_archiver(int argc, const char **argv,
>                 status = packet_reader_read(&reader);
>                 if (status != PACKET_READ_FLUSH)
>                         die(_("git archive: expected a flush"));
> -       }
> +       } else if (version == protocol_v2 &&
> +                  starts_with(transport->url, "http"))

I assume this is a smart way to cover both http and https
as the latter is prefixed by the former.

How does this interact with remote helpers?
(See gitremote-helpers(1), which doesn't mention archives,
but I would imagine that one would be able to use a remote
helper eventually, too.)

    git archive --remote=abc://example.org/test ....

> +               /*
> +                * Commands over HTTP require two requests, so there's an
> +                * additional server response to parse.
> +                */
> +               discover_version(&reader);
>
>         /* Now, start reading from fd[0] and spit it out to stdout */
>         rv = recv_sideband("archive", fd[0], 1);
> diff --git a/http-backend.c b/http-backend.c
> index 458642ef7..d62d583c7 100644
> --- a/http-backend.c
> +++ b/http-backend.c
> @@ -32,6 +32,7 @@ struct rpc_service {
>  static struct rpc_service rpc_service[] = {
>         { "upload-pack", "uploadpack", 1, 1 },
>         { "receive-pack", "receivepack", 0, -1 },
> +       { "upload-archive", "uploadarchive", 1, 1 },

So git archives are not supported in protocol v0 via http?
Then it makes sense to see no mention of archives in
the remote helpers as well.

The rest of the code is a surprisingly small patch.

Stefan
Junio C Hamano Sept. 13, 2018, 4:47 p.m. UTC | #2
Josh Steadmon <steadmon@google.com> writes:

> Signed-off-by: Josh Steadmon <steadmon@google.com>
> ---
>  builtin/archive.c  |  8 +++++++-
>  http-backend.c     | 10 +++++++++-
>  transport-helper.c |  5 +++--
>  3 files changed, 19 insertions(+), 4 deletions(-)
>
> diff --git a/builtin/archive.c b/builtin/archive.c
> index 73831887d..5fa75b3f7 100644
> --- a/builtin/archive.c
> +++ b/builtin/archive.c
> @@ -87,7 +87,13 @@ static int run_remote_archiver(int argc, const char **argv,
>  		status = packet_reader_read(&reader);
>  		if (status != PACKET_READ_FLUSH)
>  			die(_("git archive: expected a flush"));
> -	}
> +	} else if (version == protocol_v2 &&
> +		   starts_with(transport->url, "http"))
> +		/*
> +		 * Commands over HTTP require two requests, so there's an
> +		 * additional server response to parse.
> +		 */
> +		discover_version(&reader);

What should happen if the version discovered here is different from
v2 or the capabilities offered are different from what we saw
before?  Perhaps we need some sanity checks, as we on this side of
the connection know we are making two requests, and may even end up
talking with an instance of "upload-archive" that is different from
the one we talked with earlier.

> diff --git a/http-backend.c b/http-backend.c
> index 458642ef7..d62d583c7 100644
> --- a/http-backend.c
> +++ b/http-backend.c
> @@ -32,6 +32,7 @@ struct rpc_service {
>  static struct rpc_service rpc_service[] = {
>  	{ "upload-pack", "uploadpack", 1, 1 },
>  	{ "receive-pack", "receivepack", 0, -1 },
> +	{ "upload-archive", "uploadarchive", 1, 1 },
>  };
>  
>  static struct string_list *get_parameters(void)
> @@ -637,6 +638,12 @@ static void service_rpc(struct strbuf *hdr, char *service_name)
>  	struct rpc_service *svc = select_service(hdr, service_name);
>  	struct strbuf buf = STRBUF_INIT;
>  
> +	if (!strcmp(service_name, "git-upload-archive")) {
> +		/* git-upload-archive doesn't need --stateless-rpc */
> +		argv[1] = ".";
> +		argv[2] = NULL;
> +	}
> +
>  	strbuf_reset(&buf);
>  	strbuf_addf(&buf, "application/x-git-%s-request", svc->name);
>  	check_content_type(hdr, buf.buf);
> @@ -713,7 +720,8 @@ static struct service_cmd {
>  	{"GET", "/objects/pack/pack-[0-9a-f]{40}\\.idx$", get_idx_file},
>  
>  	{"POST", "/git-upload-pack$", service_rpc},
> -	{"POST", "/git-receive-pack$", service_rpc}
> +	{"POST", "/git-receive-pack$", service_rpc},
> +	{"POST", "/git-upload-archive$", service_rpc},
>  };
>  
>  static int bad_request(struct strbuf *hdr, const struct service_cmd *c)
> diff --git a/transport-helper.c b/transport-helper.c
> index 143ca008c..b4b96fc89 100644
> --- a/transport-helper.c
> +++ b/transport-helper.c
> @@ -605,7 +605,8 @@ static int process_connect_service(struct transport *transport,
>  		ret = run_connect(transport, &cmdbuf);
>  	} else if (data->stateless_connect &&
>  		   (get_protocol_version_config() == protocol_v2) &&
> -		   !strcmp("git-upload-pack", name)) {
> +		   (!strcmp("git-upload-pack", name) ||
> +		    !strcmp("git-upload-archive", name))) {
>  		strbuf_addf(&cmdbuf, "stateless-connect %s\n", name);
>  		ret = run_connect(transport, &cmdbuf);
>  		if (ret)
> @@ -639,7 +640,7 @@ static int connect_helper(struct transport *transport, const char *name,
>  
>  	/* Get_helper so connect is inited. */
>  	get_helper(transport);
> -	if (!data->connect)
> +	if (!data->connect && !data->stateless_connect)
>  		die(_("operation not supported by protocol"));

This is somewhat curious.  The upload-pack going over HTTP also is
triggered by the same "stateless-connect" remote helper command, as
we just saw in the previous hunk, and that support is not new.  Why
do we need this change then?  What's different between the "data"
that is obtained by get_helper(transport) for driving upload-pack
and upload-archive?  Presumably upload-pack was working without this
change because it also sets the connect bit on, and upload-archive
does not work without this change because it does not?  Why do these
two behave differently?

>  	if (!process_connect_service(transport, name, exec))
Jonathan Nieder Sept. 14, 2018, 5:57 a.m. UTC | #3
Hi,

Josh Steadmon wrote:

> Subject: archive: allow archive over HTTP(S) with proto v2

It's interesting how little this has to touch the client.

> Signed-off-by: Josh Steadmon <steadmon@google.com>
> ---
>  builtin/archive.c  |  8 +++++++-
>  http-backend.c     | 10 +++++++++-
>  transport-helper.c |  5 +++--
>  3 files changed, 19 insertions(+), 4 deletions(-)
[....]
> --- a/builtin/archive.c
> +++ b/builtin/archive.c
> @@ -87,7 +87,13 @@ static int run_remote_archiver(int argc, const char **argv,
>  		status = packet_reader_read(&reader);
>  		if (status != PACKET_READ_FLUSH)
>  			die(_("git archive: expected a flush"));
> -	}
> +	} else if (version == protocol_v2 &&
> +		   starts_with(transport->url, "http"))

As Stefan noticed, this starts_with test seems a bit too loose.  For
example, what happens if I try an scp-style SSH URL like
http.example.com:path/to/repo, a local path like http/foo/bar, or a
custom protocol like httplikebutbetter://path/to/repo (honest
question: I haven't tried)?

> +		/*
> +		 * Commands over HTTP require two requests, so there's an
> +		 * additional server response to parse.
> +		 */
> +		discover_version(&reader);

Can this be made consistent with the non-http case?  The original
capabilities (/info/refs) response told us what protocol version the
server wants to use, which means that a hypothetical protocol v3 could
use a completely different request format for the followup commands:
so could the server omit the protocol version in the v2
/git-upload-archive response?  Alternatively, if we want to include
the protocol version again, could we do that in stateful protocols as
well?

Related question: what should happen if the two responses declare
different protocol versions?  Should we diagnose that as a protocol
error?

[...]
> --- a/http-backend.c
> +++ b/http-backend.c
> @@ -32,6 +32,7 @@ struct rpc_service {
>  static struct rpc_service rpc_service[] = {
>  	{ "upload-pack", "uploadpack", 1, 1 },
>  	{ "receive-pack", "receivepack", 0, -1 },
> +	{ "upload-archive", "uploadarchive", 1, 1 },

shell.c orders these in almost-alphabetical order (receive-pack,
upload-pack, upload-archive).  I guess they should both use actual
alphabetical order?  (If you agree, then please feel free to do that
in a separate patch.)

[...]
> @@ -637,6 +638,12 @@ static void service_rpc(struct strbuf *hdr, char *service_name)
>  	struct rpc_service *svc = select_service(hdr, service_name);
>  	struct strbuf buf = STRBUF_INIT;
>  
> +	if (!strcmp(service_name, "git-upload-archive")) {
> +		/* git-upload-archive doesn't need --stateless-rpc */

This comment doesn't seem actionable.  Can it say why?  E.g. "[...]
because an upload-archive command always involves a single
round-trip".  Or alternatively, I think it's fine to omit the comment.

> +		argv[1] = ".";
> +		argv[2] = NULL;
> +	}
[...]
> @@ -713,7 +720,8 @@ static struct service_cmd {
>  	{"GET", "/objects/pack/pack-[0-9a-f]{40}\\.idx$", get_idx_file},
>  
>  	{"POST", "/git-upload-pack$", service_rpc},
> -	{"POST", "/git-receive-pack$", service_rpc}
> +	{"POST", "/git-receive-pack$", service_rpc},
> +	{"POST", "/git-upload-archive$", service_rpc},

Same comment about services seeming to be in a randomish order.

[...]
> --- a/transport-helper.c
> +++ b/transport-helper.c
> @@ -605,7 +605,8 @@ static int process_connect_service(struct transport *transport,
>  		ret = run_connect(transport, &cmdbuf);
>  	} else if (data->stateless_connect &&
>  		   (get_protocol_version_config() == protocol_v2) &&

(not about this patch) These parens don't help --- they make it harder
for me to read, especially with the new parens to try to match them up
with.

> -		   !strcmp("git-upload-pack", name)) {
> +		   (!strcmp("git-upload-pack", name) ||
> +		    !strcmp("git-upload-archive", name))) {

A part of me wonders about the wasted cycles comparing to
"git-upload-" twice, but (1) it is tiny relative to actually serving
the request and (2) if we're lucky, the compiler (or a compiler of the
future) inlines the strcmp call and could optimize it out.

[...]
> @@ -639,7 +640,7 @@ static int connect_helper(struct transport *transport, const char *name,
>  
>  	/* Get_helper so connect is inited. */
>  	get_helper(transport);
> -	if (!data->connect)
> +	if (!data->connect && !data->stateless_connect)
>  		die(_("operation not supported by protocol"));

I don't understand this part.  Can you explain it further (possibly by
putting it in its own patch)?

Thanks for a pleasant read,
Jonathan
Josh Steadmon Sept. 27, 2018, 8:28 p.m. UTC | #4
On 2018.09.13 09:47, Junio C Hamano wrote:
> Josh Steadmon <steadmon@google.com> writes:
> 
> > Signed-off-by: Josh Steadmon <steadmon@google.com>
> > ---
> >  builtin/archive.c  |  8 +++++++-
> >  http-backend.c     | 10 +++++++++-
> >  transport-helper.c |  5 +++--
> >  3 files changed, 19 insertions(+), 4 deletions(-)
> >
> > diff --git a/builtin/archive.c b/builtin/archive.c
> > index 73831887d..5fa75b3f7 100644
> > --- a/builtin/archive.c
> > +++ b/builtin/archive.c
> > @@ -87,7 +87,13 @@ static int run_remote_archiver(int argc, const char **argv,
> >  		status = packet_reader_read(&reader);
> >  		if (status != PACKET_READ_FLUSH)
> >  			die(_("git archive: expected a flush"));
> > -	}
> > +	} else if (version == protocol_v2 &&
> > +		   starts_with(transport->url, "http"))
> > +		/*
> > +		 * Commands over HTTP require two requests, so there's an
> > +		 * additional server response to parse.
> > +		 */
> > +		discover_version(&reader);
> 
> What should happen if the version discovered here is different from
> v2 or the capabilities offered are different from what we saw
> before?  Perhaps we need some sanity checks, as we on this side of
> the connection know we are making two requests, and may even end up
> talking with an instance of "upload-archive" that is different from
> the one we talked with earlier.
> 
> > diff --git a/http-backend.c b/http-backend.c
> > index 458642ef7..d62d583c7 100644
> > --- a/http-backend.c
> > +++ b/http-backend.c
> > @@ -32,6 +32,7 @@ struct rpc_service {
> >  static struct rpc_service rpc_service[] = {
> >  	{ "upload-pack", "uploadpack", 1, 1 },
> >  	{ "receive-pack", "receivepack", 0, -1 },
> > +	{ "upload-archive", "uploadarchive", 1, 1 },
> >  };
> >  
> >  static struct string_list *get_parameters(void)
> > @@ -637,6 +638,12 @@ static void service_rpc(struct strbuf *hdr, char *service_name)
> >  	struct rpc_service *svc = select_service(hdr, service_name);
> >  	struct strbuf buf = STRBUF_INIT;
> >  
> > +	if (!strcmp(service_name, "git-upload-archive")) {
> > +		/* git-upload-archive doesn't need --stateless-rpc */
> > +		argv[1] = ".";
> > +		argv[2] = NULL;
> > +	}
> > +
> >  	strbuf_reset(&buf);
> >  	strbuf_addf(&buf, "application/x-git-%s-request", svc->name);
> >  	check_content_type(hdr, buf.buf);
> > @@ -713,7 +720,8 @@ static struct service_cmd {
> >  	{"GET", "/objects/pack/pack-[0-9a-f]{40}\\.idx$", get_idx_file},
> >  
> >  	{"POST", "/git-upload-pack$", service_rpc},
> > -	{"POST", "/git-receive-pack$", service_rpc}
> > +	{"POST", "/git-receive-pack$", service_rpc},
> > +	{"POST", "/git-upload-archive$", service_rpc},
> >  };
> >  
> >  static int bad_request(struct strbuf *hdr, const struct service_cmd *c)
> > diff --git a/transport-helper.c b/transport-helper.c
> > index 143ca008c..b4b96fc89 100644
> > --- a/transport-helper.c
> > +++ b/transport-helper.c
> > @@ -605,7 +605,8 @@ static int process_connect_service(struct transport *transport,
> >  		ret = run_connect(transport, &cmdbuf);
> >  	} else if (data->stateless_connect &&
> >  		   (get_protocol_version_config() == protocol_v2) &&
> > -		   !strcmp("git-upload-pack", name)) {
> > +		   (!strcmp("git-upload-pack", name) ||
> > +		    !strcmp("git-upload-archive", name))) {
> >  		strbuf_addf(&cmdbuf, "stateless-connect %s\n", name);
> >  		ret = run_connect(transport, &cmdbuf);
> >  		if (ret)
> > @@ -639,7 +640,7 @@ static int connect_helper(struct transport *transport, const char *name,
> >  
> >  	/* Get_helper so connect is inited. */
> >  	get_helper(transport);
> > -	if (!data->connect)
> > +	if (!data->connect && !data->stateless_connect)
> >  		die(_("operation not supported by protocol"));
> 
> This is somewhat curious.  The upload-pack going over HTTP also is
> triggered by the same "stateless-connect" remote helper command, as
> we just saw in the previous hunk, and that support is not new.  Why
> do we need this change then?  What's different between the "data"
> that is obtained by get_helper(transport) for driving upload-pack
> and upload-archive?  Presumably upload-pack was working without this
> change because it also sets the connect bit on, and upload-archive
> does not work without this change because it does not?  Why do these
> two behave differently?

The data struct is not different between upload-pack vs. upload-archive.
Neither of them sets the connect bit. The difference is that upload-pack
doesn't need to call transport_connect(), so it never hits
connect_helper().
diff mbox series

Patch

diff --git a/builtin/archive.c b/builtin/archive.c
index 73831887d..5fa75b3f7 100644
--- a/builtin/archive.c
+++ b/builtin/archive.c
@@ -87,7 +87,13 @@  static int run_remote_archiver(int argc, const char **argv,
 		status = packet_reader_read(&reader);
 		if (status != PACKET_READ_FLUSH)
 			die(_("git archive: expected a flush"));
-	}
+	} else if (version == protocol_v2 &&
+		   starts_with(transport->url, "http"))
+		/*
+		 * Commands over HTTP require two requests, so there's an
+		 * additional server response to parse.
+		 */
+		discover_version(&reader);
 
 	/* Now, start reading from fd[0] and spit it out to stdout */
 	rv = recv_sideband("archive", fd[0], 1);
diff --git a/http-backend.c b/http-backend.c
index 458642ef7..d62d583c7 100644
--- a/http-backend.c
+++ b/http-backend.c
@@ -32,6 +32,7 @@  struct rpc_service {
 static struct rpc_service rpc_service[] = {
 	{ "upload-pack", "uploadpack", 1, 1 },
 	{ "receive-pack", "receivepack", 0, -1 },
+	{ "upload-archive", "uploadarchive", 1, 1 },
 };
 
 static struct string_list *get_parameters(void)
@@ -637,6 +638,12 @@  static void service_rpc(struct strbuf *hdr, char *service_name)
 	struct rpc_service *svc = select_service(hdr, service_name);
 	struct strbuf buf = STRBUF_INIT;
 
+	if (!strcmp(service_name, "git-upload-archive")) {
+		/* git-upload-archive doesn't need --stateless-rpc */
+		argv[1] = ".";
+		argv[2] = NULL;
+	}
+
 	strbuf_reset(&buf);
 	strbuf_addf(&buf, "application/x-git-%s-request", svc->name);
 	check_content_type(hdr, buf.buf);
@@ -713,7 +720,8 @@  static struct service_cmd {
 	{"GET", "/objects/pack/pack-[0-9a-f]{40}\\.idx$", get_idx_file},
 
 	{"POST", "/git-upload-pack$", service_rpc},
-	{"POST", "/git-receive-pack$", service_rpc}
+	{"POST", "/git-receive-pack$", service_rpc},
+	{"POST", "/git-upload-archive$", service_rpc},
 };
 
 static int bad_request(struct strbuf *hdr, const struct service_cmd *c)
diff --git a/transport-helper.c b/transport-helper.c
index 143ca008c..b4b96fc89 100644
--- a/transport-helper.c
+++ b/transport-helper.c
@@ -605,7 +605,8 @@  static int process_connect_service(struct transport *transport,
 		ret = run_connect(transport, &cmdbuf);
 	} else if (data->stateless_connect &&
 		   (get_protocol_version_config() == protocol_v2) &&
-		   !strcmp("git-upload-pack", name)) {
+		   (!strcmp("git-upload-pack", name) ||
+		    !strcmp("git-upload-archive", name))) {
 		strbuf_addf(&cmdbuf, "stateless-connect %s\n", name);
 		ret = run_connect(transport, &cmdbuf);
 		if (ret)
@@ -639,7 +640,7 @@  static int connect_helper(struct transport *transport, const char *name,
 
 	/* Get_helper so connect is inited. */
 	get_helper(transport);
-	if (!data->connect)
+	if (!data->connect && !data->stateless_connect)
 		die(_("operation not supported by protocol"));
 
 	if (!process_connect_service(transport, name, exec))