diff mbox series

[1/5] remote-curl: add 'get' capability

Message ID 40808e92afb7bcf3e8e9b4b53d5e30b5e17816f8.1658781277.git.gitgitgadget@gmail.com (mailing list archive)
State Superseded
Headers show
Series Bundle URIs II: git clone --bundle-uri | expand

Commit Message

Derrick Stolee July 25, 2022, 8:34 p.m. UTC
From: Derrick Stolee <derrickstolee@github.com>

A future change will want a way to download a file over HTTP(S) using
the simplest of download mechanisms. We do not want to assume that the
server on the other side understands anything about the Git protocol but
could be a simple static web server.

Create the new 'get' capability for the remote helpers which advertises
that the 'get' command is avalable. A caller can send a line containing
'get <url> <path>' to download the file at <url> into the file at
<path>.

Signed-off-by: Derrick Stolee <derrickstolee@github.com>
---
 Documentation/gitremote-helpers.txt |  9 +++++++
 remote-curl.c                       | 32 +++++++++++++++++++++++++
 t/t5557-http-get.sh                 | 37 +++++++++++++++++++++++++++++
 3 files changed, 78 insertions(+)
 create mode 100755 t/t5557-http-get.sh

Comments

Josh Steadmon July 27, 2022, 10:08 p.m. UTC | #1
On 2022.07.25 20:34, Derrick Stolee via GitGitGadget wrote:
> From: Derrick Stolee <derrickstolee@github.com>
> 
> A future change will want a way to download a file over HTTP(S) using
> the simplest of download mechanisms. We do not want to assume that the
> server on the other side understands anything about the Git protocol but
> could be a simple static web server.
> 
> Create the new 'get' capability for the remote helpers which advertises
> that the 'get' command is avalable. A caller can send a line containing
> 'get <url> <path>' to download the file at <url> into the file at
> <path>.
> 
> Signed-off-by: Derrick Stolee <derrickstolee@github.com>
> ---
>  Documentation/gitremote-helpers.txt |  9 +++++++
>  remote-curl.c                       | 32 +++++++++++++++++++++++++
>  t/t5557-http-get.sh                 | 37 +++++++++++++++++++++++++++++
>  3 files changed, 78 insertions(+)
>  create mode 100755 t/t5557-http-get.sh
> 
> diff --git a/Documentation/gitremote-helpers.txt b/Documentation/gitremote-helpers.txt
> index 6f1e269ae43..ed8da428c98 100644
> --- a/Documentation/gitremote-helpers.txt
> +++ b/Documentation/gitremote-helpers.txt
> @@ -168,6 +168,9 @@ Supported commands: 'list', 'import'.
>  	Can guarantee that when a clone is requested, the received
>  	pack is self contained and is connected.
>  
> +'get'::
> +	Can use the 'get' command to download a file from a given URI.
> +
>  If a helper advertises 'connect', Git will use it if possible and
>  fall back to another capability if the helper requests so when
>  connecting (see the 'connect' command under COMMANDS).
> @@ -418,6 +421,12 @@ Supported if the helper has the "connect" capability.
>  +
>  Supported if the helper has the "stateless-connect" capability.
>  
> +'get' <uri> <path>::
> +	Downloads the file from the given `<uri>` to the given `<path>`. If
> +	`<path>.temp` exists, then Git assumes that the `.temp` file is a
> +	partial download from a previous attempt and will resume the
> +	download from that position.
> +
>  If a fatal error occurs, the program writes the error message to
>  stderr and exits. The caller should expect that a suitable error
>  message has been printed if the child closes the connection without
> diff --git a/remote-curl.c b/remote-curl.c
> index b8758757ece..73fbdbddd84 100644
> --- a/remote-curl.c
> +++ b/remote-curl.c
> @@ -1286,6 +1286,33 @@ static void parse_fetch(struct strbuf *buf)
>  	strbuf_reset(buf);
>  }
>  
> +static void parse_get(struct strbuf *buf)
> +{
> +	struct strbuf url = STRBUF_INIT;
> +	struct strbuf path = STRBUF_INIT;
> +	const char *p, *space;
> +
> +	if (!skip_prefix(buf->buf, "get ", &p))
> +		die(_("http transport does not support %s"), buf->buf);

Nit: since we're already calling skip_prefix(...) in cmd_main() below,
can we just pass the suffix to parse_get() and avoid having to skip the
prefix twice?


> +
> +	space = strchr(p, ' ');
> +
> +	if (!space)
> +		die(_("protocol error: expected '<url> <path>', missing space"));
> +
> +	strbuf_add(&url, p, space - p);
> +	strbuf_addstr(&path, space + 1);
> +
> +	if (http_get_file(url.buf, path.buf, NULL))
> +		die(_("failed to download file at URL '%s'"), url.buf);
> +
> +	strbuf_release(&url);
> +	strbuf_release(&path);
> +	printf("\n");
> +	fflush(stdout);
> +	strbuf_reset(buf);
> +}
> +
>  static int push_dav(int nr_spec, const char **specs)
>  {
>  	struct child_process child = CHILD_PROCESS_INIT;
> @@ -1564,9 +1591,14 @@ int cmd_main(int argc, const char **argv)
>  				printf("unsupported\n");
>  			fflush(stdout);
>  
> +		} else if (skip_prefix(buf.buf, "get ", &arg)) {
> +			parse_get(&buf);
> +			fflush(stdout);
> +
>  		} else if (!strcmp(buf.buf, "capabilities")) {
>  			printf("stateless-connect\n");
>  			printf("fetch\n");
> +			printf("get\n");
>  			printf("option\n");
>  			printf("push\n");
>  			printf("check-connectivity\n");
> diff --git a/t/t5557-http-get.sh b/t/t5557-http-get.sh
> new file mode 100755
> index 00000000000..50b7dbcf957
> --- /dev/null
> +++ b/t/t5557-http-get.sh
> @@ -0,0 +1,37 @@
> +#!/bin/sh
> +
> +test_description='test downloading a file by URL'
> +
> +. ./test-lib.sh
> +
> +. "$TEST_DIRECTORY"/lib-httpd.sh
> +start_httpd
> +
> +test_expect_success 'get by URL: 404' '
> +	url="$HTTPD_URL/none.txt" &&
> +	cat >input <<-EOF &&
> +	capabilities
> +	get $url file1
> +	EOF
> +
> +	test_must_fail git remote-http $url <input 2>err &&
> +	test_path_is_missing file1 &&
> +	grep "failed to download file at URL" err &&
> +	rm file1.temp
> +'
> +
> +test_expect_success 'get by URL: 200' '
> +	echo data >"$HTTPD_DOCUMENT_ROOT_PATH/exists.txt" &&
> +
> +	url="$HTTPD_URL/exists.txt" &&
> +	cat >input <<-EOF &&
> +	capabilities
> +	get $url file2
> +
> +	EOF
> +
> +	GIT_TRACE2_PERF=1 git remote-http $url <input &&
> +	test_cmp "$HTTPD_DOCUMENT_ROOT_PATH/exists.txt" file2
> +'
> +
> +test_done
> -- 
> gitgitgadget
>
Ævar Arnfjörð Bjarmason July 27, 2022, 11 p.m. UTC | #2
On Mon, Jul 25 2022, Derrick Stolee via GitGitGadget wrote:

> From: Derrick Stolee <derrickstolee@github.com>
> [...]

Add a:

	TEST_PASSES_SANITIZE_LEAK=true

Before:

> +. ./test-lib.sh
> +
> +. "$TEST_DIRECTORY"/lib-httpd.sh
> +start_httpd
> +
> +test_expect_success 'get by URL: 404' '
> +	url="$HTTPD_URL/none.txt" &&
> +	cat >input <<-EOF &&
> +	capabilities
> +	get $url file1
> +	EOF
> +
> +	test_must_fail git remote-http $url <input 2>err &&
> +	test_path_is_missing file1 &&
> +	grep "failed to download file at URL" err &&
> +	rm file1.temp

This should presumably be a :

	test_when_finished "rm file.temp"

Before the "test_must_fail", otherwise we'll leave this around if
e.g. the "grep" in the &&-chain fails, but we surely want to clean this
up in that case..

> +'
> +
> +test_expect_success 'get by URL: 200' '
> +	echo data >"$HTTPD_DOCUMENT_ROOT_PATH/exists.txt" &&
> +
> +	url="$HTTPD_URL/exists.txt" &&
> +	cat >input <<-EOF &&
> +	capabilities
> +	get $url file2
> +
> +	EOF
> +
> +	GIT_TRACE2_PERF=1 git remote-http $url <input &&

Is this ad-hoc debugging that snuck into the test? Nothing here needs
this GIT_TRACE2_PERF...
Derrick Stolee Aug. 1, 2022, 1:55 p.m. UTC | #3
On 7/27/2022 7:00 PM, Ævar Arnfjörð Bjarmason wrote:
> 
> On Mon, Jul 25 2022, Derrick Stolee via GitGitGadget wrote:
> 
>> From: Derrick Stolee <derrickstolee@github.com>
>> [...]
> 
> Add a:
> 
> 	TEST_PASSES_SANITIZE_LEAK=true

I trust this is important for your series that flags tests
that _could_ pass with this. Thanks.

>> +. ./test-lib.sh
>> +
>> +. "$TEST_DIRECTORY"/lib-httpd.sh
>> +start_httpd
>> +
>> +test_expect_success 'get by URL: 404' '
>> +	url="$HTTPD_URL/none.txt" &&
>> +	cat >input <<-EOF &&
>> +	capabilities
>> +	get $url file1
>> +	EOF
>> +
>> +	test_must_fail git remote-http $url <input 2>err &&
>> +	test_path_is_missing file1 &&
>> +	grep "failed to download file at URL" err &&
>> +	rm file1.temp
> 
> This should presumably be a :
> 
> 	test_when_finished "rm file.temp"

Yes. Thanks.

>> +'
>> +
>> +test_expect_success 'get by URL: 200' '
>> +	echo data >"$HTTPD_DOCUMENT_ROOT_PATH/exists.txt" &&
>> +
>> +	url="$HTTPD_URL/exists.txt" &&
>> +	cat >input <<-EOF &&
>> +	capabilities
>> +	get $url file2
>> +
>> +	EOF
>> +
>> +	GIT_TRACE2_PERF=1 git remote-http $url <input &&
> 
> Is this ad-hoc debugging that snuck into the test? Nothing here needs
> this GIT_TRACE2_PERF...

Good eye. Removed.

Thanks,
-Stolee
Ævar Arnfjörð Bjarmason Aug. 1, 2022, 2:39 p.m. UTC | #4
On Mon, Aug 01 2022, Derrick Stolee wrote:

> On 7/27/2022 7:00 PM, Ævar Arnfjörð Bjarmason wrote:
>> 
>> On Mon, Jul 25 2022, Derrick Stolee via GitGitGadget wrote:
>> 
>>> From: Derrick Stolee <derrickstolee@github.com>
>>> [...]
>> 
>> Add a:
>> 
>> 	TEST_PASSES_SANITIZE_LEAK=true
>
> I trust this is important for your series that flags tests
> that _could_ pass with this. Thanks.

Not really, it does add a "check" mode, and this is one of the few tests
crop up when merged with "seen" if run in that mode, but currentlny it's
only run manually, and we have other outstanding deviations.

It's just something for "master": We have the "linux-leaks" job for a
while now, so it makes sense for new tests to declare if they're
leak-free or not, so we guard against the introduction of leaks with new
code, if possible.

I (or someone else) can always follow-up with something like[1], but as
it's easy it makes sense to just add it in the first place, thanks.

1. https://lore.kernel.org/git/patch-v3-13.15-28255ac3239-20220727T230800Z-avarab@gmail.com/
diff mbox series

Patch

diff --git a/Documentation/gitremote-helpers.txt b/Documentation/gitremote-helpers.txt
index 6f1e269ae43..ed8da428c98 100644
--- a/Documentation/gitremote-helpers.txt
+++ b/Documentation/gitremote-helpers.txt
@@ -168,6 +168,9 @@  Supported commands: 'list', 'import'.
 	Can guarantee that when a clone is requested, the received
 	pack is self contained and is connected.
 
+'get'::
+	Can use the 'get' command to download a file from a given URI.
+
 If a helper advertises 'connect', Git will use it if possible and
 fall back to another capability if the helper requests so when
 connecting (see the 'connect' command under COMMANDS).
@@ -418,6 +421,12 @@  Supported if the helper has the "connect" capability.
 +
 Supported if the helper has the "stateless-connect" capability.
 
+'get' <uri> <path>::
+	Downloads the file from the given `<uri>` to the given `<path>`. If
+	`<path>.temp` exists, then Git assumes that the `.temp` file is a
+	partial download from a previous attempt and will resume the
+	download from that position.
+
 If a fatal error occurs, the program writes the error message to
 stderr and exits. The caller should expect that a suitable error
 message has been printed if the child closes the connection without
diff --git a/remote-curl.c b/remote-curl.c
index b8758757ece..73fbdbddd84 100644
--- a/remote-curl.c
+++ b/remote-curl.c
@@ -1286,6 +1286,33 @@  static void parse_fetch(struct strbuf *buf)
 	strbuf_reset(buf);
 }
 
+static void parse_get(struct strbuf *buf)
+{
+	struct strbuf url = STRBUF_INIT;
+	struct strbuf path = STRBUF_INIT;
+	const char *p, *space;
+
+	if (!skip_prefix(buf->buf, "get ", &p))
+		die(_("http transport does not support %s"), buf->buf);
+
+	space = strchr(p, ' ');
+
+	if (!space)
+		die(_("protocol error: expected '<url> <path>', missing space"));
+
+	strbuf_add(&url, p, space - p);
+	strbuf_addstr(&path, space + 1);
+
+	if (http_get_file(url.buf, path.buf, NULL))
+		die(_("failed to download file at URL '%s'"), url.buf);
+
+	strbuf_release(&url);
+	strbuf_release(&path);
+	printf("\n");
+	fflush(stdout);
+	strbuf_reset(buf);
+}
+
 static int push_dav(int nr_spec, const char **specs)
 {
 	struct child_process child = CHILD_PROCESS_INIT;
@@ -1564,9 +1591,14 @@  int cmd_main(int argc, const char **argv)
 				printf("unsupported\n");
 			fflush(stdout);
 
+		} else if (skip_prefix(buf.buf, "get ", &arg)) {
+			parse_get(&buf);
+			fflush(stdout);
+
 		} else if (!strcmp(buf.buf, "capabilities")) {
 			printf("stateless-connect\n");
 			printf("fetch\n");
+			printf("get\n");
 			printf("option\n");
 			printf("push\n");
 			printf("check-connectivity\n");
diff --git a/t/t5557-http-get.sh b/t/t5557-http-get.sh
new file mode 100755
index 00000000000..50b7dbcf957
--- /dev/null
+++ b/t/t5557-http-get.sh
@@ -0,0 +1,37 @@ 
+#!/bin/sh
+
+test_description='test downloading a file by URL'
+
+. ./test-lib.sh
+
+. "$TEST_DIRECTORY"/lib-httpd.sh
+start_httpd
+
+test_expect_success 'get by URL: 404' '
+	url="$HTTPD_URL/none.txt" &&
+	cat >input <<-EOF &&
+	capabilities
+	get $url file1
+	EOF
+
+	test_must_fail git remote-http $url <input 2>err &&
+	test_path_is_missing file1 &&
+	grep "failed to download file at URL" err &&
+	rm file1.temp
+'
+
+test_expect_success 'get by URL: 200' '
+	echo data >"$HTTPD_DOCUMENT_ROOT_PATH/exists.txt" &&
+
+	url="$HTTPD_URL/exists.txt" &&
+	cat >input <<-EOF &&
+	capabilities
+	get $url file2
+
+	EOF
+
+	GIT_TRACE2_PERF=1 git remote-http $url <input &&
+	test_cmp "$HTTPD_DOCUMENT_ROOT_PATH/exists.txt" file2
+'
+
+test_done