diff mbox series

[v2,9/9] clone: unbundle the advertised bundles

Message ID b62b4b1748194f0c7b81536701f15aa0df8e1d9b.1668628303.git.gitgitgadget@gmail.com (mailing list archive)
State Superseded
Headers show
Series Bundle URIs IV: advertise over protocol v2 | expand

Commit Message

Derrick Stolee Nov. 16, 2022, 7:51 p.m. UTC
From: Derrick Stolee <derrickstolee@github.com>

A previous change introduced the transport methods to acquire a bundle
list from the 'bundle-uri' protocol v2 command, when advertised _and_
when the client has chosen to enable the feature.

Teach Git to download and unbundle the data advertised by those bundles
during 'git clone'.

Also, since the --bundle-uri option exists, we do not want to mix the
advertised bundles with the user-specified bundles.

Signed-off-by: Derrick Stolee <derrickstolee@github.com>
---
 builtin/clone.c  | 26 +++++++++++++++++----
 t/t5601-clone.sh | 59 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 80 insertions(+), 5 deletions(-)

Comments

Victoria Dye Nov. 29, 2022, 1:59 a.m. UTC | #1
Derrick Stolee via GitGitGadget wrote:
> From: Derrick Stolee <derrickstolee@github.com>
> 
> A previous change introduced the transport methods to acquire a bundle
> list from the 'bundle-uri' protocol v2 command, when advertised _and_
> when the client has chosen to enable the feature.
> 
> Teach Git to download and unbundle the data advertised by those bundles
> during 'git clone'.
> 
> Also, since the --bundle-uri option exists, we do not want to mix the
> advertised bundles with the user-specified bundles.
> 
> Signed-off-by: Derrick Stolee <derrickstolee@github.com>
> ---
>  builtin/clone.c  | 26 +++++++++++++++++----
>  t/t5601-clone.sh | 59 ++++++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 80 insertions(+), 5 deletions(-)
> 
> diff --git a/builtin/clone.c b/builtin/clone.c
> index 22b1e506452..09f10477ed6 100644
> --- a/builtin/clone.c
> +++ b/builtin/clone.c
> @@ -1267,11 +1267,27 @@ int cmd_clone(int argc, const char **argv, const char *prefix)
>  	if (refs)
>  		mapped_refs = wanted_peer_refs(refs, &remote->fetch);
>  
> -	/*
> -	 * Populate transport->got_remote_bundle_uri and
> -	 * transport->bundle_uri. We might get nothing.
> -	 */
> -	transport_get_remote_bundle_uri(transport, 1);
> +	if (!bundle_uri) {
> +		/*
> +		* Populate transport->got_remote_bundle_uri and
> +		* transport->bundle_uri. We might get nothing.
> +		*/
> +		transport_get_remote_bundle_uri(transport, 1);
> +
> +		if (transport->bundles &&
> +		    hashmap_get_size(&transport->bundles->bundles)) {
> +			/* At this point, we need the_repository to match the cloned repo. */
> +			if (repo_init(the_repository, git_dir, work_tree))
> +				warning(_("failed to initialize the repo, skipping bundle URI"));
> +			if (fetch_bundle_list(the_repository,
> +					      remote->url[0],
> +					      transport->bundles))

If the repo initialization fails, this line is still executed. Should the
condition be 'else if' to avoid that?

Otherwise, all of the added logic looks good to me.

> +				warning(_("failed to fetch advertised bundles"));
> +		} else {
> +			clear_bundle_list(transport->bundles);
> +			FREE_AND_NULL(transport->bundles);
> +		}
> +	}
>  
>  	if (mapped_refs) {
>  		int hash_algo = hash_algo_by_ptr(transport_get_hash_algo(transport));
> diff --git a/t/t5601-clone.sh b/t/t5601-clone.sh
> index 45f0803ed4d..d1d8139751e 100755
> --- a/t/t5601-clone.sh
> +++ b/t/t5601-clone.sh

Per the commit message:

> Also, since the --bundle-uri option exists, we do not want to mix the
> advertised bundles with the user-specified bundles.

Could you add a test verifying that '--bundle-uri' causes 'clone' to skip
bundle URI auto-discovery? It's clear from the implementation above that
'clone' is currently doing that as-expected, but it might be nice to have
the test for regression testing purposes.

> @@ -795,6 +795,65 @@ test_expect_success 'reject cloning shallow repository using HTTP' '
>  	git clone --no-reject-shallow $HTTPD_URL/smart/repo.git repo
>  '
>  
> +test_expect_success 'auto-discover bundle URI from HTTP clone' '
> +	test_when_finished rm -rf trace.txt repo2 "$HTTPD_DOCUMENT_ROOT_PATH/repo2.git" &&
> +	git -C src bundle create "$HTTPD_DOCUMENT_ROOT_PATH/everything.bundle" --all &&
> +	git clone --bare --no-local src "$HTTPD_DOCUMENT_ROOT_PATH/repo2.git" &&
> +
> +	git -C "$HTTPD_DOCUMENT_ROOT_PATH/repo2.git" config \
> +		uploadpack.advertiseBundleURIs true &&
> +	git -C "$HTTPD_DOCUMENT_ROOT_PATH/repo2.git" config \
> +		bundle.version 1 &&
> +	git -C "$HTTPD_DOCUMENT_ROOT_PATH/repo2.git" config \
> +		bundle.mode all &&
> +	git -C "$HTTPD_DOCUMENT_ROOT_PATH/repo2.git" config \
> +		bundle.everything.uri "$HTTPD_URL/everything.bundle" &&
> +
> +	GIT_TEST_BUNDLE_URI=1 \
> +	GIT_TRACE2_EVENT="$(pwd)/trace.txt" \
> +		git -c protocol.version=2 clone \
> +		$HTTPD_URL/smart/repo2.git repo2 &&
> +	cat >pattern <<-EOF &&
> +	"event":"child_start".*"argv":\["git-remote-https","$HTTPD_URL/everything.bundle"\]
> +	EOF
> +	grep -f pattern trace.txt
> +'
> +
> +test_expect_success 'auto-discover multiple bundles from HTTP clone' '
> +	test_when_finished rm -rf trace.txt repo3 "$HTTPD_DOCUMENT_ROOT_PATH/repo3.git" &&
> +
> +	test_commit -C src new &&
> +	git -C src bundle create "$HTTPD_DOCUMENT_ROOT_PATH/new.bundle" HEAD~1..HEAD &&
> +	git clone --bare --no-local src "$HTTPD_DOCUMENT_ROOT_PATH/repo3.git" &&
> +
> +	git -C "$HTTPD_DOCUMENT_ROOT_PATH/repo3.git" config \
> +		uploadpack.advertiseBundleURIs true &&
> +	git -C "$HTTPD_DOCUMENT_ROOT_PATH/repo3.git" config \
> +		bundle.version 1 &&
> +	git -C "$HTTPD_DOCUMENT_ROOT_PATH/repo3.git" config \
> +		bundle.mode all &&
> +
> +	git -C "$HTTPD_DOCUMENT_ROOT_PATH/repo3.git" config \
> +		bundle.everything.uri "$HTTPD_URL/everything.bundle" &&
> +	git -C "$HTTPD_DOCUMENT_ROOT_PATH/repo3.git" config \
> +		bundle.new.uri "$HTTPD_URL/new.bundle" &&
> +
> +	GIT_TEST_BUNDLE_URI=1 \
> +	GIT_TRACE2_EVENT="$(pwd)/trace.txt" \
> +		git -c protocol.version=2 clone \
> +		$HTTPD_URL/smart/repo3.git repo3 &&
> +
> +	# We should fetch _both_ bundles
> +	cat >pattern <<-EOF &&
> +	"event":"child_start".*"argv":\["git-remote-https","$HTTPD_URL/everything.bundle"\]
> +	EOF
> +	grep -f pattern trace.txt &&
> +	cat >pattern <<-EOF &&
> +	"event":"child_start".*"argv":\["git-remote-https","$HTTPD_URL/new.bundle"\]
> +	EOF
> +	grep -f pattern trace.txt
> +'
> +
>  # DO NOT add non-httpd-specific tests here, because the last part of this
>  # test script is only executed when httpd is available and enabled.
>
Derrick Stolee Dec. 2, 2022, 4:16 p.m. UTC | #2
On 11/28/2022 8:59 PM, Victoria Dye wrote:
> Derrick Stolee via GitGitGadget wrote:
>> From: Derrick Stolee <derrickstolee@github.com>
>>
>> A previous change introduced the transport methods to acquire a bundle
>> list from the 'bundle-uri' protocol v2 command, when advertised _and_
>> when the client has chosen to enable the feature.
>>
>> Teach Git to download and unbundle the data advertised by those bundles
>> during 'git clone'.
>>
>> Also, since the --bundle-uri option exists, we do not want to mix the
>> advertised bundles with the user-specified bundles.
>>
>> Signed-off-by: Derrick Stolee <derrickstolee@github.com>
>> ---
>>  builtin/clone.c  | 26 +++++++++++++++++----
>>  t/t5601-clone.sh | 59 ++++++++++++++++++++++++++++++++++++++++++++++++
>>  2 files changed, 80 insertions(+), 5 deletions(-)
>>
>> diff --git a/builtin/clone.c b/builtin/clone.c
>> index 22b1e506452..09f10477ed6 100644
>> --- a/builtin/clone.c
>> +++ b/builtin/clone.c
>> @@ -1267,11 +1267,27 @@ int cmd_clone(int argc, const char **argv, const char *prefix)
>>  	if (refs)
>>  		mapped_refs = wanted_peer_refs(refs, &remote->fetch);
>>
>> -	/*
>> -	 * Populate transport->got_remote_bundle_uri and
>> -	 * transport->bundle_uri. We might get nothing.
>> -	 */
>> -	transport_get_remote_bundle_uri(transport, 1);
>> +	if (!bundle_uri) {
>> +		/*
>> +		* Populate transport->got_remote_bundle_uri and
>> +		* transport->bundle_uri. We might get nothing.
>> +		*/
>> +		transport_get_remote_bundle_uri(transport, 1);
>> +
>> +		if (transport->bundles &&
>> +		    hashmap_get_size(&transport->bundles->bundles)) {
>> +			/* At this point, we need the_repository to match the cloned repo. */
>> +			if (repo_init(the_repository, git_dir, work_tree))
>> +				warning(_("failed to initialize the repo, skipping bundle URI"));
>> +			if (fetch_bundle_list(the_repository,
>> +					      remote->url[0],
>> +					      transport->bundles))
>
> If the repo initialization fails, this line is still executed. Should the
> condition be 'else if' to avoid that?
>
> Otherwise, all of the added logic looks good to me.

Yes, it should. An earlier version of this follows the correct if/else if
pattern.

>> diff --git a/t/t5601-clone.sh b/t/t5601-clone.sh
>> index 45f0803ed4d..d1d8139751e 100755
>> --- a/t/t5601-clone.sh
>> +++ b/t/t5601-clone.sh
>
> Per the commit message:
>
>> Also, since the --bundle-uri option exists, we do not want to mix the
>> advertised bundles with the user-specified bundles.
>
> Could you add a test verifying that '--bundle-uri' causes 'clone' to skip
> bundle URI auto-discovery? It's clear from the implementation above that
> 'clone' is currently doing that as-expected, but it might be nice to have
> the test for regression testing purposes.

I can add that to the lib-bundle-uri-protocol.sh tests pretty easily.

Thanks,
-Stolee
diff mbox series

Patch

diff --git a/builtin/clone.c b/builtin/clone.c
index 22b1e506452..09f10477ed6 100644
--- a/builtin/clone.c
+++ b/builtin/clone.c
@@ -1267,11 +1267,27 @@  int cmd_clone(int argc, const char **argv, const char *prefix)
 	if (refs)
 		mapped_refs = wanted_peer_refs(refs, &remote->fetch);
 
-	/*
-	 * Populate transport->got_remote_bundle_uri and
-	 * transport->bundle_uri. We might get nothing.
-	 */
-	transport_get_remote_bundle_uri(transport, 1);
+	if (!bundle_uri) {
+		/*
+		* Populate transport->got_remote_bundle_uri and
+		* transport->bundle_uri. We might get nothing.
+		*/
+		transport_get_remote_bundle_uri(transport, 1);
+
+		if (transport->bundles &&
+		    hashmap_get_size(&transport->bundles->bundles)) {
+			/* At this point, we need the_repository to match the cloned repo. */
+			if (repo_init(the_repository, git_dir, work_tree))
+				warning(_("failed to initialize the repo, skipping bundle URI"));
+			if (fetch_bundle_list(the_repository,
+					      remote->url[0],
+					      transport->bundles))
+				warning(_("failed to fetch advertised bundles"));
+		} else {
+			clear_bundle_list(transport->bundles);
+			FREE_AND_NULL(transport->bundles);
+		}
+	}
 
 	if (mapped_refs) {
 		int hash_algo = hash_algo_by_ptr(transport_get_hash_algo(transport));
diff --git a/t/t5601-clone.sh b/t/t5601-clone.sh
index 45f0803ed4d..d1d8139751e 100755
--- a/t/t5601-clone.sh
+++ b/t/t5601-clone.sh
@@ -795,6 +795,65 @@  test_expect_success 'reject cloning shallow repository using HTTP' '
 	git clone --no-reject-shallow $HTTPD_URL/smart/repo.git repo
 '
 
+test_expect_success 'auto-discover bundle URI from HTTP clone' '
+	test_when_finished rm -rf trace.txt repo2 "$HTTPD_DOCUMENT_ROOT_PATH/repo2.git" &&
+	git -C src bundle create "$HTTPD_DOCUMENT_ROOT_PATH/everything.bundle" --all &&
+	git clone --bare --no-local src "$HTTPD_DOCUMENT_ROOT_PATH/repo2.git" &&
+
+	git -C "$HTTPD_DOCUMENT_ROOT_PATH/repo2.git" config \
+		uploadpack.advertiseBundleURIs true &&
+	git -C "$HTTPD_DOCUMENT_ROOT_PATH/repo2.git" config \
+		bundle.version 1 &&
+	git -C "$HTTPD_DOCUMENT_ROOT_PATH/repo2.git" config \
+		bundle.mode all &&
+	git -C "$HTTPD_DOCUMENT_ROOT_PATH/repo2.git" config \
+		bundle.everything.uri "$HTTPD_URL/everything.bundle" &&
+
+	GIT_TEST_BUNDLE_URI=1 \
+	GIT_TRACE2_EVENT="$(pwd)/trace.txt" \
+		git -c protocol.version=2 clone \
+		$HTTPD_URL/smart/repo2.git repo2 &&
+	cat >pattern <<-EOF &&
+	"event":"child_start".*"argv":\["git-remote-https","$HTTPD_URL/everything.bundle"\]
+	EOF
+	grep -f pattern trace.txt
+'
+
+test_expect_success 'auto-discover multiple bundles from HTTP clone' '
+	test_when_finished rm -rf trace.txt repo3 "$HTTPD_DOCUMENT_ROOT_PATH/repo3.git" &&
+
+	test_commit -C src new &&
+	git -C src bundle create "$HTTPD_DOCUMENT_ROOT_PATH/new.bundle" HEAD~1..HEAD &&
+	git clone --bare --no-local src "$HTTPD_DOCUMENT_ROOT_PATH/repo3.git" &&
+
+	git -C "$HTTPD_DOCUMENT_ROOT_PATH/repo3.git" config \
+		uploadpack.advertiseBundleURIs true &&
+	git -C "$HTTPD_DOCUMENT_ROOT_PATH/repo3.git" config \
+		bundle.version 1 &&
+	git -C "$HTTPD_DOCUMENT_ROOT_PATH/repo3.git" config \
+		bundle.mode all &&
+
+	git -C "$HTTPD_DOCUMENT_ROOT_PATH/repo3.git" config \
+		bundle.everything.uri "$HTTPD_URL/everything.bundle" &&
+	git -C "$HTTPD_DOCUMENT_ROOT_PATH/repo3.git" config \
+		bundle.new.uri "$HTTPD_URL/new.bundle" &&
+
+	GIT_TEST_BUNDLE_URI=1 \
+	GIT_TRACE2_EVENT="$(pwd)/trace.txt" \
+		git -c protocol.version=2 clone \
+		$HTTPD_URL/smart/repo3.git repo3 &&
+
+	# We should fetch _both_ bundles
+	cat >pattern <<-EOF &&
+	"event":"child_start".*"argv":\["git-remote-https","$HTTPD_URL/everything.bundle"\]
+	EOF
+	grep -f pattern trace.txt &&
+	cat >pattern <<-EOF &&
+	"event":"child_start".*"argv":\["git-remote-https","$HTTPD_URL/new.bundle"\]
+	EOF
+	grep -f pattern trace.txt
+'
+
 # DO NOT add non-httpd-specific tests here, because the last part of this
 # test script is only executed when httpd is available and enabled.