diff mbox series

[6/6] send-pack: support push negotiation

Message ID a2daa1022c41820b2109d9572069d12684470cb8.1617929278.git.jonathantanmy@google.com (mailing list archive)
State Superseded
Headers show
Series Push negotiation | expand

Commit Message

Jonathan Tan April 9, 2021, 1:10 a.m. UTC
Teach Git the push.negotiate config variable.

Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
---
 Documentation/config/push.txt |  7 ++++
 send-pack.c                   | 61 ++++++++++++++++++++++++++++++++---
 t/t5516-fetch-push.sh         | 35 ++++++++++++++++++++
 3 files changed, 99 insertions(+), 4 deletions(-)

Comments

Derrick Stolee May 3, 2021, 3:35 p.m. UTC | #1
On 4/8/21 9:10 PM, Jonathan Tan wrote:
> Teach Git the push.negotiate config variable.
...
> +push.negotiate::
> +	If set to "true", attempt to reduce the size of the packfile
> +	sent by rounds of negotiation in which the client and the
> +	server attempt to find commits in common. If "false", Git will
> +	rely solely on the server's ref advertisement to find commits
> +	in common.

Works for me.

> diff --git a/send-pack.c b/send-pack.c
> index 5f215b13c7..9cb9f71650 100644
> --- a/send-pack.c
> +++ b/send-pack.c
> @@ -56,7 +56,9 @@ static void feed_object(const struct object_id *oid, FILE *fh, int negative)
>  /*
>   * Make a pack stream and spit it out into file descriptor fd
>   */
> -static int pack_objects(int fd, struct ref *refs, struct oid_array *extra, struct send_pack_args *args)
> +static int pack_objects(int fd, struct ref *refs, struct oid_array *advertised,
> +			struct oid_array *negotiated,
> +			struct send_pack_args *args)

At the moment, I don't see why we need two oid_arrays here.
Instead, this 'extra' could be renamed to something
like 'server_objects' or 'base_objects' to make it clear
that we don't want those objects, and can even use them and
their reachable objects as delta bases, when appropriate.

Or, just don't touch it.

> +static void get_commons_through_negotiation(const char *url,
> +					    const struct ref *remote_refs,
> +					    struct oid_array *commons)
> +{
> +	struct child_process child = CHILD_PROCESS_INIT;
> +	const struct ref *ref;
> +	int len = the_hash_algo->hexsz + 1; /* hash + NL */
> +
> +	child.git_cmd = 1;
> +	child.no_stdin = 1;
> +	child.out = -1;
> +	strvec_pushl(&child.args, "fetch", "--negotiate-only", NULL);
> +	for (ref = remote_refs; ref; ref = ref->next)
> +		strvec_pushf(&child.args, "--negotiation-tip=%s", oid_to_hex(&ref->new_oid));
> +	strvec_push(&child.args, url);

Oh! We are using a 'git fetch --negotiate-only' subprocess here. You
can ignore my previous message about updating the docs for this to be
used only by tests.

> +
> +	if (start_command(&child))
> +		die(_("send-pack: unable to fork off fetch subprocess"));
> +
> +	do {
> +		char hex_hash[GIT_MAX_HEXSZ + 1];
> +		int read_len = read_in_full(child.out, hex_hash, len);
> +		struct object_id oid;
> +		const char *end;
> +
> +		if (!read_len)
> +			break;
> +		if (read_len != len)
> +			die("invalid length read %d", read_len);
> +		if (parse_oid_hex(hex_hash, &oid, &end) || *end != '\n')
> +			die("invalid hash");
> +		oid_array_append(commons, &oid);

This appends, so there is no reason why 'commons' needs to be empty
before the method is called. Is there a way we could feed in the
extra_have set when calling this method? Or is it happening at a
strange time?

> +	} while (1);
> +
> +	if (finish_command(&child)) {
> +		/*
> +		 * The information that push negotiation provides is useful but
> +		 * not mandatory.
> +		 */
> +		warning(_("push negotiation failed; proceeding anyway with push"));
> +	}
> +}
> +
>  int send_pack(struct send_pack_args *args,
>  	      int fd[], struct child_process *conn,
>  	      struct ref *remote_refs,
>  	      struct oid_array *extra_have)
>  {
> +	struct oid_array commons = OID_ARRAY_INIT;
>  	int in = fd[0];
>  	int out = fd[1];
>  	struct strbuf req_buf = STRBUF_INIT;
> @@ -426,6 +474,7 @@ int send_pack(struct send_pack_args *args,
>  	int quiet_supported = 0;
>  	int agent_supported = 0;
>  	int advertise_sid = 0;
> +	int push_negotiate = 0;
>  	int use_atomic = 0;
>  	int atomic_supported = 0;
>  	int use_push_options = 0;
> @@ -437,6 +486,10 @@ int send_pack(struct send_pack_args *args,
>  	const char *push_cert_nonce = NULL;
>  	struct packet_reader reader;
>  
> +	git_config_get_bool("push.negotiate", &push_negotiate);
> +	if (push_negotiate)
> +		get_commons_through_negotiation(args->url, remote_refs, &commons);
> +
>  	git_config_get_bool("transfer.advertisesid", &advertise_sid);
>  
>  	/* Does the other end support the reporting? */
> @@ -625,7 +678,7 @@ int send_pack(struct send_pack_args *args,
>  			   PACKET_READ_DIE_ON_ERR_PACKET);
>  
>  	if (need_pack_data && cmds_sent) {
> -		if (pack_objects(out, remote_refs, extra_have, args) < 0) {
> +		if (pack_objects(out, remote_refs, extra_have, &commons, args) < 0) {

Here, it would be nice if extra_have and commons were merged before calling
pack_objects(). I mentioned a way to perhaps make that easier above, but
the context might not make that be super-simple. Running a loop here to
scan 'commons' and append them to 'extra_have' might be a sufficient
approach.

Generally, this approach seems like it would work. I have not done any
local testing, yet.

Thanks,
-Stolee
Jonathan Tan May 3, 2021, 10:02 p.m. UTC | #2
[snip asking about whether extra_have (a.k.a. advertised) and commons (a.k.a.
negotiated) can be merged]

> Here, it would be nice if extra_have and commons were merged before calling
> pack_objects(). I mentioned a way to perhaps make that easier above, but
> the context might not make that be super-simple. Running a loop here to
> scan 'commons' and append them to 'extra_have' might be a sufficient
> approach.
> 
> Generally, this approach seems like it would work. I have not done any
> local testing, yet.
> 
> Thanks,
> -Stolee

I was reluctant to merge them because that would involve either (1)
adding commons to "extra_have" (as you suggest) or (2) iterating through
"extra_have" in order to add it to the "commons" set. For (1), this
would modify "extra_have", which is passed in from the outside. Looking
at its callers, the main one in git_transport_push() in transport.c
calls send_pack() with a set that has traversed the transport API, so I
think it would be confusing if such a set suddenly changed. For (2), the
extra loop seems more troublesome than having two parameters with
clearer names indicating where they come from. I don't mind changing to
(2), though, if people want it.
Derrick Stolee May 4, 2021, 5:26 p.m. UTC | #3
On 5/3/2021 6:02 PM, Jonathan Tan wrote:
> [snip asking about whether extra_have (a.k.a. advertised) and commons (a.k.a.
> negotiated) can be merged]
> 
>> Here, it would be nice if extra_have and commons were merged before calling
>> pack_objects(). I mentioned a way to perhaps make that easier above, but
>> the context might not make that be super-simple. Running a loop here to
>> scan 'commons' and append them to 'extra_have' might be a sufficient
>> approach.
>>
>> Generally, this approach seems like it would work. I have not done any
>> local testing, yet.
>>
>> Thanks,
>> -Stolee
> 
> I was reluctant to merge them because that would involve either (1)
> adding commons to "extra_have" (as you suggest) or (2) iterating through
> "extra_have" in order to add it to the "commons" set. For (1), this
> would modify "extra_have", which is passed in from the outside. Looking
> at its callers, the main one in git_transport_push() in transport.c
> calls send_pack() with a set that has traversed the transport API, so I
> think it would be confusing if such a set suddenly changed. For (2), the
> extra loop seems more troublesome than having two parameters with
> clearer names indicating where they come from. I don't mind changing to
> (2), though, if people want it.
 
I suppose this concern about "ownership" is valid, and it is worth having
the two parameters in the helper function for extra safety.

Thanks,
-Stolee
diff mbox series

Patch

diff --git a/Documentation/config/push.txt b/Documentation/config/push.txt
index 21b256e0a4..f2667b2689 100644
--- a/Documentation/config/push.txt
+++ b/Documentation/config/push.txt
@@ -120,3 +120,10 @@  push.useForceIfIncludes::
 	`--force-if-includes` as an option to linkgit:git-push[1]
 	in the command line. Adding `--no-force-if-includes` at the
 	time of push overrides this configuration setting.
+
+push.negotiate::
+	If set to "true", attempt to reduce the size of the packfile
+	sent by rounds of negotiation in which the client and the
+	server attempt to find commits in common. If "false", Git will
+	rely solely on the server's ref advertisement to find commits
+	in common.
diff --git a/send-pack.c b/send-pack.c
index 5f215b13c7..9cb9f71650 100644
--- a/send-pack.c
+++ b/send-pack.c
@@ -56,7 +56,9 @@  static void feed_object(const struct object_id *oid, FILE *fh, int negative)
 /*
  * Make a pack stream and spit it out into file descriptor fd
  */
-static int pack_objects(int fd, struct ref *refs, struct oid_array *extra, struct send_pack_args *args)
+static int pack_objects(int fd, struct ref *refs, struct oid_array *advertised,
+			struct oid_array *negotiated,
+			struct send_pack_args *args)
 {
 	/*
 	 * The child becomes pack-objects --revs; we feed
@@ -94,8 +96,10 @@  static int pack_objects(int fd, struct ref *refs, struct oid_array *extra, struc
 	 * parameters by writing to the pipe.
 	 */
 	po_in = xfdopen(po.in, "w");
-	for (i = 0; i < extra->nr; i++)
-		feed_object(&extra->oid[i], po_in, 1);
+	for (i = 0; i < advertised->nr; i++)
+		feed_object(&advertised->oid[i], po_in, 1);
+	for (i = 0; i < negotiated->nr; i++)
+		feed_object(&negotiated->oid[i], po_in, 1);
 
 	while (refs) {
 		if (!is_null_oid(&refs->old_oid))
@@ -409,11 +413,55 @@  static void reject_invalid_nonce(const char *nonce, int len)
 	}
 }
 
+static void get_commons_through_negotiation(const char *url,
+					    const struct ref *remote_refs,
+					    struct oid_array *commons)
+{
+	struct child_process child = CHILD_PROCESS_INIT;
+	const struct ref *ref;
+	int len = the_hash_algo->hexsz + 1; /* hash + NL */
+
+	child.git_cmd = 1;
+	child.no_stdin = 1;
+	child.out = -1;
+	strvec_pushl(&child.args, "fetch", "--negotiate-only", NULL);
+	for (ref = remote_refs; ref; ref = ref->next)
+		strvec_pushf(&child.args, "--negotiation-tip=%s", oid_to_hex(&ref->new_oid));
+	strvec_push(&child.args, url);
+
+	if (start_command(&child))
+		die(_("send-pack: unable to fork off fetch subprocess"));
+
+	do {
+		char hex_hash[GIT_MAX_HEXSZ + 1];
+		int read_len = read_in_full(child.out, hex_hash, len);
+		struct object_id oid;
+		const char *end;
+
+		if (!read_len)
+			break;
+		if (read_len != len)
+			die("invalid length read %d", read_len);
+		if (parse_oid_hex(hex_hash, &oid, &end) || *end != '\n')
+			die("invalid hash");
+		oid_array_append(commons, &oid);
+	} while (1);
+
+	if (finish_command(&child)) {
+		/*
+		 * The information that push negotiation provides is useful but
+		 * not mandatory.
+		 */
+		warning(_("push negotiation failed; proceeding anyway with push"));
+	}
+}
+
 int send_pack(struct send_pack_args *args,
 	      int fd[], struct child_process *conn,
 	      struct ref *remote_refs,
 	      struct oid_array *extra_have)
 {
+	struct oid_array commons = OID_ARRAY_INIT;
 	int in = fd[0];
 	int out = fd[1];
 	struct strbuf req_buf = STRBUF_INIT;
@@ -426,6 +474,7 @@  int send_pack(struct send_pack_args *args,
 	int quiet_supported = 0;
 	int agent_supported = 0;
 	int advertise_sid = 0;
+	int push_negotiate = 0;
 	int use_atomic = 0;
 	int atomic_supported = 0;
 	int use_push_options = 0;
@@ -437,6 +486,10 @@  int send_pack(struct send_pack_args *args,
 	const char *push_cert_nonce = NULL;
 	struct packet_reader reader;
 
+	git_config_get_bool("push.negotiate", &push_negotiate);
+	if (push_negotiate)
+		get_commons_through_negotiation(args->url, remote_refs, &commons);
+
 	git_config_get_bool("transfer.advertisesid", &advertise_sid);
 
 	/* Does the other end support the reporting? */
@@ -625,7 +678,7 @@  int send_pack(struct send_pack_args *args,
 			   PACKET_READ_DIE_ON_ERR_PACKET);
 
 	if (need_pack_data && cmds_sent) {
-		if (pack_objects(out, remote_refs, extra_have, args) < 0) {
+		if (pack_objects(out, remote_refs, extra_have, &commons, args) < 0) {
 			if (args->stateless_rpc)
 				close(out);
 			if (git_connection_is_socket(conn))
diff --git a/t/t5516-fetch-push.sh b/t/t5516-fetch-push.sh
index f11742ed59..62fb9074a2 100755
--- a/t/t5516-fetch-push.sh
+++ b/t/t5516-fetch-push.sh
@@ -191,6 +191,41 @@  test_expect_success 'fetch with pushInsteadOf (should not rewrite)' '
 	)
 '
 
+grep_wrote () {
+	object_count=$1
+	file_name=$2
+	grep 'write_pack_file/wrote.*"value":"'$1'"' $2
+}
+
+test_expect_success 'push with negotiation' '
+	# Without negotiation
+	mk_empty testrepo &&
+	git push testrepo $the_first_commit:refs/remotes/origin/first_commit &&
+	git -C testrepo config receive.hideRefs refs/remotes/origin/first_commit &&
+	echo now pushing without negotiation &&
+	GIT_TRACE2_EVENT="$(pwd)/event" git push testrepo refs/heads/main:refs/remotes/origin/main &&
+	grep_wrote 5 event && # 2 commits, 2 trees, 1 blob
+
+	# Same commands, but with negotiation
+	rm event &&
+	mk_empty testrepo &&
+	git push testrepo $the_first_commit:refs/remotes/origin/first_commit &&
+	git -C testrepo config receive.hideRefs refs/remotes/origin/first_commit &&
+	GIT_TRACE2_EVENT="$(pwd)/event" git -c push.negotiate=1 push testrepo refs/heads/main:refs/remotes/origin/main &&
+	grep_wrote 2 event # 1 commit, 1 tree
+'
+
+test_expect_success 'push with negotiation proceeds anyway even if negotiation fails' '
+	rm event &&
+	mk_empty testrepo &&
+	git push testrepo $the_first_commit:refs/remotes/origin/first_commit &&
+	git -C testrepo config receive.hideRefs refs/remotes/origin/first_commit &&
+	GIT_TRACE_PACKET="$(pwd)/trace" GIT_TEST_PROTOCOL_VERSION=0 GIT_TRACE2_EVENT="$(pwd)/event" \
+		git -c push.negotiate=1 push testrepo refs/heads/main:refs/remotes/origin/main 2>err &&
+	grep_wrote 5 event && # 2 commits, 2 trees, 1 blob
+	test_i18ngrep "push negotiation failed" err
+'
+
 test_expect_success 'push without wildcard' '
 	mk_empty testrepo &&