diff mbox series

[1/2] fetch-pack: in protocol v2, in_vain only after ACK

Message ID 51174e53527f9b29bb0a5ebb8726f07333113dfb.1587775989.git.jonathantanmy@google.com (mailing list archive)
State New, archived
Headers show
Series Protocol v2 in_vain fixes | expand

Commit Message

Jonathan Tan April 25, 2020, 12:56 a.m. UTC
When fetching, Git stops negotiation when it has sent at least
MAX_IN_VAIN (which is 256) "have" lines without having any of them
ACK-ed. But this is supposed to trigger only after the first ACK, as
pack-protocol.txt says:

  However, the 256 limit *only* turns on in the canonical client
  implementation if we have received at least one "ACK %s continue"
  during a prior round.  This helps to ensure that at least one common
  ancestor is found before we give up entirely.

The code path for protocol v0 observes this, but not protocol v2,
resulting in shorter negotiation rounds but significantly larger
packfiles. Teach the code path for protocol v2 to check this criterion
only after at least one ACK was received.

Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
---
 fetch-pack.c          | 13 +++++++++----
 t/t5500-fetch-pack.sh | 19 +++++++++++++++++++
 2 files changed, 28 insertions(+), 4 deletions(-)

Comments

Junio C Hamano April 25, 2020, 5:08 a.m. UTC | #1
Jonathan Tan <jonathantanmy@google.com> writes:

> When fetching, Git stops negotiation when it has sent at least
> MAX_IN_VAIN (which is 256) "have" lines without having any of them
> ACK-ed. But this is supposed to trigger only after the first ACK, as
> pack-protocol.txt says:
>
>   However, the 256 limit *only* turns on in the canonical client
>   implementation if we have received at least one "ACK %s continue"
>   during a prior round.  This helps to ensure that at least one common
>   ancestor is found before we give up entirely.
>
> The code path for protocol v0 observes this, but not protocol v2,
> resulting in shorter negotiation rounds but significantly larger
> packfiles. Teach the code path for protocol v2 to check this criterion
> only after at least one ACK was received.

Makes sense.

I think we can instead use in_vain==NULL as a signal that we haven't
seen any ack yet and that shrinks the patch somewhat ([Patch 1/2]
becomes +6/-4 from +9/-4 for fetch-pack.c).  I do not know if the
result is easier to follow (as there is one less variable to keep in
mind), though.  The callee is probably easier to reason about (it
needs to worry about the *in_vain variable only when it is given---
you cannot beat the simplicity of it), but the caller side may
become harder to reason about, especially without any comment where
in_vain_p becomes non-NULL.  Your version has an assignment to make
"seen_ack" to true there, which makes it very clear without any
comment what is going on.

So I dunno.  I've queued your version and discarded the following.

 fetch-pack.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/fetch-pack.c b/fetch-pack.c
index 1734a573b0..7b837b6a76 100644
--- a/fetch-pack.c
+++ b/fetch-pack.c
@@ -1156,8 +1156,9 @@ static int add_haves(struct fetch_negotiator *negotiator,
 			break;
 	}
 
-	*in_vain += haves_added;
-	if (!haves_added || *in_vain >= MAX_IN_VAIN) {
+	if (in_vain)
+		*in_vain += haves_added;
+	if (!haves_added || (in_vain && *in_vain >= MAX_IN_VAIN)) {
 		/* Send Done */
 		packet_buf_write(req_buf, "done\n");
 		ret = 1;
@@ -1444,6 +1445,7 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
 	int haves_to_send = INITIAL_FLUSH;
 	struct fetch_negotiator negotiator_alloc;
 	struct fetch_negotiator *negotiator;
+	int *in_vain_p = NULL;
 
 	if (args->no_dependents) {
 		negotiator = NULL;
@@ -1499,7 +1501,7 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
 			}
 			if (send_fetch_request(negotiator, fd[1], args, ref,
 					       &common,
-					       &haves_to_send, &in_vain,
+					       &haves_to_send, in_vain_p,
 					       reader.use_sideband))
 				state = FETCH_GET_PACK;
 			else
@@ -1512,7 +1514,7 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
 				state = FETCH_GET_PACK;
 				break;
 			case 1:
-				in_vain = 0;
+				in_vain_p = &in_vain;
 				/* fallthrough */
 			default:
 				state = FETCH_SEND_REQUEST;
Jonathan Nieder April 26, 2020, 12:28 a.m. UTC | #2
Hi,

Jonathan Tan wrote:

> When fetching, Git stops negotiation when it has sent at least
> MAX_IN_VAIN (which is 256) "have" lines without having any of them
> ACK-ed. But this is supposed to trigger only after the first ACK, as
> pack-protocol.txt says:
> 
>   However, the 256 limit *only* turns on in the canonical client
>   implementation if we have received at least one "ACK %s continue"
>   during a prior round.  This helps to ensure that at least one common
>   ancestor is found before we give up entirely.
> 
> The code path for protocol v0 observes this, but not protocol v2,
> resulting in shorter negotiation rounds but significantly larger
> packfiles. Teach the code path for protocol v2 to check this criterion
> only after at least one ACK was received.
> 
> Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
> ---
>  fetch-pack.c          | 13 +++++++++----
>  t/t5500-fetch-pack.sh | 19 +++++++++++++++++++
>  2 files changed, 28 insertions(+), 4 deletions(-)

Makes a lot of sense.  Good find.

[...]
> --- a/fetch-pack.c
> +++ b/fetch-pack.c
[...]
> @@ -1513,6 +1517,7 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
>  				break;
>  			case 1:
>  				in_vain = 0;
> +				seen_ack = 1;

not about this patch: can these return values from process_acks be made
into an enum with named enumerators?  That would make what's happening
in the call site more obvious.

>  				/* fallthrough */
>  			default:
>  				state = FETCH_SEND_REQUEST;
> diff --git a/t/t5500-fetch-pack.sh b/t/t5500-fetch-pack.sh
> index baa1a99f45..fcc5cc8ab0 100755
> --- a/t/t5500-fetch-pack.sh
> +++ b/t/t5500-fetch-pack.sh
> @@ -385,6 +385,25 @@ test_expect_success 'clone shallow with packed refs' '
>  	test_cmp count8.expected count8.actual
>  '
>  
> +test_expect_success 'in_vain not triggered before first ACK' '
> +	rm -rf myserver myclient trace &&
> +	git init myserver &&
> +	test_commit -C myserver foo &&
> +	git clone "file://$(pwd)/myserver" myclient &&
> +
> +	# MAX_IN_VAIN is 256. Because of batching, the client will send 496
> +	# (16+32+64+128+256) commits, not 256, before giving up. So create 496
> +	# irrelevant commits.
> +	test_commit_bulk -C myclient 496 &&
> +
> +	# The new commit that the client wants to fetch.
> +	test_commit -C myserver bar &&
> +
> +	GIT_TRACE_PACKET="$(pwd)/trace" git -C myclient fetch --progress origin &&
> +	cp trace /tmp/x &&

Leftover debugging line?

> +	test_i18ngrep "Total 3 " trace

Clever.

In some sense this is a fragile test, since the server could change
how it reports progress some day.  Would it make sense (perhaps as a
followup patch) for this to use a trace2 log instead?  For example,
if we turn on tracing in the server, then since 9ed8790282
(pack-objects: write objects packed to trace2, 2019-04-11) it will
report how many objects were in the pack it wrote.

After removing the "cp trace /tmp/x" line,
Reviewed-by: Jonathan Nieder <jrnieder@gmail.com>

Thanks.
Jonathan Tan April 27, 2020, 5:27 p.m. UTC | #3
> not about this patch: can these return values from process_acks be made
> into an enum with named enumerators?  That would make what's happening
> in the call site more obvious.

That sounds reasonable to me.

> > +	cp trace /tmp/x &&
> 
> Leftover debugging line?

Ah, yes. If Junio can't or won't do it locally then I'll send out
another set with this changed.

> > +	test_i18ngrep "Total 3 " trace
> 
> Clever.
> 
> In some sense this is a fragile test, since the server could change
> how it reports progress some day.  Would it make sense (perhaps as a
> followup patch) for this to use a trace2 log instead?  For example,
> if we turn on tracing in the server, then since 9ed8790282
> (pack-objects: write objects packed to trace2, 2019-04-11) it will
> report how many objects were in the pack it wrote.

Probably better to have tracing in the client and use that, but this
requires us to add that tracing. But in general I agree.

> After removing the "cp trace /tmp/x" line,
> Reviewed-by: Jonathan Nieder <jrnieder@gmail.com>
> 
> Thanks.

Thanks for your review.
Junio C Hamano April 27, 2020, 10:16 p.m. UTC | #4
Jonathan Tan <jonathantanmy@google.com> writes:

>> not about this patch: can these return values from process_acks be made
>> into an enum with named enumerators?  That would make what's happening
>> in the call site more obvious.
>
> That sounds reasonable to me.
>
>> > +	cp trace /tmp/x &&
>> 
>> Leftover debugging line?
>
> Ah, yes. If Junio can't or won't do it locally then I'll send out
> another set with this changed.

Well, if I am expecting the "named enumerators" patch anyway, I'd
wait the fix to be done at the source ;-)
diff mbox series

Patch

diff --git a/fetch-pack.c b/fetch-pack.c
index 0b07b3ee73..7d15c7af81 100644
--- a/fetch-pack.c
+++ b/fetch-pack.c
@@ -1143,6 +1143,7 @@  static void add_common(struct strbuf *req_buf, struct oidset *common)
 }
 
 static int add_haves(struct fetch_negotiator *negotiator,
+		     int seen_ack,
 		     struct strbuf *req_buf,
 		     int *haves_to_send, int *in_vain)
 {
@@ -1157,7 +1158,7 @@  static int add_haves(struct fetch_negotiator *negotiator,
 	}
 
 	*in_vain += haves_added;
-	if (!haves_added || *in_vain >= MAX_IN_VAIN) {
+	if (!haves_added || (seen_ack && *in_vain >= MAX_IN_VAIN)) {
 		/* Send Done */
 		packet_buf_write(req_buf, "done\n");
 		ret = 1;
@@ -1173,7 +1174,7 @@  static int send_fetch_request(struct fetch_negotiator *negotiator, int fd_out,
 			      struct fetch_pack_args *args,
 			      const struct ref *wants, struct oidset *common,
 			      int *haves_to_send, int *in_vain,
-			      int sideband_all)
+			      int sideband_all, int seen_ack)
 {
 	int ret = 0;
 	struct strbuf req_buf = STRBUF_INIT;
@@ -1230,7 +1231,8 @@  static int send_fetch_request(struct fetch_negotiator *negotiator, int fd_out,
 		add_common(&req_buf, common);
 
 		/* Add initial haves */
-		ret = add_haves(negotiator, &req_buf, haves_to_send, in_vain);
+		ret = add_haves(negotiator, seen_ack, &req_buf,
+				haves_to_send, in_vain);
 	}
 
 	/* Send request */
@@ -1444,6 +1446,7 @@  static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
 	int haves_to_send = INITIAL_FLUSH;
 	struct fetch_negotiator negotiator_alloc;
 	struct fetch_negotiator *negotiator;
+	int seen_ack = 0;
 
 	if (args->no_dependents) {
 		negotiator = NULL;
@@ -1500,7 +1503,8 @@  static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
 			if (send_fetch_request(negotiator, fd[1], args, ref,
 					       &common,
 					       &haves_to_send, &in_vain,
-					       reader.use_sideband))
+					       reader.use_sideband,
+					       seen_ack))
 				state = FETCH_GET_PACK;
 			else
 				state = FETCH_PROCESS_ACKS;
@@ -1513,6 +1517,7 @@  static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
 				break;
 			case 1:
 				in_vain = 0;
+				seen_ack = 1;
 				/* fallthrough */
 			default:
 				state = FETCH_SEND_REQUEST;
diff --git a/t/t5500-fetch-pack.sh b/t/t5500-fetch-pack.sh
index baa1a99f45..fcc5cc8ab0 100755
--- a/t/t5500-fetch-pack.sh
+++ b/t/t5500-fetch-pack.sh
@@ -385,6 +385,25 @@  test_expect_success 'clone shallow with packed refs' '
 	test_cmp count8.expected count8.actual
 '
 
+test_expect_success 'in_vain not triggered before first ACK' '
+	rm -rf myserver myclient trace &&
+	git init myserver &&
+	test_commit -C myserver foo &&
+	git clone "file://$(pwd)/myserver" myclient &&
+
+	# MAX_IN_VAIN is 256. Because of batching, the client will send 496
+	# (16+32+64+128+256) commits, not 256, before giving up. So create 496
+	# irrelevant commits.
+	test_commit_bulk -C myclient 496 &&
+
+	# The new commit that the client wants to fetch.
+	test_commit -C myserver bar &&
+
+	GIT_TRACE_PACKET="$(pwd)/trace" git -C myclient fetch --progress origin &&
+	cp trace /tmp/x &&
+	test_i18ngrep "Total 3 " trace
+'
+
 test_expect_success 'fetch in shallow repo unreachable shallow objects' '
 	(
 		git clone --bare --branch B --single-branch "file://$(pwd)/." no-reflog &&