diff mbox series

[2/2] fetch-pack: respect --no-update-shallow in v2

Message ID c4d2f409e246cce02ebfdb8c7110e3700d066ec8.1553546216.git.jonathantanmy@google.com (mailing list archive)
State New, archived
Headers show
Series Last big GIT_TEST_PROTOCOL_VERSION=2 fix, hopefully | expand

Commit Message

Jonathan Tan March 25, 2019, 8:43 p.m. UTC
In protocol v0, when sending "shallow" lines, the server distinguishes
between lines caused by the remote repo being shallow and lines caused
by client-specified depth settings. Unless "--update-shallow" is
specified, there is a difference in behavior: refs that reach the former
"shallow" lines, but not the latter, are rejected. But in v2, the server
does not, and the client treats all "shallow" lines like lines caused by
client-specified depth settings.

Full restoration of v0 functionality is not possible without protocol
change, but we can implement a heuristic: if we specify any depth
setting, treat all "shallow" lines like lines caused by client-specified
depth settings (that is, unaffected by "--no-update-shallow"), but
otherwise, treat them like lines caused by the remote repo being shallow
(that is, affected by "--no-update-shallow"). This restores most of v0
behavior, except in the case where a client fetches from a shallow
repository with depth settings.

This patch causes a test that previously failed with
GIT_TEST_PROTOCOL_VERSION=2 to pass.

Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
---
 fetch-pack.c | 44 ++++++++++++++++++++++++++++++++++++--------
 1 file changed, 36 insertions(+), 8 deletions(-)

Comments

Jeff King March 26, 2019, 5:20 a.m. UTC | #1
On Mon, Mar 25, 2019 at 01:43:23PM -0700, Jonathan Tan wrote:

> In protocol v0, when sending "shallow" lines, the server distinguishes
> between lines caused by the remote repo being shallow and lines caused
> by client-specified depth settings. Unless "--update-shallow" is
> specified, there is a difference in behavior: refs that reach the former
> "shallow" lines, but not the latter, are rejected. But in v2, the server
> does not, and the client treats all "shallow" lines like lines caused by
> client-specified depth settings.
> 
> Full restoration of v0 functionality is not possible without protocol
> change,

That's rather unfortunate. Is this because the v2 ls-refs phase is
separate, and that's when a v0 server would tell us about its shallows?
It looks like in v2 it comes in a separate "shallow-info" section.

What would the protocol change look like?  Would we just need a
capability to instruct the server to mark the two different types of
shallow distinctly? Or do we actually need to convey the information
separately (e.g., in the ls-refs phase)?

None of that matters for your patch here, but I'm just wondering what
the path forward is.

> but we can implement a heuristic: if we specify any depth
> setting, treat all "shallow" lines like lines caused by client-specified
> depth settings (that is, unaffected by "--no-update-shallow"), but
> otherwise, treat them like lines caused by the remote repo being shallow
> (that is, affected by "--no-update-shallow"). This restores most of v0
> behavior, except in the case where a client fetches from a shallow
> repository with depth settings.

That seems like the best we can do without the protocol change. And even
if we adjust the protocol, we need some fallback behavior for existing
v2 servers, so this is worth doing.

>  fetch-pack.c | 44 ++++++++++++++++++++++++++++++++++++--------
>  1 file changed, 36 insertions(+), 8 deletions(-)

The patch looks reasonable to me, though I am far from an expert on the
shallow bits of the protocol. One thing I did notice:

>  static void receive_shallow_info(struct fetch_pack_args *args,
> -				 struct packet_reader *reader)
> +				 struct packet_reader *reader,
> +				 struct shallow_info *si)
>  {
> -	int line_received = 0;
> +	struct oid_array *shallows;
> +	int unshallow_received = 0;
> +
> +	shallows = xcalloc(1, sizeof(*shallows));

This has to be heap-allocated, since we pass off ownership to "si"
(sometimes). But in the v0 case, it comes from the transport's
&data->shallows or a local variable in cmd_fetch_pack(), and we never
free it. So I think this oid_array ends up getting leaked.

Perhaps it's worth passing down the shallows array we get from the
caller of fetch_pack(). Something like the patch below (I think it is
never NULL, which means in your patch 1 you can simplify the conditional
for the BUG).

diff --git a/fetch-pack.c b/fetch-pack.c
index 672c79c91a..af2ee47a83 100644
--- a/fetch-pack.c
+++ b/fetch-pack.c
@@ -1254,13 +1254,11 @@ static int process_acks(struct fetch_negotiator *negotiator,
 
 static void receive_shallow_info(struct fetch_pack_args *args,
 				 struct packet_reader *reader,
+				 struct oid_array *shallows,
 				 struct shallow_info *si)
 {
-	struct oid_array *shallows;
 	int unshallow_received = 0;
 
-	shallows = xcalloc(1, sizeof(*shallows));
-
 	process_section_header(reader, "shallow-info", 0);
 	while (packet_reader_read(reader) == PACKET_READ_NORMAL) {
 		const char *arg;
@@ -1303,7 +1301,6 @@ static void receive_shallow_info(struct fetch_pack_args *args,
 		for (i = 0; i < shallows->nr; i++)
 			register_shallow(the_repository, &shallows->oid[i]);
 		oid_array_clear(shallows);
-		free(shallows);
 		setup_alternate_shallow(&shallow_lock, &alternate_shallow_file,
 					NULL);
 		args->deepen = 1;
@@ -1320,7 +1317,6 @@ static void receive_shallow_info(struct fetch_pack_args *args,
 		else
 			alternate_shallow_file = NULL;
 	} else {
-		free(shallows);
 		alternate_shallow_file = NULL;
 	}
 }
@@ -1365,6 +1361,7 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
 				    const struct ref *orig_ref,
 				    struct ref **sought, int nr_sought,
 				    struct shallow_info *si,
+				    struct oid_array *shallows,
 				    char **pack_lockfile)
 {
 	struct ref *ref = copy_ref_list(orig_ref);
@@ -1439,7 +1436,7 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
 		case FETCH_GET_PACK:
 			/* Check for shallow-info section */
 			if (process_section_header(&reader, "shallow-info", 1))
-				receive_shallow_info(args, &reader, si);
+				receive_shallow_info(args, &reader, shallows, si);
 
 			if (process_section_header(&reader, "wanted-refs", 1))
 				receive_wanted_refs(&reader, sought, nr_sought);
@@ -1681,7 +1678,7 @@ struct ref *fetch_pack(struct fetch_pack_args *args,
 			BUG("Protocol V2 does not provide shallows at this point in the fetch");
 		memset(&si, 0, sizeof(si));
 		ref_cpy = do_fetch_pack_v2(args, fd, ref, sought, nr_sought,
-					   &si, pack_lockfile);
+					   &si, shallow, pack_lockfile);
 	} else {
 		prepare_shallow_info(&si, shallow);
 		ref_cpy = do_fetch_pack(args, fd, ref, sought, nr_sought,
Duy Nguyen March 26, 2019, 10:14 a.m. UTC | #2
On Tue, Mar 26, 2019 at 12:20 PM Jeff King <peff@peff.net> wrote:
>
> On Mon, Mar 25, 2019 at 01:43:23PM -0700, Jonathan Tan wrote:
>
> > In protocol v0, when sending "shallow" lines, the server distinguishes
> > between lines caused by the remote repo being shallow and lines caused
> > by client-specified depth settings. Unless "--update-shallow" is
> > specified, there is a difference in behavior: refs that reach the former
> > "shallow" lines, but not the latter, are rejected. But in v2, the server
> > does not, and the client treats all "shallow" lines like lines caused by
> > client-specified depth settings.
> >
> > Full restoration of v0 functionality is not possible without protocol
> > change,
>
> That's rather unfortunate. Is this because the v2 ls-refs phase is
> separate, and that's when a v0 server would tell us about its shallows?
> It looks like in v2 it comes in a separate "shallow-info" section.
>
> What would the protocol change look like?  Would we just need a
> capability to instruct the server to mark the two different types of
> shallow distinctly? Or do we actually need to convey the information
> separately (e.g., in the ls-refs phase)?
>
> None of that matters for your patch here, but I'm just wondering what
> the path forward is.

If it helps (because I'm still catching up with v2 to actually help
review), this case is for cloning from a shallow repo. The commit that
outlines how .git/shallow is updated is 58babfffde (shallow.c: the 8
steps to select new commits for .git/shallow, 2013-12-05).

Since the first shallow info is about the shape of the remote repo
(where refs are the tips), ls-refs sounds like the right place to
include the information. In other words, ls-refs currently tells the
tip/top of the repo, what's missing is the piece about "the bottom"
(shallow cut points).

> > but we can implement a heuristic: if we specify any depth
> > setting, treat all "shallow" lines like lines caused by client-specified
> > depth settings (that is, unaffected by "--no-update-shallow"), but
> > otherwise, treat them like lines caused by the remote repo being shallow
> > (that is, affected by "--no-update-shallow"). This restores most of v0
> > behavior, except in the case where a client fetches from a shallow
> > repository with depth settings.
>
> That seems like the best we can do without the protocol change. And even
> if we adjust the protocol, we need some fallback behavior for existing
> v2 servers, so this is worth doing.

Are people actually doing this (i.e. cloning from or pushing to a
shallow repo)? I added this with the intention that a big shallow repo
(e.g. one year long history) is served as the common source to reduce
server loads and everything, while the full/big repo is available but
rarely needed. I never saw anyone complain about it (so, likely not
using it).

The description of --update-shallow probably should mention this
fallback behavior? --update-shallow was not default because I feared
the local repo could be cut short by unsolicited shallow requests from
the server side, and it looks like --update-shallow is default (by
mistake) in v2? Maybe I worried for nothing. I dunno.
Jeff King March 26, 2019, 3:18 p.m. UTC | #3
On Tue, Mar 26, 2019 at 05:14:11PM +0700, Duy Nguyen wrote:

> > That seems like the best we can do without the protocol change. And even
> > if we adjust the protocol, we need some fallback behavior for existing
> > v2 servers, so this is worth doing.
> 
> Are people actually doing this (i.e. cloning from or  pushing to a
> shallow repo)? I added this with the intention that a big shallow repo
> (e.g. one year long history) is served as the common source to reduce
> server loads and everything, while the full/big repo is available but
> rarely needed. I never saw anyone complain about it (so, likely not
> using it).

I don't think I've ever seen anybody serve fetches out of a shallow
clone in practice (I don't think we ever seriously considered them at
GitHub, but given their general incompatibility with reachability
bitmaps, I suspect it would cause more performance problems than it
solves).

I've always imagined people do it for one-offs. E.g., they have a
shallow clone, and fetch out of that to a temporary copy. That may be
less useful these days with the advent of separate worktrees.

-Peff
Duy Nguyen March 26, 2019, 3:39 p.m. UTC | #4
On Tue, Mar 26, 2019 at 10:18 PM Jeff King <peff@peff.net> wrote:
>
> On Tue, Mar 26, 2019 at 05:14:11PM +0700, Duy Nguyen wrote:
>
> > > That seems like the best we can do without the protocol change. And even
> > > if we adjust the protocol, we need some fallback behavior for existing
> > > v2 servers, so this is worth doing.
> >
> > Are people actually doing this (i.e. cloning from or  pushing to a
> > shallow repo)? I added this with the intention that a big shallow repo
> > (e.g. one year long history) is served as the common source to reduce
> > server loads and everything, while the full/big repo is available but
> > rarely needed. I never saw anyone complain about it (so, likely not
> > using it).
>
> I don't think I've ever seen anybody serve fetches out of a shallow
> clone in practice (I don't think we ever seriously considered them at
> GitHub, but given their general incompatibility with reachability
> bitmaps, I suspect it would cause more performance problems than it
> solves).

Yeah. I still think it scales better long term because you can control
this history depth in the common case instead of trying to make git
work well with 30+ years worth of commits. I might try to make
reachability bitmaps and commit-graph work with shallow clones one
day, but that's pretty much in the "hey this is fun, are you bored?"
category.

> I've always imagined people do it for one-offs. E.g., they have a
> shallow clone, and fetch out of that to a temporary copy. That may be
> less useful these days with the advent of separate worktrees.
Jonathan Tan March 26, 2019, 5:37 p.m. UTC | #5
> On Mon, Mar 25, 2019 at 01:43:23PM -0700, Jonathan Tan wrote:
> 
> > In protocol v0, when sending "shallow" lines, the server distinguishes
> > between lines caused by the remote repo being shallow and lines caused
> > by client-specified depth settings. Unless "--update-shallow" is
> > specified, there is a difference in behavior: refs that reach the former
> > "shallow" lines, but not the latter, are rejected. But in v2, the server
> > does not, and the client treats all "shallow" lines like lines caused by
> > client-specified depth settings.
> > 
> > Full restoration of v0 functionality is not possible without protocol
> > change,
> 
> That's rather unfortunate. Is this because the v2 ls-refs phase is
> separate, and that's when a v0 server would tell us about its shallows?
> It looks like in v2 it comes in a separate "shallow-info" section.

That's right. In v2, it comes in "shallow-info", which happens right
before the server sends the packfile.

> What would the protocol change look like?  Would we just need a
> capability to instruct the server to mark the two different types of
> shallow distinctly? Or do we actually need to convey the information
> separately (e.g., in the ls-refs phase)?
> 
> None of that matters for your patch here, but I'm just wondering what
> the path forward is.

Conveying it in the ls-refs would work.

> > but we can implement a heuristic: if we specify any depth
> > setting, treat all "shallow" lines like lines caused by client-specified
> > depth settings (that is, unaffected by "--no-update-shallow"), but
> > otherwise, treat them like lines caused by the remote repo being shallow
> > (that is, affected by "--no-update-shallow"). This restores most of v0
> > behavior, except in the case where a client fetches from a shallow
> > repository with depth settings.
> 
> That seems like the best we can do without the protocol change. And even
> if we adjust the protocol, we need some fallback behavior for existing
> v2 servers, so this is worth doing.

Thanks.

> The patch looks reasonable to me, though I am far from an expert on the
> shallow bits of the protocol. One thing I did notice:
> 
> >  static void receive_shallow_info(struct fetch_pack_args *args,
> > -				 struct packet_reader *reader)
> > +				 struct packet_reader *reader,
> > +				 struct shallow_info *si)
> >  {
> > -	int line_received = 0;
> > +	struct oid_array *shallows;
> > +	int unshallow_received = 0;
> > +
> > +	shallows = xcalloc(1, sizeof(*shallows));
> 
> This has to be heap-allocated, since we pass off ownership to "si"
> (sometimes). But in the v0 case, it comes from the transport's
> &data->shallows or a local variable in cmd_fetch_pack(), and we never
> free it. So I think this oid_array ends up getting leaked.

Thanks for the catch.

> Perhaps it's worth passing down the shallows array we get from the
> caller of fetch_pack(). Something like the patch below (I think it is
> never NULL, which means in your patch 1 you can simplify the conditional
> for the BUG).

[snip patch]

You're right that it is never NULL - I have removed that check. As for
passing down the shallows array that we get from the caller of
fetch_pack(), that would get confusing because we end up modifying the
shallows array in some code paths, and the transport is sometimes reused
(for example, when backfilling tags). I have instead made a
shallows_scratch variable in fetch_pack(), and made fetch_pack() pass it
down (like in the diff you provided).
Jeff King March 26, 2019, 6:18 p.m. UTC | #6
On Tue, Mar 26, 2019 at 10:37:06AM -0700, Jonathan Tan wrote:

> > Perhaps it's worth passing down the shallows array we get from the
> > caller of fetch_pack(). Something like the patch below (I think it is
> > never NULL, which means in your patch 1 you can simplify the conditional
> > for the BUG).
> 
> [snip patch]
> 
> You're right that it is never NULL - I have removed that check. As for
> passing down the shallows array that we get from the caller of
> fetch_pack(), that would get confusing because we end up modifying the
> shallows array in some code paths, and the transport is sometimes reused
> (for example, when backfilling tags). I have instead made a
> shallows_scratch variable in fetch_pack(), and made it pass it down
> (like in the diff you provided).

Yeah, I confess to having spent quite a few minutes trying to figure out
the difference between "shallows" and "shallow_info", whether one
wrote into the other, and who was responsible for filling each in. So I
will not complain if you have a way of writing it that is less
confusing. :)

-Peff
diff mbox series

Patch

diff --git a/fetch-pack.c b/fetch-pack.c
index a0eb268dfc..672c79c91a 100644
--- a/fetch-pack.c
+++ b/fetch-pack.c
@@ -1253,9 +1253,13 @@  static int process_acks(struct fetch_negotiator *negotiator,
 }
 
 static void receive_shallow_info(struct fetch_pack_args *args,
-				 struct packet_reader *reader)
+				 struct packet_reader *reader,
+				 struct shallow_info *si)
 {
-	int line_received = 0;
+	struct oid_array *shallows;
+	int unshallow_received = 0;
+
+	shallows = xcalloc(1, sizeof(*shallows));
 
 	process_section_header(reader, "shallow-info", 0);
 	while (packet_reader_read(reader) == PACKET_READ_NORMAL) {
@@ -1265,8 +1269,7 @@  static void receive_shallow_info(struct fetch_pack_args *args,
 		if (skip_prefix(reader->line, "shallow ", &arg)) {
 			if (get_oid_hex(arg, &oid))
 				die(_("invalid shallow line: %s"), reader->line);
-			register_shallow(the_repository, &oid);
-			line_received = 1;
+			oid_array_append(shallows, &oid);
 			continue;
 		}
 		if (skip_prefix(reader->line, "unshallow ", &arg)) {
@@ -1279,7 +1282,7 @@  static void receive_shallow_info(struct fetch_pack_args *args,
 				die(_("error in object: %s"), reader->line);
 			if (unregister_shallow(&oid))
 				die(_("no shallow found: %s"), reader->line);
-			line_received = 1;
+			unshallow_received = 1;
 			continue;
 		}
 		die(_("expected shallow/unshallow, got %s"), reader->line);
@@ -1289,11 +1292,35 @@  static void receive_shallow_info(struct fetch_pack_args *args,
 	    reader->status != PACKET_READ_DELIM)
 		die(_("error processing shallow info: %d"), reader->status);
 
-	if (line_received) {
+	if (args->deepen || unshallow_received) {
+		/*
+		 * Treat these as shallow lines caused by our depth settings.
+		 * In v0, these lines cannot cause refs to be rejected; do the
+		 * same.
+		 */
+		int i;
+
+		for (i = 0; i < shallows->nr; i++)
+			register_shallow(the_repository, &shallows->oid[i]);
+		oid_array_clear(shallows);
+		free(shallows);
 		setup_alternate_shallow(&shallow_lock, &alternate_shallow_file,
 					NULL);
 		args->deepen = 1;
+	} else if (shallows->nr) {
+		/*
+		 * Treat these as shallow lines caused by the remote being
+		 * shallow. In v0, remote refs that reach these objects are
+		 * rejected (unless --update-shallow is set); do the same.
+		 */
+		prepare_shallow_info(si, shallows);
+		if (si->nr_ours || si->nr_theirs)
+			alternate_shallow_file =
+				setup_temporary_shallow(si->shallow);
+		else
+			alternate_shallow_file = NULL;
 	} else {
+		free(shallows);
 		alternate_shallow_file = NULL;
 	}
 }
@@ -1337,6 +1364,7 @@  static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
 				    int fd[2],
 				    const struct ref *orig_ref,
 				    struct ref **sought, int nr_sought,
+				    struct shallow_info *si,
 				    char **pack_lockfile)
 {
 	struct ref *ref = copy_ref_list(orig_ref);
@@ -1411,7 +1439,7 @@  static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
 		case FETCH_GET_PACK:
 			/* Check for shallow-info section */
 			if (process_section_header(&reader, "shallow-info", 1))
-				receive_shallow_info(args, &reader);
+				receive_shallow_info(args, &reader, si);
 
 			if (process_section_header(&reader, "wanted-refs", 1))
 				receive_wanted_refs(&reader, sought, nr_sought);
@@ -1653,7 +1681,7 @@  struct ref *fetch_pack(struct fetch_pack_args *args,
 			BUG("Protocol V2 does not provide shallows at this point in the fetch");
 		memset(&si, 0, sizeof(si));
 		ref_cpy = do_fetch_pack_v2(args, fd, ref, sought, nr_sought,
-					   pack_lockfile);
+					   &si, pack_lockfile);
 	} else {
 		prepare_shallow_info(&si, shallow);
 		ref_cpy = do_fetch_pack(args, fd, ref, sought, nr_sought,