diff mbox series

[v2,2/8] fetch-pack: add repairing

Message ID 2d817a65db55750d27986c2e2ab5b1723d17081e.1645719218.git.gitgitgadget@gmail.com (mailing list archive)
State Superseded
Headers show
Series fetch: add repair: full refetch without negotiation (was: "refiltering") | expand

Commit Message

Robert Coup Feb. 24, 2022, 4:13 p.m. UTC
From: Robert Coup <robert@coup.net.nz>

Allow a 'repair fetch' where the contents of the local object store are
ignored and a full fetch is performed, not attempting to find or
negotiate common commits with the remote.

A key use case is to apply a new partial clone blob/tree filter and
refetch all the associated matching content, which would otherwise not
be transferred when the commit objects are already present locally.

Signed-off-by: Robert Coup <robert@coup.net.nz>
---
 fetch-pack.c | 50 +++++++++++++++++++++++++++++++-------------------
 fetch-pack.h |  1 +
 2 files changed, 32 insertions(+), 19 deletions(-)

Comments

Junio C Hamano Feb. 25, 2022, 6:46 a.m. UTC | #1
"Robert Coup via GitGitGadget" <gitgitgadget@gmail.com> writes:

> @@ -694,6 +696,9 @@ static void mark_complete_and_common_ref(struct fetch_negotiator *negotiator,
>  
>  	save_commit_buffer = 0;
>  
> +	if (args->repair)
> +		return;
> +

Reading how the original value of save_commit_buffer is saved away,
the variable gets cleared and then gets restored before the function
returns in the normal codepath, this new code looks wrong.  Hitting
this early return after clearing the variable means nobody will
restore the saved value of the variable, no?

> @@ -1027,9 +1032,6 @@ static struct ref *do_fetch_pack(struct fetch_pack_args *args,
>  	struct fetch_negotiator negotiator_alloc;
>  	struct fetch_negotiator *negotiator;
>  
> -	negotiator = &negotiator_alloc;
> -	fetch_negotiator_init(r, negotiator);

I know why you want to force the "noop" negotiator while repairing,
but it is unclear why you need to move this down in the function.

>  	sort_ref_list(&ref, ref_compare_name);
>  	QSORT(sought, nr_sought, cmp_ref_by_name);
>  
> @@ -1119,9 +1121,16 @@ static struct ref *do_fetch_pack(struct fetch_pack_args *args,
>  	if (!server_supports_hash(the_hash_algo->name, NULL))
>  		die(_("Server does not support this repository's object format"));
>  
> +	negotiator = &negotiator_alloc;
> +	if (args->repair) {
> +		fetch_negotiator_init_noop(negotiator);
> +	} else {
> +		fetch_negotiator_init(r, negotiator);
> +	}

Hmph.  I am debating myself if hardcoding the implementation detail
of "when repairing, the noop negotiator is the only useful one" like
this code does is a sensible thing to do.  If we later need to tweak
the choice of negotiator used depending on the caller's needs,
perhaps fetch_negotiator_init() should gain a new flags word, i.e.

	fetch_negotiator_init(struct repository *,
			      struct fetch_negotiator *,
			      unsigned flags)

where "Use negotiator suitable for the repairing fetch" could be a
single bit in the flags word, making this caller more like:

	negotiator = &negotiator_alloc;
	flags = 0;
	if (args->repair)
		flags |= FETCH_NEGOTIATOR_REPAIRING;
	fetch_negotiator_init(r, negotiator, flags);

perhaps.  That way, [1/8] becomes unnecessary.

>  	mark_complete_and_common_ref(negotiator, args, &ref);
>  	filter_refs(args, &ref, sought, nr_sought);
> -	if (everything_local(args, &ref)) {
> +	if (!args->repair && everything_local(args, &ref)) {
>  		packet_flush(fd[1]);
>  		goto all_done;
>  	}
> @@ -1587,7 +1596,10 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
>  	struct strvec index_pack_args = STRVEC_INIT;
>  
>  	negotiator = &negotiator_alloc;
> -	fetch_negotiator_init(r, negotiator);
> +	if (args->repair)
> +		fetch_negotiator_init_noop(negotiator);
> +	else
> +		fetch_negotiator_init(r, negotiator);

Likewise.

>  	packet_reader_init(&reader, fd[0], NULL, 0,
>  			   PACKET_READ_CHOMP_NEWLINE |
> @@ -1613,7 +1625,7 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
>  			/* Filter 'ref' by 'sought' and those that aren't local */
>  			mark_complete_and_common_ref(negotiator, args, &ref);
>  			filter_refs(args, &ref, sought, nr_sought);
> -			if (everything_local(args, &ref))
> +			if (!args->repair && everything_local(args, &ref))
>  				state = FETCH_DONE;
>  			else
>  				state = FETCH_SEND_REQUEST;
> diff --git a/fetch-pack.h b/fetch-pack.h
> index 7f94a2a5831..bbb663edda8 100644
> --- a/fetch-pack.h
> +++ b/fetch-pack.h
> @@ -42,6 +42,7 @@ struct fetch_pack_args {
>  	unsigned update_shallow:1;
>  	unsigned reject_shallow_remote:1;
>  	unsigned deepen:1;
> +	unsigned repair:1;
>  
>  	/*
>  	 * Indicate that the remote of this request is a promisor remote. The
Robert Coup Feb. 28, 2022, 12:14 p.m. UTC | #2
Hi Junio,

On Fri, 25 Feb 2022 at 06:46, Junio C Hamano <gitster@pobox.com> wrote:
>
> "Robert Coup via GitGitGadget" <gitgitgadget@gmail.com> writes:
>
> > @@ -694,6 +696,9 @@ static void mark_complete_and_common_ref(struct fetch_negotiator *negotiator,
> >
> >       save_commit_buffer = 0;
> >
> > +     if (args->repair)
> > +             return;
> > +
>
> Reading how the original value of save_commit_buffer is saved away,
> the variable gets cleared and then gets restored before the function
> returns in the normal codepath, this new code looks wrong.  Hitting
> this early return after clearing the variable means nobody will
> restore the saved value of the variable, no?

Good spotting, thank you.

>
> > @@ -1027,9 +1032,6 @@ static struct ref *do_fetch_pack(struct fetch_pack_args *args,
> >       struct fetch_negotiator negotiator_alloc;
> >       struct fetch_negotiator *negotiator;
> >
> > -     negotiator = &negotiator_alloc;
> > -     fetch_negotiator_init(r, negotiator);
>
> I know why you want to force the "noop" negotiator while repairing,
> but it is unclear why you need to move this down in the function.

Seemed cleaner to initialise the right negotiator once, rather than
clearing and re-initialising depending on repair mode.

> Hmph.  I am debating myself if hardcoding the implementation detail
> of "when repairing, the noop negotiator is the only useful one" like
> this code does is a sensible thing to do.  If we later need to tweak
> the choice of negotiator used depending on the caller's needs,
> perhaps fetch_negotiator_init() should gain a new flags word, i.e.

To me this feels a bit hypothetical, but maybe I'm missing a use case?
The point of repairing is to skip negotiating common commits and instead
do (effectively) a clone-style fresh fetch. If some future special
negotiator that has a repair mode arrives, or likewise a more complex
repair mode, then other things will probably need adapting?

> where "Use negotiator suitable for the repairing fetch" could be a
> single bit in the flags word, making this caller more like:
>
>         negotiator = &negotiator_alloc;
>         flags = 0;
>         if (args->repair)
>                 flags |= FETCH_NEGOTIATOR_REPAIRING;
>         fetch_negotiator_init(r, negotiator, flags);
>
> perhaps.  That way, [1/8] becomes unnecessary.

With the current patch it is clear what's happening, that the user's
negotiator selection is deliberately being ignored for the purposes of
repairing. Conversely, calling fetch_negotiator_init() asking for a
skipping negotiator in repair mode and getting back a noop negotiator
seems non-obvious.

Thanks,

Rob :)
diff mbox series

Patch

diff --git a/fetch-pack.c b/fetch-pack.c
index 87657907e78..8103243947a 100644
--- a/fetch-pack.c
+++ b/fetch-pack.c
@@ -312,19 +312,21 @@  static int find_common(struct fetch_negotiator *negotiator,
 		const char *remote_hex;
 		struct object *o;
 
-		/*
-		 * If that object is complete (i.e. it is an ancestor of a
-		 * local ref), we tell them we have it but do not have to
-		 * tell them about its ancestors, which they already know
-		 * about.
-		 *
-		 * We use lookup_object here because we are only
-		 * interested in the case we *know* the object is
-		 * reachable and we have already scanned it.
-		 */
-		if (((o = lookup_object(the_repository, remote)) != NULL) &&
-				(o->flags & COMPLETE)) {
-			continue;
+		if (!args->repair) {
+			/*
+			* If that object is complete (i.e. it is an ancestor of a
+			* local ref), we tell them we have it but do not have to
+			* tell them about its ancestors, which they already know
+			* about.
+			*
+			* We use lookup_object here because we are only
+			* interested in the case we *know* the object is
+			* reachable and we have already scanned it.
+			*/
+			if (((o = lookup_object(the_repository, remote)) != NULL) &&
+					(o->flags & COMPLETE)) {
+				continue;
+			}
 		}
 
 		remote_hex = oid_to_hex(remote);
@@ -694,6 +696,9 @@  static void mark_complete_and_common_ref(struct fetch_negotiator *negotiator,
 
 	save_commit_buffer = 0;
 
+	if (args->repair)
+		return;
+
 	trace2_region_enter("fetch-pack", "parse_remote_refs_and_find_cutoff", NULL);
 	for (ref = *refs; ref; ref = ref->next) {
 		struct commit *commit;
@@ -1027,9 +1032,6 @@  static struct ref *do_fetch_pack(struct fetch_pack_args *args,
 	struct fetch_negotiator negotiator_alloc;
 	struct fetch_negotiator *negotiator;
 
-	negotiator = &negotiator_alloc;
-	fetch_negotiator_init(r, negotiator);
-
 	sort_ref_list(&ref, ref_compare_name);
 	QSORT(sought, nr_sought, cmp_ref_by_name);
 
@@ -1119,9 +1121,16 @@  static struct ref *do_fetch_pack(struct fetch_pack_args *args,
 	if (!server_supports_hash(the_hash_algo->name, NULL))
 		die(_("Server does not support this repository's object format"));
 
+	negotiator = &negotiator_alloc;
+	if (args->repair) {
+		fetch_negotiator_init_noop(negotiator);
+	} else {
+		fetch_negotiator_init(r, negotiator);
+	}
+
 	mark_complete_and_common_ref(negotiator, args, &ref);
 	filter_refs(args, &ref, sought, nr_sought);
-	if (everything_local(args, &ref)) {
+	if (!args->repair && everything_local(args, &ref)) {
 		packet_flush(fd[1]);
 		goto all_done;
 	}
@@ -1587,7 +1596,10 @@  static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
 	struct strvec index_pack_args = STRVEC_INIT;
 
 	negotiator = &negotiator_alloc;
-	fetch_negotiator_init(r, negotiator);
+	if (args->repair)
+		fetch_negotiator_init_noop(negotiator);
+	else
+		fetch_negotiator_init(r, negotiator);
 
 	packet_reader_init(&reader, fd[0], NULL, 0,
 			   PACKET_READ_CHOMP_NEWLINE |
@@ -1613,7 +1625,7 @@  static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
 			/* Filter 'ref' by 'sought' and those that aren't local */
 			mark_complete_and_common_ref(negotiator, args, &ref);
 			filter_refs(args, &ref, sought, nr_sought);
-			if (everything_local(args, &ref))
+			if (!args->repair && everything_local(args, &ref))
 				state = FETCH_DONE;
 			else
 				state = FETCH_SEND_REQUEST;
diff --git a/fetch-pack.h b/fetch-pack.h
index 7f94a2a5831..bbb663edda8 100644
--- a/fetch-pack.h
+++ b/fetch-pack.h
@@ -42,6 +42,7 @@  struct fetch_pack_args {
 	unsigned update_shallow:1;
 	unsigned reject_shallow_remote:1;
 	unsigned deepen:1;
+	unsigned repair:1;
 
 	/*
 	 * Indicate that the remote of this request is a promisor remote. The