diff mbox series

[v4,4/7] fetch: add --refetch option

Message ID 78501bbf28105ef252976266abb437a278585609.1648476132.git.gitgitgadget@gmail.com (mailing list archive)
State Accepted
Commit 3c7bab06e12922fbcb375187eb60ac426fc72a3a
Headers show
Series fetch: add repair: full refetch without negotiation (was: "refiltering") | expand

Commit Message

Robert Coup March 28, 2022, 2:02 p.m. UTC
From: Robert Coup <robert@coup.net.nz>

Teach fetch and transports the --refetch option to force a full fetch
without negotiating common commits with the remote. Use when applying a
new partial clone filter to refetch all matching objects.

Signed-off-by: Robert Coup <robert@coup.net.nz>
---
 Documentation/fetch-options.txt |  9 +++++++++
 builtin/fetch.c                 | 15 ++++++++++++++-
 transport-helper.c              |  3 +++
 transport.c                     |  4 ++++
 transport.h                     |  4 ++++
 5 files changed, 34 insertions(+), 1 deletion(-)

Comments

Ævar Arnfjörð Bjarmason March 31, 2022, 3:18 p.m. UTC | #1
On Mon, Mar 28 2022, Robert Coup via GitGitGadget wrote:

> From: Robert Coup <robert@coup.net.nz>
>
> Teach fetch and transports the --refetch option to force a full fetch
> without negotiating common commits with the remote. Use when applying a
> new partial clone filter to refetch all matching objects.
>
> [...]
> +ifndef::git-pull[]
> +--refetch::
> +	Instead of negotiating with the server to avoid transferring commits and
> +	associated objects that are already present locally, this option fetches
> +	all objects as a fresh clone would. Use this to reapply a partial clone
> +	filter from configuration or using `--filter=` when the filter
> +	definition has changed.
> +endif::git-pull[]

Re my comment on negotiation specifics in 2/7, this documentation is
really over-promising depending on what the answer to that is:
https://lore.kernel.org/git/220331.86o81mp2w1.gmgdl@evledraar.gmail.com/

I.e. instead of saying that we WILL fetch all objects "just like a
clone" shouldn't we have less focus on implementation details here, and
assure the user that we'll make their object store "complete" as though
--filter hadn't been used, without going into the specifics of what'll
happen over the wire?
>  		return -1;
>  
> +	/*
> +	 * Similarly, if we need to refetch, we always want to perform a full
> +	 * fetch ignoring existing objects.
> +	 */
> +	if (refetch)
> +		return -1;
> +
> +

One too many \n here.
Robert Coup April 1, 2022, 10:31 a.m. UTC | #2
Hi Ævar,

On Thu, 31 Mar 2022 at 16:20, Ævar Arnfjörð Bjarmason <avarab@gmail.com> wrote:
> > +--refetch::
> > +     Instead of negotiating with the server to avoid transferring commits and
> > +     associated objects that are already present locally, this option fetches
> > +     all objects as a fresh clone would. Use this to reapply a partial clone
> > +     filter from configuration or using `--filter=` when the filter
> > +     definition has changed.
>
> Re my comment on negotiation specifics in 2/7, this documentation is
> really over-promising depending on what the answer to that is:
> https://lore.kernel.org/git/220331.86o81mp2w1.gmgdl@evledraar.gmail.com/
>
> I.e. instead of saying that we WILL fetch all objects "just like a
> clone" shouldn't we have less focus on implementation details here, and
> assure the user that we'll make their object store "complete" as though
> --filter hadn't been used, without going into the specifics of what'll
> happen over the wire?

That's not quite the case though: we'll make their object db complete
as if a fresh clone with any specified --filter (including none) from
that remote has been done.

IMO explaining what will happen and to expect (particularly as this is
_not_ transferring some sort of object-delta-set, clone transfers can
be big) is a good thing. If it improves later; then change the docs.

Alternatively, rewording along the lines of "it will fix up your
object db to match a changed filter" is implying that only the missing
bits will be fetched.

Thanks, Rob.
diff mbox series

Patch

diff --git a/Documentation/fetch-options.txt b/Documentation/fetch-options.txt
index 6cdd9d43c5a..d03fce5aae0 100644
--- a/Documentation/fetch-options.txt
+++ b/Documentation/fetch-options.txt
@@ -163,6 +163,15 @@  endif::git-pull[]
 	behavior for a remote may be specified with the remote.<name>.tagOpt
 	setting. See linkgit:git-config[1].
 
+ifndef::git-pull[]
+--refetch::
+	Instead of negotiating with the server to avoid transferring commits and
+	associated objects that are already present locally, this option fetches
+	all objects as a fresh clone would. Use this to reapply a partial clone
+	filter from configuration or using `--filter=` when the filter
+	definition has changed.
+endif::git-pull[]
+
 --refmap=<refspec>::
 	When fetching refs listed on the command line, use the
 	specified refspec (can be given more than once) to map the
diff --git a/builtin/fetch.c b/builtin/fetch.c
index 9b4018f62c4..e391a5dbc55 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -59,7 +59,7 @@  static int prune_tags = -1; /* unspecified */
 
 static int all, append, dry_run, force, keep, multiple, update_head_ok;
 static int write_fetch_head = 1;
-static int verbosity, deepen_relative, set_upstream;
+static int verbosity, deepen_relative, set_upstream, refetch;
 static int progress = -1;
 static int enable_auto_gc = 1;
 static int tags = TAGS_DEFAULT, unshallow, update_shallow, deepen;
@@ -190,6 +190,9 @@  static struct option builtin_fetch_options[] = {
 	OPT_SET_INT_F(0, "unshallow", &unshallow,
 		      N_("convert to a complete repository"),
 		      1, PARSE_OPT_NONEG),
+	OPT_SET_INT_F(0, "refetch", &refetch,
+		      N_("re-fetch without negotiating common commits"),
+		      1, PARSE_OPT_NONEG),
 	{ OPTION_STRING, 0, "submodule-prefix", &submodule_prefix, N_("dir"),
 		   N_("prepend this to submodule path output"), PARSE_OPT_HIDDEN },
 	OPT_CALLBACK_F(0, "recurse-submodules-default",
@@ -1304,6 +1307,14 @@  static int check_exist_and_connected(struct ref *ref_map)
 	if (deepen)
 		return -1;
 
+	/*
+	 * Similarly, if we need to refetch, we always want to perform a full
+	 * fetch ignoring existing objects.
+	 */
+	if (refetch)
+		return -1;
+
+
 	/*
 	 * check_connected() allows objects to merely be promised, but
 	 * we need all direct targets to exist.
@@ -1517,6 +1528,8 @@  static struct transport *prepare_transport(struct remote *remote, int deepen)
 		set_option(transport, TRANS_OPT_DEEPEN_RELATIVE, "yes");
 	if (update_shallow)
 		set_option(transport, TRANS_OPT_UPDATE_SHALLOW, "yes");
+	if (refetch)
+		set_option(transport, TRANS_OPT_REFETCH, "yes");
 	if (filter_options.choice) {
 		const char *spec =
 			expand_list_objects_filter_spec(&filter_options);
diff --git a/transport-helper.c b/transport-helper.c
index a0297b0986c..b4dbbabb0c2 100644
--- a/transport-helper.c
+++ b/transport-helper.c
@@ -715,6 +715,9 @@  static int fetch_refs(struct transport *transport,
 	if (data->transport_options.update_shallow)
 		set_helper_option(transport, "update-shallow", "true");
 
+	if (data->transport_options.refetch)
+		set_helper_option(transport, "refetch", "true");
+
 	if (data->transport_options.filter_options.choice) {
 		const char *spec = expand_list_objects_filter_spec(
 			&data->transport_options.filter_options);
diff --git a/transport.c b/transport.c
index 70e9840a90e..3d64a43ab39 100644
--- a/transport.c
+++ b/transport.c
@@ -250,6 +250,9 @@  static int set_git_option(struct git_transport_options *opts,
 		list_objects_filter_die_if_populated(&opts->filter_options);
 		parse_list_objects_filter(&opts->filter_options, value);
 		return 0;
+	} else if (!strcmp(name, TRANS_OPT_REFETCH)) {
+		opts->refetch = !!value;
+		return 0;
 	} else if (!strcmp(name, TRANS_OPT_REJECT_SHALLOW)) {
 		opts->reject_shallow = !!value;
 		return 0;
@@ -384,6 +387,7 @@  static int fetch_refs_via_pack(struct transport *transport,
 	args.update_shallow = data->options.update_shallow;
 	args.from_promisor = data->options.from_promisor;
 	args.filter_options = data->options.filter_options;
+	args.refetch = data->options.refetch;
 	args.stateless_rpc = transport->stateless_rpc;
 	args.server_options = transport->server_options;
 	args.negotiation_tips = data->options.negotiation_tips;
diff --git a/transport.h b/transport.h
index a0bc6a1e9eb..12bc08fc339 100644
--- a/transport.h
+++ b/transport.h
@@ -16,6 +16,7 @@  struct git_transport_options {
 	unsigned update_shallow : 1;
 	unsigned reject_shallow : 1;
 	unsigned deepen_relative : 1;
+	unsigned refetch : 1;
 
 	/* see documentation of corresponding flag in fetch-pack.h */
 	unsigned from_promisor : 1;
@@ -216,6 +217,9 @@  void transport_check_allowed(const char *type);
 /* Filter objects for partial clone and fetch */
 #define TRANS_OPT_LIST_OBJECTS_FILTER "filter"
 
+/* Refetch all objects without negotiating */
+#define TRANS_OPT_REFETCH "refetch"
+
 /* Request atomic (all-or-nothing) updates when pushing */
 #define TRANS_OPT_ATOMIC "atomic"