Message ID | pull.1782.git.1725875232922.gitgitgadget@gmail.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | remote: introduce config to set prefetch refs | expand |
On Mon, Sep 9, 2024 at 3:17 PM Shubham Kanodia via GitGitGadget <gitgitgadget@gmail.com> wrote: > > From: Shubham Kanodia <shubham.kanodia10@gmail.com> > > This commit introduces a new configuration option, > remote.<name>.prefetchref, which allows users to specify specific > ref patterns to be prefetched during a git fetch --prefetch > operation. > > The new option accepts a space-separated list of ref patterns. > When the --prefetch option is used with git fetch, only the refs > matching these patterns will be prefetched, instead of the > default behavior of prefetching all fetchable refs. > > Example usage in .git/config: > [remote "origin"] > prefetchref = "refs/heads/main refs/heads/feature/*" > > This change allows users to optimize their prefetch operations, potentially > reducing network traffic and improving performance for large repositories > with many refs. > > Signed-off-by: Shubham Kanodia <shubham.kanodia10@gmail.com> > --- > remote: introduce config to set prefetch refs > > Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-1782%2Fpastelsky%2Fsk%2Fremote-prefetchref-v1 > Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-1782/pastelsky/sk/remote-prefetchref-v1 > Pull-Request: https://github.com/gitgitgadget/git/pull/1782 > > Documentation/config/remote.txt | 6 +++ > builtin/fetch.c | 29 +++++++++++++- > remote.c | 8 ++++ > remote.h | 3 ++ > t/t7900-maintenance.sh | 70 +++++++++++++++++++++++++++++++++ > 5 files changed, 115 insertions(+), 1 deletion(-) > > diff --git a/Documentation/config/remote.txt b/Documentation/config/remote.txt > index 8efc53e836d..b25d76dd3b1 100644 > --- a/Documentation/config/remote.txt > +++ b/Documentation/config/remote.txt > @@ -33,6 +33,12 @@ remote.<name>.fetch:: > The default set of "refspec" for linkgit:git-fetch[1]. See > linkgit:git-fetch[1]. > > +remote.<name>.prefetchref:: > + Specify the refs to be prefetched when fetching from this remote. > + The value is a space-separated list of ref patterns (e.g., "refs/heads/master refs/heads/develop*"). > + These patterns are used as the source part of the refspecs for prefetching. > + This can be used to optimize fetch operations by specifying exactly which refs should be prefetched. > + > remote.<name>.push:: > The default set of "refspec" for linkgit:git-push[1]. See > linkgit:git-push[1]. > diff --git a/builtin/fetch.c b/builtin/fetch.c > index b2b5aee5bf2..6e584fa2ebb 100644 > --- a/builtin/fetch.c > +++ b/builtin/fetch.c > @@ -434,6 +434,30 @@ static void find_non_local_tags(const struct ref *refs, > oidset_clear(&fetch_oids); > } > > +static void apply_prefetch_refspec(struct remote *remote, struct refspec *rs) > +{ > + if (remote->prefetch_refs.nr > 0) { > + int i; > + for (i = 0; i < remote->prefetch_refs.nr; i++) { > + const char *src = remote->prefetch_refs.items[i].string; > + struct strbuf dst = STRBUF_INIT; > + > + strbuf_addf(&dst, "refs/prefetch/%s/", remote->name); > + if (starts_with(src, "refs/heads/")) { > + strbuf_addstr(&dst, src + 11); > + } else if (starts_with(src, "refs/")) { > + strbuf_addstr(&dst, src + 5); > + } else { > + strbuf_addstr(&dst, src); > + } > + > + refspec_appendf(rs, "%s:%s", src, dst.buf); > + strbuf_release(&dst); > + } > + } > +} > + > + > static void filter_prefetch_refspec(struct refspec *rs) > { > int i; > @@ -502,8 +526,11 @@ static struct ref *get_ref_map(struct remote *remote, > int existing_refs_populated = 0; > > filter_prefetch_refspec(rs); > - if (remote) > + if (remote) { > filter_prefetch_refspec(&remote->fetch); > + if (prefetch) > + apply_prefetch_refspec(remote, rs); > + } > > if (rs->nr) { > struct refspec *fetch_refspec; > diff --git a/remote.c b/remote.c > index 8f3dee13186..b46d62b2c47 100644 > --- a/remote.c > +++ b/remote.c > @@ -141,6 +141,7 @@ static struct remote *make_remote(struct remote_state *remote_state, > ret->prune = -1; /* unspecified */ > ret->prune_tags = -1; /* unspecified */ > ret->name = xstrndup(name, len); > + string_list_init_dup(&ret->prefetch_refs); > refspec_init(&ret->push, REFSPEC_PUSH); > refspec_init(&ret->fetch, REFSPEC_FETCH); > > @@ -166,6 +167,7 @@ static void remote_clear(struct remote *remote) > free((char *)remote->uploadpack); > FREE_AND_NULL(remote->http_proxy); > FREE_AND_NULL(remote->http_proxy_authmethod); > + string_list_clear(&remote->prefetch_refs, 0); > } > > static void add_merge(struct branch *branch, const char *name) > @@ -456,6 +458,12 @@ static int handle_config(const char *key, const char *value, > remote->prune = git_config_bool(key, value); > else if (!strcmp(subkey, "prunetags")) > remote->prune_tags = git_config_bool(key, value); > + else if (!strcmp(subkey, "prefetchref")) { > + if (!value) > + return config_error_nonbool(key); > + string_list_split(&remote->prefetch_refs, value, ' ', -1); > + return 0; > + } > else if (!strcmp(subkey, "url")) { > if (!value) > return config_error_nonbool(key); > diff --git a/remote.h b/remote.h > index b901b56746d..c18e68e0d8d 100644 > --- a/remote.h > +++ b/remote.h > @@ -5,6 +5,7 @@ > #include "hashmap.h" > #include "refspec.h" > #include "strvec.h" > +#include "string-list.h" > > struct option; > struct transport_ls_refs_options; > @@ -77,6 +78,8 @@ struct remote { > > struct refspec fetch; > > + struct string_list prefetch_refs; > + > /* > * The setting for whether to fetch tags (as a separate rule from the > * configured refspecs); > diff --git a/t/t7900-maintenance.sh b/t/t7900-maintenance.sh > index abae7a97546..2ad5b922d83 100755 > --- a/t/t7900-maintenance.sh > +++ b/t/t7900-maintenance.sh > @@ -245,6 +245,76 @@ test_expect_success 'prefetch multiple remotes' ' > test_subcommand git fetch remote2 $fetchargs <skip-remote1.txt > ' > > +test_expect_success 'prefetch only acts on remote.<name>.prefetchref refs if present' ' > + test_create_repo prefetch-test-mixed-patterns && > + ( > + cd prefetch-test-mixed-patterns && > + test_commit initial && > + git clone . clone1 && > + git clone . clone2 && > + > + git remote add remote1 "file://$(pwd)/clone1" && > + git remote add remote2 "file://$(pwd)/clone2" && > + > + # Set single prefetchref pattern for remote1 and multiple for remote2 > + git config remote.remote1.prefetchref "refs/heads/foo" && > + git config remote.remote2.prefetchref "refs/heads/feature/* refs/heads/topic" && > + > + # Create branches in clone1 and push > + ( > + cd clone1 && > + git checkout -b foo && > + test_commit foo-commit && > + git checkout -b feature/a && > + test_commit feature-a-commit && > + git checkout -b other && > + test_commit other-commit && > + git push origin foo feature/a other > + ) && > + > + # Create branches in clone2 and push > + ( > + cd clone2 && > + git checkout -b topic && > + test_commit master-commit && > + git checkout -b feature/x && > + test_commit feature-x-commit && > + git checkout -b feature/y && > + test_commit feature-y-commit && > + git checkout -b dev && > + test_commit dev-commit && > + git push origin topic feature/x feature/y dev > + ) && > + > + # Run maintenance prefetch task > + GIT_TRACE2_EVENT="$(pwd)/prefetch.txt" git maintenance run --task=prefetch 2>/dev/null && > + > + # Check that only specified refs were prefetched > + fetchargs="--prefetch --prune --no-tags --no-write-fetch-head --recurse-submodules=no --quiet" && > + test_subcommand git fetch remote1 $fetchargs <prefetch.txt && > + test_subcommand git fetch remote2 $fetchargs <prefetch.txt && > + ls -R .git/refs/prefetch && > + > + # Verify that only specified refs are in the prefetch refs for remote1 > + git rev-parse refs/prefetch/remotes/remote1/foo && > + test_must_fail git rev-parse refs/prefetch/remotes/remote1/feature/a && > + test_must_fail git rev-parse refs/prefetch/remotes/remote1/other && > + > + # Verify that only specified refs are in the prefetch refs for remote2 > + git rev-parse refs/prefetch/remotes/remote2/feature/x && > + git rev-parse refs/prefetch/remotes/remote2/feature/y && > + git rev-parse refs/prefetch/remotes/remote2/topic && > + test_must_fail git rev-parse refs/prefetch/remotes/remote2/dev && > + > + # Fetch all refs and compare > + git fetch --all && > + test_cmp_rev refs/remotes/remote1/foo refs/prefetch/remotes/remote1/foo && > + test_cmp_rev refs/remotes/remote2/feature/x refs/prefetch/remotes/remote2/feature/x && > + test_cmp_rev refs/remotes/remote2/feature/y refs/prefetch/remotes/remote2/feature/y && > + test_cmp_rev refs/remotes/remote2/topic refs/prefetch/remotes/remote2/topic > + ) > +' > + > test_expect_success 'loose-objects task' ' > # Repack everything so we know the state of the object dir > git repack -adk && > > base-commit: 2e7b89e038c0c888acf61f1b4ee5a43d4dd5e94c > -- > gitgitgadget This is a continuation of my work, as we deemed that adding a `remote.<remote-name>.prefetch` was unnecessary given that there were already ways to stop fetching refs from a given remote using `skipFetchAll`. Looking at getting early feedback on the direction & implementation here since this isn't as straightforward as my last contribution. I would appreciate any thoughts! The config's name is tentatively `prefetchref,` but I'm open to suggestions.
"Shubham Kanodia via GitGitGadget" <gitgitgadget@gmail.com> writes: > +static void apply_prefetch_refspec(struct remote *remote, struct refspec *rs) > +{ > + if (remote->prefetch_refs.nr > 0) { > + int i; > + for (i = 0; i < remote->prefetch_refs.nr; i++) { > + const char *src = remote->prefetch_refs.items[i].string; > + struct strbuf dst = STRBUF_INIT; > + > + strbuf_addf(&dst, "refs/prefetch/%s/", remote->name); > + if (starts_with(src, "refs/heads/")) { > + strbuf_addstr(&dst, src + 11); > + } else if (starts_with(src, "refs/")) { > + strbuf_addstr(&dst, src + 5); > + } else { > + strbuf_addstr(&dst, src); > + } > + > + refspec_appendf(rs, "%s:%s", src, dst.buf); > + strbuf_release(&dst); > + } > + } > +} > static void filter_prefetch_refspec(struct refspec *rs) > { > int i; > @@ -502,8 +526,11 @@ static struct ref *get_ref_map(struct remote *remote, > int existing_refs_populated = 0; > > filter_prefetch_refspec(rs); > - if (remote) > + if (remote) { > filter_prefetch_refspec(&remote->fetch); > + if (prefetch) > + apply_prefetch_refspec(remote, rs); > + } Hmph, a separate helper function with a separate loop was something I did not expect to see. Looking at the filter_prefetch_refspec(), it already limits what it prefetched to what we usually fetch from the remote, and filteres out the tag namespace. I was hoping that this will _extend_ that existing mechanism, as if by default we have prefetch refspec "!refs/tags/*", which can be replaced by the configuration to say "!refs/tags/* !refs/changes/*", or positive ones like "refs/heads/*". The filtering semantics should be * a refspec whose src matches negated pattern (like !refs/tags/*) is rejected. * if the prefetch_refs has *only* positive patterns, then a refspec whose src does not match *any* of the pattern is rejected. * a refspec that is not rejected is prefetched. But the above still allows what filter_prefetch_refspec() does by default, without any way to narrow it down, and then adds its own entries. This is a half-tangent, but while studying for this topic, I noticed that filter_prefetch_refspec() triggers O(n) loop every time a fetch refspec is skipped. It all comes from 2e03115d (fetch: add --prefetch option, 2021-04-16) but rewriting the loop to use two pointers into the array seemed trivial and the result seemed a bit more readable. Your "further limit the prefetched refs with configuration" feature would probably replace this part of the updated code: + /* skip ones that do not store, or store in refs/tags */ + if (!rs->items[scan].dst || + (rs->items[scan].src && + starts_with(rs->items[scan].src, + ref_namespace[NAMESPACE_TAGS].ref))) { That is, instead of "skip ones that do not store, or store in refs/tags", filtering using the configured value (probably implemented as a helper function) would be used as the condition of the if statement. Thoughts? builtin/fetch.c | 46 ++++++++++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git c/builtin/fetch.c w/builtin/fetch.c index 693f02b958..219302ed67 100644 --- c/builtin/fetch.c +++ w/builtin/fetch.c @@ -436,37 +436,38 @@ static void find_non_local_tags(const struct ref *refs, static void filter_prefetch_refspec(struct refspec *rs) { - int i; + int scan, store; if (!prefetch) return; - for (i = 0; i < rs->nr; i++) { + /* + * scan refspec items with 'scan', and decide to either + * mangle and store it in 'store', or omit it from the result. + */ + for (scan = store = 0; scan < rs->nr; scan++, store++) { struct strbuf new_dst = STRBUF_INIT; char *old_dst; const char *sub = NULL; - if (rs->items[i].negative) - continue; - if (!rs->items[i].dst || - (rs->items[i].src && - starts_with(rs->items[i].src, - ref_namespace[NAMESPACE_TAGS].ref))) { - int j; - - free(rs->items[i].src); - free(rs->items[i].dst); - - for (j = i + 1; j < rs->nr; j++) { - rs->items[j - 1] = rs->items[j]; - rs->raw[j - 1] = rs->raw[j]; - } - rs->nr--; - i--; + /* negative ones are kept as-is */ + if (rs->items[scan].negative) { + if (store != scan) + rs->items[store] = rs->items[scan]; continue; } - old_dst = rs->items[i].dst; + /* skip ones that do not store, or store in refs/tags */ + if (!rs->items[scan].dst || + (rs->items[scan].src && + starts_with(rs->items[scan].src, + ref_namespace[NAMESPACE_TAGS].ref))) { + refspec_item_clear(&rs->items[scan]); + store--; /* compensate for loop's auto increment */ + continue; + } + + old_dst = rs->items[scan].dst; strbuf_addstr(&new_dst, ref_namespace[NAMESPACE_PREFETCH].ref); /* @@ -478,11 +479,12 @@ static void filter_prefetch_refspec(struct refspec *rs) sub = old_dst; strbuf_addstr(&new_dst, sub); - rs->items[i].dst = strbuf_detach(&new_dst, NULL); - rs->items[i].force = 1; + rs->items[store].dst = strbuf_detach(&new_dst, NULL); + rs->items[store].force = 1; free(old_dst); } + rs->nr = store; } static struct ref *get_ref_map(struct remote *remote,
On Mon, Sep 9, 2024 at 10:12 PM Junio C Hamano <gitster@pobox.com> wrote: > > "Shubham Kanodia via GitGitGadget" <gitgitgadget@gmail.com> writes: > > > +static void apply_prefetch_refspec(struct remote *remote, struct refspec *rs) > > +{ > > + if (remote->prefetch_refs.nr > 0) { > > + int i; > > + for (i = 0; i < remote->prefetch_refs.nr; i++) { > > + const char *src = remote->prefetch_refs.items[i].string; > > + struct strbuf dst = STRBUF_INIT; > > + > > + strbuf_addf(&dst, "refs/prefetch/%s/", remote->name); > > + if (starts_with(src, "refs/heads/")) { > > + strbuf_addstr(&dst, src + 11); > > + } else if (starts_with(src, "refs/")) { > > + strbuf_addstr(&dst, src + 5); > > + } else { > > + strbuf_addstr(&dst, src); > > + } > > + > > + refspec_appendf(rs, "%s:%s", src, dst.buf); > > + strbuf_release(&dst); > > + } > > + } > > +} > > static void filter_prefetch_refspec(struct refspec *rs) > > { > > int i; > > @@ -502,8 +526,11 @@ static struct ref *get_ref_map(struct remote *remote, > > int existing_refs_populated = 0; > > > > filter_prefetch_refspec(rs); > > - if (remote) > > + if (remote) { > > filter_prefetch_refspec(&remote->fetch); > > + if (prefetch) > > + apply_prefetch_refspec(remote, rs); > > + } > > Hmph, a separate helper function with a separate loop was something > I did not expect to see. Looking at the filter_prefetch_refspec(), > it already limits what it prefetched to what we usually fetch from > the remote, and filteres out the tag namespace. I was hoping that > this will _extend_ that existing mechanism, as if by default we > have prefetch refspec "!refs/tags/*", which can be replaced by the > configuration to say "!refs/tags/* !refs/changes/*", or positive > ones like "refs/heads/*". The filtering semantics should be > > * a refspec whose src matches negated pattern (like !refs/tags/*) > is rejected. > > * if the prefetch_refs has *only* positive patterns, then a refspec > whose src does not match *any* of the pattern is rejected. > > * a refspec that is not rejected is prefetched. > > But the above still allows what filter_prefetch_refspec() does by > default, without any way to narrow it down, and then adds its own > entries. > > This is a half-tangent, but while studying for this topic, I noticed > that filter_prefetch_refspec() triggers O(n) loop every time a fetch > refspec is skipped. > > It all comes from 2e03115d (fetch: add --prefetch option, > 2021-04-16) but rewriting the loop to use two pointers into the > array seemed trivial and the result seemed a bit more readable. > > Your "further limit the prefetched refs with configuration" feature > would probably replace this part of the updated code: > > + /* skip ones that do not store, or store in refs/tags */ > + if (!rs->items[scan].dst || > + (rs->items[scan].src && > + starts_with(rs->items[scan].src, > + ref_namespace[NAMESPACE_TAGS].ref))) { > > That is, instead of "skip ones that do not store, or store in refs/tags", > filtering using the configured value (probably implemented as a helper > function) would be used as the condition of the if statement. > > Thoughts? > > builtin/fetch.c | 46 ++++++++++++++++++++++++---------------------- > 1 file changed, 24 insertions(+), 22 deletions(-) > > diff --git c/builtin/fetch.c w/builtin/fetch.c > index 693f02b958..219302ed67 100644 > --- c/builtin/fetch.c > +++ w/builtin/fetch.c > @@ -436,37 +436,38 @@ static void find_non_local_tags(const struct ref *refs, > > static void filter_prefetch_refspec(struct refspec *rs) > { > - int i; > + int scan, store; > > if (!prefetch) > return; > > - for (i = 0; i < rs->nr; i++) { > + /* > + * scan refspec items with 'scan', and decide to either > + * mangle and store it in 'store', or omit it from the result. > + */ > + for (scan = store = 0; scan < rs->nr; scan++, store++) { > struct strbuf new_dst = STRBUF_INIT; > char *old_dst; > const char *sub = NULL; > > - if (rs->items[i].negative) > - continue; > - if (!rs->items[i].dst || > - (rs->items[i].src && > - starts_with(rs->items[i].src, > - ref_namespace[NAMESPACE_TAGS].ref))) { > - int j; > - > - free(rs->items[i].src); > - free(rs->items[i].dst); > - > - for (j = i + 1; j < rs->nr; j++) { > - rs->items[j - 1] = rs->items[j]; > - rs->raw[j - 1] = rs->raw[j]; > - } > - rs->nr--; > - i--; > + /* negative ones are kept as-is */ > + if (rs->items[scan].negative) { > + if (store != scan) > + rs->items[store] = rs->items[scan]; > continue; > } > > - old_dst = rs->items[i].dst; > + /* skip ones that do not store, or store in refs/tags */ > + if (!rs->items[scan].dst || > + (rs->items[scan].src && > + starts_with(rs->items[scan].src, > + ref_namespace[NAMESPACE_TAGS].ref))) { > + refspec_item_clear(&rs->items[scan]); > + store--; /* compensate for loop's auto increment */ > + continue; > + } > + > + old_dst = rs->items[scan].dst; > strbuf_addstr(&new_dst, ref_namespace[NAMESPACE_PREFETCH].ref); > > /* > @@ -478,11 +479,12 @@ static void filter_prefetch_refspec(struct refspec *rs) > sub = old_dst; > strbuf_addstr(&new_dst, sub); > > - rs->items[i].dst = strbuf_detach(&new_dst, NULL); > - rs->items[i].force = 1; > + rs->items[store].dst = strbuf_detach(&new_dst, NULL); > + rs->items[store].force = 1; > > free(old_dst); > } > + rs->nr = store; > } > > static struct ref *get_ref_map(struct remote *remote, How should we handle the related `remote.<remote-name>.fetch` config? In an earlier discussion, it was discussed that — `.prefetchref` should override `.fetch` completely (instead of patching it) which makes sense to me. At the moment my reading is that `filter_prefetch_refspec` still filters / modifies `remote->fetch`. ``` if (remote) { filter_prefetch_refspec(&remote->fetch); } ``` So we'll need to clear refspecs and re-populate perhaps?
Shubham Kanodia <shubham.kanodia10@gmail.com> writes: > How should we handle the related `remote.<remote-name>.fetch` config? The get_ref_map() helper is what the rest of the code interacts with configured refspec. remote->fetch is handled there and goes through the same filter_prefetch_refspec(). > In an earlier discussion, it was discussed that — > `.prefetchref` should override `.fetch` completely (instead of > patching it) which makes sense to me. Maybe it made sense to you back when it was discussed, but after seeing the current code (before applying this patch), specifically what happens in filter_prefetch_refspec(), it no longer makes much sense to me. Especially it is nonsense to allow .prefetchref to widen the set of refs that are being prefetched beyond what is normally fetched, so we should look at a design that easily allows such a configuration with strong suspicion, I would think.
On Tue, Sep 10, 2024 at 12:03 AM Junio C Hamano <gitster@pobox.com> wrote: > > Shubham Kanodia <shubham.kanodia10@gmail.com> writes: > > > How should we handle the related `remote.<remote-name>.fetch` config? > > The get_ref_map() helper is what the rest of the code interacts with > configured refspec. remote->fetch is handled there and goes through > the same filter_prefetch_refspec(). > > > In an earlier discussion, it was discussed that — > > `.prefetchref` should override `.fetch` completely (instead of > > patching it) which makes sense to me. > > Maybe it made sense to you back when it was discussed, but after > seeing the current code (before applying this patch), specifically > what happens in filter_prefetch_refspec(), it no longer makes much > sense to me. > > Especially it is nonsense to allow .prefetchref to widen the set of > refs that are being prefetched beyond what is normally fetched, so > we should look at a design that easily allows such a configuration > with strong suspicion, I would think. Ideally, a repository should be able to specify (say): remote.origin.fetch=+refs/heads/*:refs/remotes/origin/* remote.origin.prefetchref=refs/heads/main This configuration would maintain the normal behavior for fetches, but only prefetch the main branch. The rationale for this is that the main branch typically serves as the HEAD from which future branches will be forked in an active repository. Regarding: > If prefetch_refs contains only positive patterns, then a refspec whose source > doesn't match any of these patterns is rejected. Simply rejecting a source refspec pattern in `remote.<remote>.fetch` wouldn't achieve our goal here. Ideally, we'd need to create a subset of it. What we're looking for is essentially a pattern intersection between the (fetch) `refs/heads/*` and the (prefetchref) `refs/heads/main`, which in this case would result in `refs/heads/main`. However, if I understand correctly, performing such pattern intersections isn't straightforward in the `filter_prefetch_refspec` function (let me know if there' prior art for pattern intersection) I also believe that allowing negative refs might complicate things without providing a clear use case. For instance, how would we handle the intersection of `fetch` and `prefetchref` if `prefetchref` contained both positive and negative patterns? Given that both `fetch` and `prefetchref` could have multiple patterns, it might be simpler and more intuitive for users if we adopt an "A wins over B" approach. However, I'm interested in hearing your thoughts on this matter. Perhaps I should link to the earlier discussion here — Message ID (CAG=Um+1wTbXn_RN+LOCrpZpSNR_QF582PszxNyhz5anVHtBp+w@mail.gmail.com)
Shubham Kanodia <shubham.kanodia10@gmail.com> writes: > Ideally, a repository should be able to specify (say): > > remote.origin.fetch=+refs/heads/*:refs/remotes/origin/* > remote.origin.prefetchref=refs/heads/main > > This configuration would maintain the normal behavior for fetches, but > only prefetch the main branch. > The rationale for this is that the main branch typically serves as the > HEAD from which future branches will be forked in an active > repository. Oh, that is 100% agreeable. All I wanted to caution you about was what should happen when remote.origin.prefetchref in the above is replaced to something like: [remote "origin"] fetch = +refs/heads/*:refs/remotes/origin/* prefetchref = refs/notes/* That is, if your refspec used for your real fetch (i.e. "git fetch" without the "--prefetch" option) does not fetch anything from "refs/notes/" hierarchy, prefetching from the hierarchy does not help the real fetch. I do not have a strong preference between marking it as an error and silently ignoring the prefetch but leaning towards the latter, and that is why my suggestion to implement this new "prefetchref" as something that extends the existing filter_prefetch_refspec(), which already filters out refspec that fetches from the refs/tags/ namespace (and the ones that do not store by having NULL in the .dst side). > Regarding: > >> If prefetch_refs contains only positive patterns, then a refspec whose source >> doesn't match any of these patterns is rejected. > > Simply rejecting a source refspec pattern in `remote.<remote>.fetch` > wouldn't achieve our goal here. I used the verb "reject" to mean "filter out", just like a refspec with left-hand-side that begins with "refs/tags/" is filtered out in the current filter_prefetch_refspec(). And that is exactly what we want to achieve our goal here. IOW, you would * read their ref advertisement, and pick only the ones that have a matching pattern in the left-hand-side of a remote.$name.fetch element. With a more recent protocol, remote.$name.fetch may have already participated in narrowing what they advertise to begin with, but the end result is the same. * give it to filter_prefetch_refspec(). * filter_prefetch_refspec() inspects the refspec elements, and rejects ones with no right-hand-side, and ones with left-hand-side that begin with refs/tags/. The current code without your patch already works this way up to this point. * We extend the above filtering so that in addition to the two kinds we currently reject, reject the ones that do not match the prefetchref criteria. This is what is needed to implement "prefetchref configuration limits the set of refs that get prefetched". And what you quoted is a beginning of how "prefetchref configuration limits". It cannot be "add to what filter_prefetch_refspec() did", like done by the implementation in the patch we are discussing. If your configuration were this: [remote "origin"] fetch = +refs/heads/*:refs/remotes/origin/* you would want a way to say things like (1) I want to prefetch everything I usually fetch (2) Among the ones I usually fetch, I only want to prefetch master and next branches. (3) I want to prefetch only refs/heads/jk/* branches, but not refs/heads/jk/wip/* branches. (4) I want to prefetch everything I usually fetch, except for refs/heads/wip/* branches. The case (1) is the simplest. You will leave .prefetchref empty. For the case (2), you would write something like [remote "origin"] prefetchref = refs/heads/master prefetchref = refs/heads/next So, when your prefetchref has all positive patterns, after the existing conditional in filter_prefetch_refspec() passes a refspec whose right-hand-side (i.e., .dst) is not NULL and whose left-hand-side (i.e., .src) does not begin with "refs/tags/", you further inspect and make sure it matches one of these prefetchref patterns. In example (2), if they advertised master, next, and seen branches, refs/heads/seen would be filtered out because it matches neither of the two patterns, so we would end up prefetching master and next branches. For the case (3), you would want to say something like [remote "origin"] prefetchref = refs/heads/jk/* prefetchref = !refs/heads/jk/wip/* Now your prefetchref has some negative pattern. When filtering what the existing conditional in filter_prefetch_refspec() passed, you'd inspect the refspec element and see if it matches any of the positive patterns, and also if it does not match any of the negative ones. refs/heads/next does not match any positive ones and gets rejected. refs/heads/jk/main does match the positive pattern 'refs/heads/jk/*', and it does not match the negative pattern 'refs/heads/jk/wip/*', so it passes and will get prefetched. For the case (4), you would write something like [remote "origin"] prefetchref = !refs/heads/wip/* There is no positive pattern, so if you blindly apply the rule you used for (3) above, everything will get rejected, which is not what you want. refs/heads/main does not match any positive patterns (because there are no positive patterns given), but it does not match any negative ones, so it passes and will get prefetched. The condition to implement the above four cases (which I think covers all the cases we care about, but I won't guarantee it is exhaustive---you'd need to sanity check) would be - If there is 1 or more positive prefetchref patterns, the refspec element must match one of them to be considered for the next rule. Otherwise, it will not be prefetched. - If the refspec element matches any of negative prefetchref patterns, it will not be prefetched.
On Fri, Sep 13, 2024 at 10:28 PM Junio C Hamano <gitster@pobox.com> wrote: > > Shubham Kanodia <shubham.kanodia10@gmail.com> writes: > > > Ideally, a repository should be able to specify (say): > > > > remote.origin.fetch=+refs/heads/*:refs/remotes/origin/* > > remote.origin.prefetchref=refs/heads/main > > > > This configuration would maintain the normal behavior for fetches, but > > only prefetch the main branch. > > The rationale for this is that the main branch typically serves as the > > HEAD from which future branches will be forked in an active > > repository. > > Oh, that is 100% agreeable. All I wanted to caution you about was > what should happen when remote.origin.prefetchref in the above is > replaced to something like: > > [remote "origin"] > fetch = +refs/heads/*:refs/remotes/origin/* > prefetchref = refs/notes/* > > That is, if your refspec used for your real fetch (i.e. "git fetch" > without the "--prefetch" option) does not fetch anything from > "refs/notes/" hierarchy, prefetching from the hierarchy does not > help the real fetch. I do not have a strong preference between > marking it as an error and silently ignoring the prefetch but > leaning towards the latter, and that is why my suggestion to > implement this new "prefetchref" as something that extends the > existing filter_prefetch_refspec(), which already filters out > refspec that fetches from the refs/tags/ namespace (and the ones > that do not store by having NULL in the .dst side). > > > Regarding: > > > >> If prefetch_refs contains only positive patterns, then a refspec whose source > >> doesn't match any of these patterns is rejected. > > > > Simply rejecting a source refspec pattern in `remote.<remote>.fetch` > > wouldn't achieve our goal here. > > I used the verb "reject" to mean "filter out", just like a refspec > with left-hand-side that begins with "refs/tags/" is filtered out > in the current filter_prefetch_refspec(). And that is exactly what > we want to achieve our goal here. > > IOW, you would > > * read their ref advertisement, and pick only the ones that have a > matching pattern in the left-hand-side of a remote.$name.fetch > element. With a more recent protocol, remote.$name.fetch may > have already participated in narrowing what they advertise to > begin with, but the end result is the same. > > * give it to filter_prefetch_refspec(). > > * filter_prefetch_refspec() inspects the refspec elements, and > rejects ones with no right-hand-side, and ones with > left-hand-side that begin with refs/tags/. The current code > without your patch already works this way up to this point. > > * We extend the above filtering so that in addition to the two > kinds we currently reject, reject the ones that do not match the > prefetchref criteria. This is what is needed to implement > "prefetchref configuration limits the set of refs that get > prefetched". > > And what you quoted is a beginning of how "prefetchref configuration > limits". It cannot be "add to what filter_prefetch_refspec() did", > like done by the implementation in the patch we are discussing. > > If your configuration were this: > > [remote "origin"] > fetch = +refs/heads/*:refs/remotes/origin/* > > you would want a way to say things like > > (1) I want to prefetch everything I usually fetch > > (2) Among the ones I usually fetch, I only want to prefetch master > and next branches. > > (3) I want to prefetch only refs/heads/jk/* branches, but not > refs/heads/jk/wip/* branches. > > (4) I want to prefetch everything I usually fetch, except for > refs/heads/wip/* branches. > > The case (1) is the simplest. You will leave .prefetchref empty. > > For the case (2), you would write something like > > [remote "origin"] > prefetchref = refs/heads/master > prefetchref = refs/heads/next > > So, when your prefetchref has all positive patterns, after the > existing conditional in filter_prefetch_refspec() passes a refspec > whose right-hand-side (i.e., .dst) is not NULL and whose > left-hand-side (i.e., .src) does not begin with "refs/tags/", you > further inspect and make sure it matches one of these prefetchref > patterns. In example (2), if they advertised master, next, and seen > branches, refs/heads/seen would be filtered out because it matches > neither of the two patterns, so we would end up prefetching master > and next branches. > > For the case (3), you would want to say something like > > [remote "origin"] > prefetchref = refs/heads/jk/* > prefetchref = !refs/heads/jk/wip/* > > Now your prefetchref has some negative pattern. When filtering what > the existing conditional in filter_prefetch_refspec() passed, you'd > inspect the refspec element and see if it matches any of the > positive patterns, and also if it does not match any of the negative > ones. refs/heads/next does not match any positive ones and gets > rejected. refs/heads/jk/main does match the positive pattern > 'refs/heads/jk/*', and it does not match the negative pattern > 'refs/heads/jk/wip/*', so it passes and will get prefetched. > > For the case (4), you would write something like > > [remote "origin"] > prefetchref = !refs/heads/wip/* > > There is no positive pattern, so if you blindly apply the rule you > used for (3) above, everything will get rejected, which is not what > you want. refs/heads/main does not match any positive patterns > (because there are no positive patterns given), but it does not > match any negative ones, so it passes and will get prefetched. > > The condition to implement the above four cases (which I think > covers all the cases we care about, but I won't guarantee it is > exhaustive---you'd need to sanity check) would be > > - If there is 1 or more positive prefetchref patterns, the refspec > element must match one of them to be considered for the next > rule. Otherwise, it will not be prefetched. > > - If the refspec element matches any of negative prefetchref > patterns, it will not be prefetched. > If we're trying to determine if a pattern (remote.<remote>.prefetchref) is a subset of another or not (remote.<remote>.fetch) (to not accidentally expand the scope beyond `fetch`), we'd need a function that does that pattern-to-pattern. Are you aware of any existing functions that do so? I see `wildmatch`, but that is only used for pattern to full-text comparisons.
Shubham Kanodia <shubham.kanodia10@gmail.com> writes: > If we're trying to determine if a pattern > (remote.<remote>.prefetchref) is a subset of another or not > (remote.<remote>.fetch) (to not accidentally expand the scope beyond > `fetch`), > we'd need a function that does that pattern-to-pattern. Are you aware > of any existing functions that do so? There is no such computation for this application. Such a computation might become needed if you wanted to complain that the user gave .prefetchref pattern that would never match what .fetch patterns would allow to pass. But there is no such need. You will first get the advertised refs from the remote. Existing logic filteres them down to what matches configured remote.$name.fetch variable. filter_prefetch_refspec() may further reduces the result by removing those whose .src side begins with "refs/tags/". Now you only look at what survived the above existing filtering, and further narrow it down by picking only ones that match the prefetch condition. If the refspec that survived the filtering by the fetch refspec (and existing logic in filter_prefetch_refspec()) does not satisfy the prefetch condition, it won't be prefetched. Since you are using .prefetch ONLY TO narrow the result down, by definition, you are not adding anything what .fetch configuration would not have fetched.
On Sun, Sep 15, 2024 at 1:41 AM Junio C Hamano <gitster@pobox.com> wrote: > > Shubham Kanodia <shubham.kanodia10@gmail.com> writes: > > > If we're trying to determine if a pattern > > (remote.<remote>.prefetchref) is a subset of another or not > > (remote.<remote>.fetch) (to not accidentally expand the scope beyond > > `fetch`), > > we'd need a function that does that pattern-to-pattern. Are you aware > > of any existing functions that do so? > > There is no such computation for this application. Such a > computation might become needed if you wanted to complain that the > user gave .prefetchref pattern that would never match what .fetch > patterns would allow to pass. But there is no such need. > > You will first get the advertised refs from the remote. > > Existing logic filteres them down to what matches configured > remote.$name.fetch variable. filter_prefetch_refspec() may further > reduces the result by removing those whose .src side begins with > "refs/tags/". > > Now you only look at what survived the above existing filtering, and > further narrow it down by picking only ones that match the prefetch > condition. If the refspec that survived the filtering by the fetch > refspec (and existing logic in filter_prefetch_refspec()) does not > satisfy the prefetch condition, it won't be prefetched. > > Since you are using .prefetch ONLY TO narrow the result down, by > definition, you are not adding anything what .fetch configuration > would not have fetched. > > Ah I see — I assumed you expected all filtering for `prefetch` (existing & new) to happen inside `filter_prefetch_refspec`. But that threw me off, because `filter_prefetch_refspec` doesn't deal with advertised refs from remote, and only patterns. Let me know if the diff in the following mail is closer to what you were expecting?
Junio C Hamano <gitster@pobox.com> writes: > Existing logic filteres them down to what matches configured > remote.$name.fetch variable. filter_prefetch_refspec() may further > reduces the result by removing those whose .src side begins with > "refs/tags/". > > Now you only look at what survived the above existing filtering, and > further narrow it down by picking only ones that match the prefetch > condition. If the refspec that survived the filtering by the fetch > refspec (and existing logic in filter_prefetch_refspec()) does not > satisfy the prefetch condition, it won't be prefetched. Sorry, but I misread the code. By the time filter_prefetch_refspec() is called by get_ref_map(), this caller has "remote_refs" linked list that describes each ref it is going to fetch, so conceptually what is left for the prefetch logic to do is to selectively discard the elements on this list that are not worth asking the remote to send new object data for and use the remainder of the list in remote_refs list, and the logic to further limit this list with the prefetchref configuration would fit well here, but filter_prefetch_refspec() does not work on this list at all X-<. So the prefetchref limitation needs to come outside the function.
On Sun, Sep 15, 2024 at 9:42 PM Junio C Hamano <gitster@pobox.com> wrote: > > Junio C Hamano <gitster@pobox.com> writes: > > > Existing logic filteres them down to what matches configured > > remote.$name.fetch variable. filter_prefetch_refspec() may further > > reduces the result by removing those whose .src side begins with > > "refs/tags/". > > > > Now you only look at what survived the above existing filtering, and > > further narrow it down by picking only ones that match the prefetch > > condition. If the refspec that survived the filtering by the fetch > > refspec (and existing logic in filter_prefetch_refspec()) does not > > satisfy the prefetch condition, it won't be prefetched. > > Sorry, but I misread the code. > > By the time filter_prefetch_refspec() is called by get_ref_map(), > this caller has "remote_refs" linked list that describes each ref it > is going to fetch, so conceptually what is left for the prefetch > logic to do is to selectively discard the elements on this list that > are not worth asking the remote to send new object data for and use > the remainder of the list in remote_refs list, and the logic to > further limit this list with the prefetchref configuration would fit > well here, but filter_prefetch_refspec() does not work on this list > at all X-<. So the prefetchref limitation needs to come outside the > function. Sure, can you take a look at the latest diff (shared in the last reply)? Moved the prefetchref filtering to `get_ref_map` where advertised refs are available.
diff --git a/Documentation/config/remote.txt b/Documentation/config/remote.txt index 8efc53e836d..b25d76dd3b1 100644 --- a/Documentation/config/remote.txt +++ b/Documentation/config/remote.txt @@ -33,6 +33,12 @@ remote.<name>.fetch:: The default set of "refspec" for linkgit:git-fetch[1]. See linkgit:git-fetch[1]. +remote.<name>.prefetchref:: + Specify the refs to be prefetched when fetching from this remote. + The value is a space-separated list of ref patterns (e.g., "refs/heads/master refs/heads/develop*"). + These patterns are used as the source part of the refspecs for prefetching. + This can be used to optimize fetch operations by specifying exactly which refs should be prefetched. + remote.<name>.push:: The default set of "refspec" for linkgit:git-push[1]. See linkgit:git-push[1]. diff --git a/builtin/fetch.c b/builtin/fetch.c index b2b5aee5bf2..6e584fa2ebb 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -434,6 +434,30 @@ static void find_non_local_tags(const struct ref *refs, oidset_clear(&fetch_oids); } +static void apply_prefetch_refspec(struct remote *remote, struct refspec *rs) +{ + if (remote->prefetch_refs.nr > 0) { + int i; + for (i = 0; i < remote->prefetch_refs.nr; i++) { + const char *src = remote->prefetch_refs.items[i].string; + struct strbuf dst = STRBUF_INIT; + + strbuf_addf(&dst, "refs/prefetch/%s/", remote->name); + if (starts_with(src, "refs/heads/")) { + strbuf_addstr(&dst, src + 11); + } else if (starts_with(src, "refs/")) { + strbuf_addstr(&dst, src + 5); + } else { + strbuf_addstr(&dst, src); + } + + refspec_appendf(rs, "%s:%s", src, dst.buf); + strbuf_release(&dst); + } + } +} + + static void filter_prefetch_refspec(struct refspec *rs) { int i; @@ -502,8 +526,11 @@ static struct ref *get_ref_map(struct remote *remote, int existing_refs_populated = 0; filter_prefetch_refspec(rs); - if (remote) + if (remote) { filter_prefetch_refspec(&remote->fetch); + if (prefetch) + apply_prefetch_refspec(remote, rs); + } if (rs->nr) { struct refspec *fetch_refspec; diff --git a/remote.c b/remote.c index 8f3dee13186..b46d62b2c47 100644 --- a/remote.c +++ b/remote.c @@ -141,6 +141,7 @@ static struct remote *make_remote(struct remote_state *remote_state, ret->prune = -1; /* unspecified */ ret->prune_tags = -1; /* unspecified */ ret->name = xstrndup(name, len); + string_list_init_dup(&ret->prefetch_refs); refspec_init(&ret->push, REFSPEC_PUSH); refspec_init(&ret->fetch, REFSPEC_FETCH); @@ -166,6 +167,7 @@ static void remote_clear(struct remote *remote) free((char *)remote->uploadpack); FREE_AND_NULL(remote->http_proxy); FREE_AND_NULL(remote->http_proxy_authmethod); + string_list_clear(&remote->prefetch_refs, 0); } static void add_merge(struct branch *branch, const char *name) @@ -456,6 +458,12 @@ static int handle_config(const char *key, const char *value, remote->prune = git_config_bool(key, value); else if (!strcmp(subkey, "prunetags")) remote->prune_tags = git_config_bool(key, value); + else if (!strcmp(subkey, "prefetchref")) { + if (!value) + return config_error_nonbool(key); + string_list_split(&remote->prefetch_refs, value, ' ', -1); + return 0; + } else if (!strcmp(subkey, "url")) { if (!value) return config_error_nonbool(key); diff --git a/remote.h b/remote.h index b901b56746d..c18e68e0d8d 100644 --- a/remote.h +++ b/remote.h @@ -5,6 +5,7 @@ #include "hashmap.h" #include "refspec.h" #include "strvec.h" +#include "string-list.h" struct option; struct transport_ls_refs_options; @@ -77,6 +78,8 @@ struct remote { struct refspec fetch; + struct string_list prefetch_refs; + /* * The setting for whether to fetch tags (as a separate rule from the * configured refspecs); diff --git a/t/t7900-maintenance.sh b/t/t7900-maintenance.sh index abae7a97546..2ad5b922d83 100755 --- a/t/t7900-maintenance.sh +++ b/t/t7900-maintenance.sh @@ -245,6 +245,76 @@ test_expect_success 'prefetch multiple remotes' ' test_subcommand git fetch remote2 $fetchargs <skip-remote1.txt ' +test_expect_success 'prefetch only acts on remote.<name>.prefetchref refs if present' ' + test_create_repo prefetch-test-mixed-patterns && + ( + cd prefetch-test-mixed-patterns && + test_commit initial && + git clone . clone1 && + git clone . clone2 && + + git remote add remote1 "file://$(pwd)/clone1" && + git remote add remote2 "file://$(pwd)/clone2" && + + # Set single prefetchref pattern for remote1 and multiple for remote2 + git config remote.remote1.prefetchref "refs/heads/foo" && + git config remote.remote2.prefetchref "refs/heads/feature/* refs/heads/topic" && + + # Create branches in clone1 and push + ( + cd clone1 && + git checkout -b foo && + test_commit foo-commit && + git checkout -b feature/a && + test_commit feature-a-commit && + git checkout -b other && + test_commit other-commit && + git push origin foo feature/a other + ) && + + # Create branches in clone2 and push + ( + cd clone2 && + git checkout -b topic && + test_commit master-commit && + git checkout -b feature/x && + test_commit feature-x-commit && + git checkout -b feature/y && + test_commit feature-y-commit && + git checkout -b dev && + test_commit dev-commit && + git push origin topic feature/x feature/y dev + ) && + + # Run maintenance prefetch task + GIT_TRACE2_EVENT="$(pwd)/prefetch.txt" git maintenance run --task=prefetch 2>/dev/null && + + # Check that only specified refs were prefetched + fetchargs="--prefetch --prune --no-tags --no-write-fetch-head --recurse-submodules=no --quiet" && + test_subcommand git fetch remote1 $fetchargs <prefetch.txt && + test_subcommand git fetch remote2 $fetchargs <prefetch.txt && + ls -R .git/refs/prefetch && + + # Verify that only specified refs are in the prefetch refs for remote1 + git rev-parse refs/prefetch/remotes/remote1/foo && + test_must_fail git rev-parse refs/prefetch/remotes/remote1/feature/a && + test_must_fail git rev-parse refs/prefetch/remotes/remote1/other && + + # Verify that only specified refs are in the prefetch refs for remote2 + git rev-parse refs/prefetch/remotes/remote2/feature/x && + git rev-parse refs/prefetch/remotes/remote2/feature/y && + git rev-parse refs/prefetch/remotes/remote2/topic && + test_must_fail git rev-parse refs/prefetch/remotes/remote2/dev && + + # Fetch all refs and compare + git fetch --all && + test_cmp_rev refs/remotes/remote1/foo refs/prefetch/remotes/remote1/foo && + test_cmp_rev refs/remotes/remote2/feature/x refs/prefetch/remotes/remote2/feature/x && + test_cmp_rev refs/remotes/remote2/feature/y refs/prefetch/remotes/remote2/feature/y && + test_cmp_rev refs/remotes/remote2/topic refs/prefetch/remotes/remote2/topic + ) +' + test_expect_success 'loose-objects task' ' # Repack everything so we know the state of the object dir git repack -adk &&