diff mbox series

[v2,4/7] fetch: only populate existing_refs if needed

Message ID 77bc83e7f2c0a9c95e2ff31aa7a11295bbdf054c.1597184949.git.jonathantanmy@google.com (mailing list archive)
State New, archived
Headers show
Series Lazy fetch with subprocess | expand

Commit Message

Jonathan Tan Aug. 11, 2020, 10:52 p.m. UTC
When fetching tags, Git only writes tags that do not already exist in
the client repository. This necessitates an iteration over all the refs,
but fetch performs this iteration even if no tags are fetched.

This issue is more severe in a partial clone because the iteration over
refs also checks that the targets of those refs are present,
necessitating a lazy fetch if the target is missing.

Therefore, iterate over the refs only when necessary. The user can avoid
this iteration by refraining from fetching tags, for example, by passing
--no-tags as an argument. A subsequent patch will also teach Git to use
"git fetch" to lazy-fetch missing objects in a partial clone, thus also
making use of this change.

Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
---
 builtin/fetch.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

Comments

Junio C Hamano Aug. 12, 2020, 6:06 p.m. UTC | #1
Jonathan Tan <jonathantanmy@google.com> writes:

> When fetching tags, Git only writes tags that do not already exist in
> the client repository. This necessitates an iteration over all the refs,
> but fetch performs this iteration even if no tags are fetched.

Hmph, the change itself mechanically makes sense (i.e. if the
ref_map has no entry with peer_ref defined, existing_refs hashmap is
never looked up, so there is no reason to populate it), but the
explanation of log message, especially the part that places strees
on tags, does not.  Wouldn't a plain vanilla "git fetch" with no
argument that uses the remote.origin.fetch populated with standard
refspecs to maintain remote-tracking branches use this hash?

I think the readers need to be somehow made aware of the fact that
the author of this commit message is concentrated primarily on fetch
used as a mechanism to grab specific objects, often used by lazy
fetch, without touching any remote-tracking refs.  Because tag
following is on by default (which is a convenient default for the
plain vanilla fetches that updates remote-tracking branches),
however, such a "these specific objects only" fetch still tries to
trigger the auto following of tags, and necessitates "--no-tags" to
take advantage of the optimization proposed by this patch.

So nothing written in the proposed log message is incorrect per-se,
but it is not very friendly to readers without clarivoyance.

> diff --git a/builtin/fetch.c b/builtin/fetch.c
> index 29db219c68..6460ce3f4e 100644
> --- a/builtin/fetch.c
> +++ b/builtin/fetch.c
> @@ -449,6 +449,7 @@ static struct ref *get_ref_map(struct remote *remote,
>  	struct ref *orefs = NULL, **oref_tail = &orefs;
>  
>  	struct hashmap existing_refs;
> +	int existing_refs_populated = 0;
>  
>  	if (rs->nr) {
>  		struct refspec *fetch_refspec;
> @@ -542,15 +543,18 @@ static struct ref *get_ref_map(struct remote *remote,
>  
>  	ref_map = ref_remove_duplicates(ref_map);
>  
> -	refname_hash_init(&existing_refs);
> -	for_each_ref(add_one_refname, &existing_refs);
> -
>  	for (rm = ref_map; rm; rm = rm->next) {
>  		if (rm->peer_ref) {
>  			const char *refname = rm->peer_ref->name;
>  			struct refname_hash_entry *peer_item;
>  			unsigned int hash = strhash(refname);
>  
> +			if (!existing_refs_populated) {
> +				refname_hash_init(&existing_refs);
> +				for_each_ref(add_one_refname, &existing_refs);
> +				existing_refs_populated = 1;
> +			}
> +
>  			peer_item = hashmap_get_entry_from_hash(&existing_refs,
>  						hash, refname,
>  						struct refname_hash_entry, ent);
> @@ -560,7 +564,8 @@ static struct ref *get_ref_map(struct remote *remote,
>  			}
>  		}
>  	}
> -	hashmap_free_entries(&existing_refs, struct refname_hash_entry, ent);
> +	if (existing_refs_populated)
> +		hashmap_free_entries(&existing_refs, struct refname_hash_entry, ent);
>  
>  	return ref_map;
>  }
diff mbox series

Patch

diff --git a/builtin/fetch.c b/builtin/fetch.c
index 29db219c68..6460ce3f4e 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -449,6 +449,7 @@  static struct ref *get_ref_map(struct remote *remote,
 	struct ref *orefs = NULL, **oref_tail = &orefs;
 
 	struct hashmap existing_refs;
+	int existing_refs_populated = 0;
 
 	if (rs->nr) {
 		struct refspec *fetch_refspec;
@@ -542,15 +543,18 @@  static struct ref *get_ref_map(struct remote *remote,
 
 	ref_map = ref_remove_duplicates(ref_map);
 
-	refname_hash_init(&existing_refs);
-	for_each_ref(add_one_refname, &existing_refs);
-
 	for (rm = ref_map; rm; rm = rm->next) {
 		if (rm->peer_ref) {
 			const char *refname = rm->peer_ref->name;
 			struct refname_hash_entry *peer_item;
 			unsigned int hash = strhash(refname);
 
+			if (!existing_refs_populated) {
+				refname_hash_init(&existing_refs);
+				for_each_ref(add_one_refname, &existing_refs);
+				existing_refs_populated = 1;
+			}
+
 			peer_item = hashmap_get_entry_from_hash(&existing_refs,
 						hash, refname,
 						struct refname_hash_entry, ent);
@@ -560,7 +564,8 @@  static struct ref *get_ref_map(struct remote *remote,
 			}
 		}
 	}
-	hashmap_free_entries(&existing_refs, struct refname_hash_entry, ent);
+	if (existing_refs_populated)
+		hashmap_free_entries(&existing_refs, struct refname_hash_entry, ent);
 
 	return ref_map;
 }