diff mbox series

[9/9] fetch-pack: drop custom loose object cache

Message ID 20181112145558.GI7400@sigill.intra.peff.net
State New, archived
Headers show
Series caching loose objects | expand

Commit Message

Jeff King Nov. 12, 2018, 2:55 p.m. UTC
Commit 024aa4696c (fetch-pack.c: use oidset to check existence of loose
object, 2018-03-14) added a cache to avoid calling stat() for a bunch of
loose objects we don't have.

Now that OBJECT_INFO_QUICK handles this caching itself, we can drop the
custom solution.

Note that this might perform slightly differently, as the original code
stopped calling readdir() when we saw more loose objects than there were
refs. So:

  1. The old code might have spent work on readdir() to fill the cache,
     but then decided there were too many loose objects, wasting that
     effort.

  2. The new code might spend a lot of time on readdir() if you have a
     lot of loose objects, even though there are very few objects to
     ask about.

In practice it probably won't matter either way; see the previous commit
for some discussion of the tradeoff.

Signed-off-by: Jeff King <peff@peff.net>
---
 fetch-pack.c | 39 ++-------------------------------------
 1 file changed, 2 insertions(+), 37 deletions(-)

Comments

René Scharfe Nov. 12, 2018, 7:25 p.m. UTC | #1
Am 12.11.2018 um 15:55 schrieb Jeff King:
> Commit 024aa4696c (fetch-pack.c: use oidset to check existence of loose
> object, 2018-03-14) added a cache to avoid calling stat() for a bunch of
> loose objects we don't have.
> 
> Now that OBJECT_INFO_QUICK handles this caching itself, we can drop the
> custom solution.
> 
> Note that this might perform slightly differently, as the original code
> stopped calling readdir() when we saw more loose objects than there were
> refs. So:
> 
>   1. The old code might have spent work on readdir() to fill the cache,
>      but then decided there were too many loose objects, wasting that
>      effort.
> 
>   2. The new code might spend a lot of time on readdir() if you have a
>      lot of loose objects, even though there are very few objects to
>      ask about.

Plus the old code used an oidset while the new one uses an oid_array.

> In practice it probably won't matter either way; see the previous commit
> for some discussion of the tradeoff.
> 
> Signed-off-by: Jeff King <peff@peff.net>
> ---
>  fetch-pack.c | 39 ++-------------------------------------
>  1 file changed, 2 insertions(+), 37 deletions(-)
> 
> diff --git a/fetch-pack.c b/fetch-pack.c
> index b3ed7121bc..25a88f4eb2 100644
> --- a/fetch-pack.c
> +++ b/fetch-pack.c
> @@ -636,23 +636,6 @@ struct loose_object_iter {
>  	struct ref *refs;
>  };
>  
> -/*
> - *  If the number of refs is not larger than the number of loose objects,
> - *  this function stops inserting.
> - */
> -static int add_loose_objects_to_set(const struct object_id *oid,
> -				    const char *path,
> -				    void *data)
> -{
> -	struct loose_object_iter *iter = data;
> -	oidset_insert(iter->loose_object_set, oid);
> -	if (iter->refs == NULL)
> -		return 1;
> -
> -	iter->refs = iter->refs->next;
> -	return 0;
> -}
> -
>  /*
>   * Mark recent commits available locally and reachable from a local ref as
>   * COMPLETE. If args->no_dependents is false, also mark COMPLETE remote refs as
> @@ -670,30 +653,14 @@ static void mark_complete_and_common_ref(struct fetch_negotiator *negotiator,
>  	struct ref *ref;
>  	int old_save_commit_buffer = save_commit_buffer;
>  	timestamp_t cutoff = 0;
> -	struct oidset loose_oid_set = OIDSET_INIT;
> -	int use_oidset = 0;
> -	struct loose_object_iter iter = {&loose_oid_set, *refs};
> -
> -	/* Enumerate all loose objects or know refs are not so many. */
> -	use_oidset = !for_each_loose_object(add_loose_objects_to_set,
> -					    &iter, 0);
>  
>  	save_commit_buffer = 0;
>  
>  	for (ref = *refs; ref; ref = ref->next) {
>  		struct object *o;
> -		unsigned int flags = OBJECT_INFO_QUICK;
>  
> -		if (use_oidset &&
> -		    !oidset_contains(&loose_oid_set, &ref->old_oid)) {
> -			/*
> -			 * I know this does not exist in the loose form,
> -			 * so check if it exists in a non-loose form.
> -			 */
> -			flags |= OBJECT_INFO_IGNORE_LOOSE;

This removes the only user of OBJECT_INFO_IGNORE_LOOSE.  #leftoverbits

> -		}
> -
> -		if (!has_object_file_with_flags(&ref->old_oid, flags))
> +		if (!has_object_file_with_flags(&ref->old_oid,
> +						OBJECT_INFO_QUICK))
>  			continue;
>  		o = parse_object(the_repository, &ref->old_oid);
>  		if (!o)
> @@ -710,8 +677,6 @@ static void mark_complete_and_common_ref(struct fetch_negotiator *negotiator,
>  		}
>  	}
>  
> -	oidset_clear(&loose_oid_set);
> -
>  	if (!args->deepen) {
>  		for_each_ref(mark_complete_oid, NULL);
>  		for_each_cached_alternate(NULL, mark_alternate_complete);
>
Ævar Arnfjörð Bjarmason Nov. 12, 2018, 7:32 p.m. UTC | #2
On Mon, Nov 12 2018, René Scharfe wrote:

> Am 12.11.2018 um 15:55 schrieb Jeff King:
>> Commit 024aa4696c (fetch-pack.c: use oidset to check existence of loose
>> object, 2018-03-14) added a cache to avoid calling stat() for a bunch of
>> loose objects we don't have.
>>
>> Now that OBJECT_INFO_QUICK handles this caching itself, we can drop the
>> custom solution.
>>
>> Note that this might perform slightly differently, as the original code
>> stopped calling readdir() when we saw more loose objects than there were
>> refs. So:
>>
>>   1. The old code might have spent work on readdir() to fill the cache,
>>      but then decided there were too many loose objects, wasting that
>>      effort.
>>
>>   2. The new code might spend a lot of time on readdir() if you have a
>>      lot of loose objects, even though there are very few objects to
>>      ask about.
>
> Plus the old code used an oidset while the new one uses an oid_array.
>
>> In practice it probably won't matter either way; see the previous commit
>> for some discussion of the tradeoff.
>>
>> Signed-off-by: Jeff King <peff@peff.net>
>> ---
>>  fetch-pack.c | 39 ++-------------------------------------
>>  1 file changed, 2 insertions(+), 37 deletions(-)
>>
>> diff --git a/fetch-pack.c b/fetch-pack.c
>> index b3ed7121bc..25a88f4eb2 100644
>> --- a/fetch-pack.c
>> +++ b/fetch-pack.c
>> @@ -636,23 +636,6 @@ struct loose_object_iter {
>>  	struct ref *refs;
>>  };
>>
>> -/*
>> - *  If the number of refs is not larger than the number of loose objects,
>> - *  this function stops inserting.
>> - */
>> -static int add_loose_objects_to_set(const struct object_id *oid,
>> -				    const char *path,
>> -				    void *data)
>> -{
>> -	struct loose_object_iter *iter = data;
>> -	oidset_insert(iter->loose_object_set, oid);
>> -	if (iter->refs == NULL)
>> -		return 1;
>> -
>> -	iter->refs = iter->refs->next;
>> -	return 0;
>> -}
>> -
>>  /*
>>   * Mark recent commits available locally and reachable from a local ref as
>>   * COMPLETE. If args->no_dependents is false, also mark COMPLETE remote refs as
>> @@ -670,30 +653,14 @@ static void mark_complete_and_common_ref(struct fetch_negotiator *negotiator,
>>  	struct ref *ref;
>>  	int old_save_commit_buffer = save_commit_buffer;
>>  	timestamp_t cutoff = 0;
>> -	struct oidset loose_oid_set = OIDSET_INIT;
>> -	int use_oidset = 0;
>> -	struct loose_object_iter iter = {&loose_oid_set, *refs};
>> -
>> -	/* Enumerate all loose objects or know refs are not so many. */
>> -	use_oidset = !for_each_loose_object(add_loose_objects_to_set,
>> -					    &iter, 0);
>>
>>  	save_commit_buffer = 0;
>>
>>  	for (ref = *refs; ref; ref = ref->next) {
>>  		struct object *o;
>> -		unsigned int flags = OBJECT_INFO_QUICK;
>>
>> -		if (use_oidset &&
>> -		    !oidset_contains(&loose_oid_set, &ref->old_oid)) {
>> -			/*
>> -			 * I know this does not exist in the loose form,
>> -			 * so check if it exists in a non-loose form.
>> -			 */
>> -			flags |= OBJECT_INFO_IGNORE_LOOSE;
>
> This removes the only user of OBJECT_INFO_IGNORE_LOOSE.  #leftoverbits

With this series applied there's still a use of it left in
oid_object_info_extended()
Jeff King Nov. 12, 2018, 8:07 p.m. UTC | #3
On Mon, Nov 12, 2018 at 08:32:43PM +0100, Ævar Arnfjörð Bjarmason wrote:

> >>  	for (ref = *refs; ref; ref = ref->next) {
> >>  		struct object *o;
> >> -		unsigned int flags = OBJECT_INFO_QUICK;
> >>
> >> -		if (use_oidset &&
> >> -		    !oidset_contains(&loose_oid_set, &ref->old_oid)) {
> >> -			/*
> >> -			 * I know this does not exist in the loose form,
> >> -			 * so check if it exists in a non-loose form.
> >> -			 */
> >> -			flags |= OBJECT_INFO_IGNORE_LOOSE;
> >
> > This removes the only user of OBJECT_INFO_IGNORE_LOOSE.  #leftoverbits
> 
> With this series applied there's still a use of it left in
> oid_object_info_extended()

That's just the code that does something with the flag. No callers pass
it in anymore, so we could drop the flag _and_ that code.

-Peff
René Scharfe Nov. 12, 2018, 8:13 p.m. UTC | #4
Am 12.11.2018 um 20:32 schrieb Ævar Arnfjörð Bjarmason:
> 
> On Mon, Nov 12 2018, René Scharfe wrote:
>> This removes the only user of OBJECT_INFO_IGNORE_LOOSE.  #leftoverbits
> 
> With this series applied there's still a use of it left in
> oid_object_info_extended()

OK, rephrasing: With that patch, OBJECT_INFO_IGNORE_LOOSE is never set
anymore, and its check in oid_object_info_extended() as well as its
definition can be removed.

René
diff mbox series

Patch

diff --git a/fetch-pack.c b/fetch-pack.c
index b3ed7121bc..25a88f4eb2 100644
--- a/fetch-pack.c
+++ b/fetch-pack.c
@@ -636,23 +636,6 @@  struct loose_object_iter {
 	struct ref *refs;
 };
 
-/*
- *  If the number of refs is not larger than the number of loose objects,
- *  this function stops inserting.
- */
-static int add_loose_objects_to_set(const struct object_id *oid,
-				    const char *path,
-				    void *data)
-{
-	struct loose_object_iter *iter = data;
-	oidset_insert(iter->loose_object_set, oid);
-	if (iter->refs == NULL)
-		return 1;
-
-	iter->refs = iter->refs->next;
-	return 0;
-}
-
 /*
  * Mark recent commits available locally and reachable from a local ref as
  * COMPLETE. If args->no_dependents is false, also mark COMPLETE remote refs as
@@ -670,30 +653,14 @@  static void mark_complete_and_common_ref(struct fetch_negotiator *negotiator,
 	struct ref *ref;
 	int old_save_commit_buffer = save_commit_buffer;
 	timestamp_t cutoff = 0;
-	struct oidset loose_oid_set = OIDSET_INIT;
-	int use_oidset = 0;
-	struct loose_object_iter iter = {&loose_oid_set, *refs};
-
-	/* Enumerate all loose objects or know refs are not so many. */
-	use_oidset = !for_each_loose_object(add_loose_objects_to_set,
-					    &iter, 0);
 
 	save_commit_buffer = 0;
 
 	for (ref = *refs; ref; ref = ref->next) {
 		struct object *o;
-		unsigned int flags = OBJECT_INFO_QUICK;
 
-		if (use_oidset &&
-		    !oidset_contains(&loose_oid_set, &ref->old_oid)) {
-			/*
-			 * I know this does not exist in the loose form,
-			 * so check if it exists in a non-loose form.
-			 */
-			flags |= OBJECT_INFO_IGNORE_LOOSE;
-		}
-
-		if (!has_object_file_with_flags(&ref->old_oid, flags))
+		if (!has_object_file_with_flags(&ref->old_oid,
+						OBJECT_INFO_QUICK))
 			continue;
 		o = parse_object(the_repository, &ref->old_oid);
 		if (!o)
@@ -710,8 +677,6 @@  static void mark_complete_and_common_ref(struct fetch_negotiator *negotiator,
 		}
 	}
 
-	oidset_clear(&loose_oid_set);
-
 	if (!args->deepen) {
 		for_each_ref(mark_complete_oid, NULL);
 		for_each_cached_alternate(NULL, mark_alternate_complete);