[v2,2/2] oidset: use khash

Message ID 5efe6695-2e82-786c-1170-7874978cb534@web.de

Commit Message

René Scharfe Oct. 3, 2018, 1:16 p.m. UTC
Reimplement oidset using khash.h in order to reduce its memory footprint
and make it faster.

Performance of a command that mainly checks for duplicate objects using
an oidset, with master and Clang 6.0.1:

  $ cmd="./git-cat-file --batch-all-objects --unordered --buffer --batch-check='%(objectname)'"

  $ /usr/bin/time $cmd >/dev/null
  0.22user 0.03system 0:00.25elapsed 99%CPU (0avgtext+0avgdata 48484maxresident)k
  0inputs+0outputs (0major+11204minor)pagefaults 0swaps

  $ hyperfine "$cmd"
  Benchmark #1: ./git-cat-file --batch-all-objects --unordered --buffer --batch-check='%(objectname)'

    Time (mean ± σ):     250.0 ms ±   6.0 ms    [User: 225.9 ms, System: 23.6 ms]

    Range (min … max):   242.0 ms … 261.1 ms

And with this patch:

  $ /usr/bin/time $cmd >/dev/null
  0.14user 0.00system 0:00.15elapsed 100%CPU (0avgtext+0avgdata 41396maxresident)k
  0inputs+0outputs (0major+8318minor)pagefaults 0swaps

  $ hyperfine "$cmd"
  Benchmark #1: ./git-cat-file --batch-all-objects --unordered --buffer --batch-check='%(objectname)'

    Time (mean ± σ):     151.9 ms ±   4.9 ms    [User: 130.5 ms, System: 21.2 ms]

    Range (min … max):   148.2 ms … 170.4 ms
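That is roughly 40% less run time and about 7 MB less peak memory
for this test.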

Initial-patch-by: Jeff King <peff@peff.net>
Signed-off-by: Rene Scharfe <l.s.r@web.de>
---
 fetch-pack.c |  2 +-
 oidset.c     | 34 ++++++++++++----------------------
 oidset.h     | 36 ++++++++++++++++++++++++++++--------
 3 files changed, 41 insertions(+), 31 deletions(-)

Comments

Jeff King Oct. 3, 2018, 7:40 p.m. UTC | #1
On Wed, Oct 03, 2018 at 03:16:39PM +0200, René Scharfe wrote:

> Performance of a command that mainly checks for duplicate objects using
> an oidset, with master and Clang 6.0.1:
> 
>   $ cmd="./git-cat-file --batch-all-objects --unordered --buffer --batch-check='%(objectname)'"
> 
>   $ /usr/bin/time $cmd >/dev/null
>   0.22user 0.03system 0:00.25elapsed 99%CPU (0avgtext+0avgdata 48484maxresident)k
>   0inputs+0outputs (0major+11204minor)pagefaults 0swaps
> 
>   $ hyperfine "$cmd"
>   Benchmark #1: ./git-cat-file --batch-all-objects --unordered --buffer --batch-check='%(objectname)'
> 
>     Time (mean ± σ):     250.0 ms ±   6.0 ms    [User: 225.9 ms, System: 23.6 ms]
> 
>     Range (min … max):   242.0 ms … 261.1 ms
> 
> And with this patch:
> 
>   $ /usr/bin/time $cmd >/dev/null
>   0.14user 0.00system 0:00.15elapsed 100%CPU (0avgtext+0avgdata 41396maxresident)k
>   0inputs+0outputs (0major+8318minor)pagefaults 0swaps
> 
>   $ hyperfine "$cmd"
>   Benchmark #1: ./git-cat-file --batch-all-objects --unordered --buffer --batch-check='%(objectname)'
> 
>     Time (mean ± σ):     151.9 ms ±   4.9 ms    [User: 130.5 ms, System: 21.2 ms]
> 
>     Range (min … max):   148.2 ms … 170.4 ms

Thanks. Just to reinforce these timings, a similar command on linux.git
drops from 4.0s to 2.3s.

The patch itself mostly looks good. A few small comments:

> diff --git a/fetch-pack.c b/fetch-pack.c
> index 75047a4b2a..a839315726 100644
> --- a/fetch-pack.c
> +++ b/fetch-pack.c
> @@ -536,7 +536,7 @@ static int tip_oids_contain(struct oidset *tip_oids,
>  	 * add to "newlist" between calls, the additions will always be for
>  	 * oids that are already in the set.
>  	 */
> -	if (!tip_oids->map.map.tablesize) {
> +	if (!tip_oids->set.n_buckets) {
>  		add_refs_to_oidset(tip_oids, unmatched);
>  		add_refs_to_oidset(tip_oids, newlist);
>  	}

This is a little intimate with the implementation of khash, but I think
it's probably OK (and really no worse than what was there before).

As the comment above notes, I think we're really looking at the case
where this gets populated on the first call, but not subsequent ones. It
might be less hacky to use a "static int initialized" here. Or if we
want to avoid hidden globals, put the logic into filter_refs() to decide
when to populate.
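
To illustrate the first alternative (just a sketch, with the caveat
that the function-local static would make tip_oids_contain() unsafe
to reuse for a second fetch in the same process):

  static int tip_oids_contain(struct oidset *tip_oids,
  			    struct ref *unmatched, struct ref *newlist,
  			    const struct object_id *id)
  {
  	static int initialized;

  	if (!initialized) {
  		add_refs_to_oidset(tip_oids, unmatched);
  		add_refs_to_oidset(tip_oids, newlist);
  		initialized = 1;
  	}
  	return oidset_contains(tip_oids, id);
  }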

I don't think any of that needs to hold up this series, though.

>  int oidset_insert(struct oidset *set, const struct object_id *oid)
>  {
> -	struct oidmap_entry *entry;
> -
> -	if (!set->map.map.tablesize)
> -		oidmap_init(&set->map, 0);
> -	else if (oidset_contains(set, oid))
> -		return 1;
> -
> -	entry = xmalloc(sizeof(*entry));
> -	oidcpy(&entry->oid, oid);
> -
> -	oidmap_put(&set->map, entry);
> -	return 0;
> +	int added;
> +	kh_put_oid(&set->set, *oid, &added);
> +	return !added;
>  }

This actually does the check-and-insert in a single lookup, which is
nice. ;)
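
(For reference: kh_put_oid(), generated by the KHASH_INIT line below,
hashes the key once and reports through its int pointer whether a new
slot was claimed; 0 means the key was already present. Annotated, the
new insert is just:

  int added;
  kh_put_oid(&set->set, *oid, &added);
  return !added;	/* 1 = was already present, 0 = newly added */

with no separate oidset_contains() probe.)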

> diff --git a/oidset.h b/oidset.h
> index 40ec5f87fe..4b90540cd4 100644
> --- a/oidset.h
> +++ b/oidset.h
> [...]
> +KHASH_INIT(oid, struct object_id, int, 0, oid_hash, oid_equal)

This will declare these "static inline". Our other major "macros become
inline functions" code is commit-slab.h, and there we found it necessary
to add MAYBE_UNUSED. I wonder if we ought to be doing the same here (I
don't get any warnings, but I suspect sparse might complain).

It might be nice if these functions could hide inside oidset.c (and just
declare the struct here). It looks like we might be able to do that with
__KHASH_TYPE(), but the double-underscore implies that we're not
supposed to. ;)

I guess we also use a few of them in our inlines here. I'm not 100% sure
that oidset_* needs to be inlined either, but this is at least a pretty
faithful conversion of the original.

-Peff
René Scharfe Oct. 4, 2018, 5:56 a.m. UTC | #2
On 03.10.2018 at 21:40, Jeff King wrote:
> On Wed, Oct 03, 2018 at 03:16:39PM +0200, René Scharfe wrote:
>> diff --git a/fetch-pack.c b/fetch-pack.c
>> index 75047a4b2a..a839315726 100644
>> --- a/fetch-pack.c
>> +++ b/fetch-pack.c
>> @@ -536,7 +536,7 @@ static int tip_oids_contain(struct oidset *tip_oids,
>>  	 * add to "newlist" between calls, the additions will always be for
>>  	 * oids that are already in the set.
>>  	 */
>> -	if (!tip_oids->map.map.tablesize) {
>> +	if (!tip_oids->set.n_buckets) {
>>  		add_refs_to_oidset(tip_oids, unmatched);
>>  		add_refs_to_oidset(tip_oids, newlist);
>>  	}
> 
> This is a little intimate with the implementation of khash, but I think
> it's probably OK (and really no worse than what was there before).
> 
> As the comment above notes, I think we're really looking at the case
> where this gets populated on the first call, but not subsequent ones. It
> might be less hacky to use a "static int initialized" here. Or if we
> want to avoid hidden globals, put the logic into filter_refs() to decide
> when to populate.

Right.  I'd prefer the latter, but was unable to find a nice way that
still populates the oidset lazily.  It's certainly worth another look,
and a separate series.

>> diff --git a/oidset.h b/oidset.h
>> index 40ec5f87fe..4b90540cd4 100644
>> --- a/oidset.h
>> +++ b/oidset.h
>> [...]
>> +KHASH_INIT(oid, struct object_id, int, 0, oid_hash, oid_equal)
> 
> This will declare these "static inline". Our other major "macros become
> inline functions" code is commit-slab.h, and there we found it necessary
> to add MAYBE_UNUSED. I wonder if we ought to be doing the same here (I
> don't get any warnings, but I suspect sparse might complain).

I doubt it (but didn't check) because khash.h defines kh_clear_##name(),
which we don't use anywhere, and there have been no complaints so far.
And if we wanted to add MAYBE_UNUSED then the right place for that would
be in KHASH_INIT, no?
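
A sketch of that, untested, assuming khash.h keeps klib's KHASH_INIT2
indirection that takes the function scope as a macro parameter:

  #define KHASH_INIT(name, khkey_t, khval_t, kh_is_map, hash, equal) \
  	KHASH_INIT2(name, MAYBE_UNUSED static inline, \
  		    khkey_t, khval_t, kh_is_map, hash, equal)

Every generated function would then pick up the attribute at once.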

> It might be nice if these functions could hide inside oidset.c (and just
> declare the struct here). It looks like we might be able to do that with
> __KHASH_TYPE(), but the double-underscore implies that we're not
> supposed to. ;)
> 
> I guess we also use a few of them in our inlines here. I'm not 100% sure
> that oidset_* needs to be inlined either, but this is at least a pretty
> faithful conversion of the original.

We could inline all of the oidset functions, following the spirit of
klib/khash.h.

Or we could uninline all of them, and then we might be able to clean up
oidset.h by using KHASH_DECLARE.  Perhaps we'd need to guard with an
"#ifndef THIS_IS_OIDSET_C" or similar to avoid a clash with KHASH_INIT.

Not sure if any of that would be a worthwhile improvement...
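
A sketch of that shape (untested):

  /* oidset.h */
  #ifndef THIS_IS_OIDSET_C
  KHASH_DECLARE(oid, struct object_id, int)
  #endif

  /* oidset.c */
  #define THIS_IS_OIDSET_C
  #include "oidset.h"
  KHASH_INIT(oid, struct object_id, int, 0, oid_hash, oid_equal)

KHASH_DECLARE emits the struct plus extern prototypes, so oidset.c
would presumably also need non-static definitions (e.g. via
KHASH_INIT2) for those prototypes to resolve.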

René
Jeff King Oct. 4, 2018, 6:48 a.m. UTC | #3
On Thu, Oct 04, 2018 at 07:56:44AM +0200, René Scharfe wrote:

> > As the comment above notes, I think we're really looking at the case
> > where this gets populated on the first call, but not subsequent ones. It
> > might be less hacky to use a "static int initialized" here. Or if we
> > want to avoid hidden globals, put the logic into filter_refs() to decide
> > when to populate.
> 
> Right.  I'd prefer the latter, but was unable to find a nice way that
> still populates the oidset lazily.  It's certainly worth another look,
> and a separate series.

It's a little awkward because the lazy load happens in a conditional.
You can fully encapsulate it like the patch below, but I actually don't
think it's really helping readability.

> >> +KHASH_INIT(oid, struct object_id, int, 0, oid_hash, oid_equal)
> > 
> > This will declare these "static inline". Our other major "macros become
> > inline functions" code is commit-slab.h, and there we found it necessary
> > to add MAYBE_UNUSED. I wonder if we ought to be doing the same here (I
> > don't get any warnings, but I suspect sparse might complain).
> 
> I doubt it (but didn't check) because khash.h defines kh_clear_##name(),
> which we don't use anywhere, and there have been no complaints so far.
> And if we wanted to add MAYBE_UNUSED then the right place for that would
> be in KHASH_INIT, no?

Right, that would be the correct spot. I'm OK to leave it until somebody
complains. Looking at commit-slab again, its functions are straight
statics, not static inline. That's probably the important difference.

> > It might be nice if these functions could hide inside oidset.c (and just
> > declare the struct here). It looks like we might be able to do that with
> > __KHASH_TYPE(), but the double-underscore implies that we're not
> > supposed to. ;)
> > 
> > I guess we also use a few of them in our inlines here. I'm not 100% sure
> > that oidset_* needs to be inlined either, but this is at least a pretty
> > faithful conversion of the original.
> 
> We could inline all of the oidset functions, following the spirit of
> klib/khash.h.
> 
> Or we could uninline all of them, and then we might be able to clean up
> oidset.h by using KHASH_DECLARE.  Perhaps we'd need to guard with an
> "#ifndef THIS_IS_OIDSET_C" or similar to avoid a clash with KHASH_INIT.
> 
> Not sure if any of that would be a worthwhile improvement...

Unless we know something is a performance win to inline, I'd generally
prefer not to.

For a case like this with auto-generated functions, I'm mostly worried
about bloating the compiled code. Either with a bunch of inlined
non-trivial functions, or cases where the compiler says "this is too big
to inline" and generates an anonymous file-scope function, but we end up
with a bunch of duplicates, because we're generating the same functions
in a bunch of C files.

I may be worried about nothing, though. I don't know how clever the
compiler and linker can be there.

-Peff
Jeff King Oct. 4, 2018, 6:50 a.m. UTC | #4
On Thu, Oct 04, 2018 at 02:48:33AM -0400, Jeff King wrote:

> On Thu, Oct 04, 2018 at 07:56:44AM +0200, René Scharfe wrote:
> 
> > > As the comment above notes, I think we're really looking at the case
> > > where this gets populated on the first call, but not subsequent ones. It
> > > might be less hacky to use a "static int initialized" here. Or if we
> > > want to avoid hidden globals, put the logic into filter_refs() to decide
> > > when to populate.
> > 
> > Right.  I'd prefer the latter, but was unable to find a nice way that
> > still populates the oidset lazily.  It's certainly worth another look,
> > and a separate series.
> 
> It's a little awkward because the lazy load happens in a conditional.
> You can fully encapsulate it like the patch below, but I actually don't
> think it's really helping readability.

I forgot the patch, of course. ;)

I'm not really proposing this, just illustrating one direction (that I
think is kind of ugly). Notably it doesn't get rid of the tricky comment
in tip_oids_contain(), because that is explaining why the single load
works even on a list we're still adding to.

diff --git a/fetch-pack.c b/fetch-pack.c
index a839315726..a6212c8758 100644
--- a/fetch-pack.c
+++ b/fetch-pack.c
@@ -526,8 +526,14 @@ static void add_refs_to_oidset(struct oidset *oids, struct ref *refs)
 		oidset_insert(oids, &refs->old_oid);
 }
 
-static int tip_oids_contain(struct oidset *tip_oids,
-			    struct ref *unmatched, struct ref *newlist,
+struct lazy_tip_oids {
+	int loaded;
+	struct oidset oids;
+	struct ref *unmatched;
+	struct ref *newlist;
+};
+
+static int tip_oids_contain(struct lazy_tip_oids *tip_oids,
 			    const struct object_id *id)
 {
 	/*
@@ -536,11 +542,12 @@ static int tip_oids_contain(struct oidset *tip_oids,
 	 * add to "newlist" between calls, the additions will always be for
 	 * oids that are already in the set.
 	 */
-	if (!tip_oids->set.n_buckets) {
-		add_refs_to_oidset(tip_oids, unmatched);
-		add_refs_to_oidset(tip_oids, newlist);
+	if (!tip_oids->loaded) {
+		add_refs_to_oidset(&tip_oids->oids, tip_oids->unmatched);
+		add_refs_to_oidset(&tip_oids->oids, tip_oids->newlist);
+		tip_oids->loaded = 1;
 	}
-	return oidset_contains(tip_oids, id);
+	return oidset_contains(&tip_oids->oids, id);
 }
 
 static void filter_refs(struct fetch_pack_args *args,
@@ -551,7 +558,7 @@ static void filter_refs(struct fetch_pack_args *args,
 	struct ref **newtail = &newlist;
 	struct ref *unmatched = NULL;
 	struct ref *ref, *next;
-	struct oidset tip_oids = OIDSET_INIT;
+	struct lazy_tip_oids tip_oids = { 0 };
 	int i;
 
 	i = 0;
@@ -589,6 +596,9 @@ static void filter_refs(struct fetch_pack_args *args,
 		}
 	}
 
+	tip_oids.unmatched = unmatched;
+	tip_oids.newlist = newlist;
+
 	/* Append unmatched requests to the list */
 	for (i = 0; i < nr_sought; i++) {
 		struct object_id oid;
@@ -604,8 +614,7 @@ static void filter_refs(struct fetch_pack_args *args,
 
 		if ((allow_unadvertised_object_request &
 		     (ALLOW_TIP_SHA1 | ALLOW_REACHABLE_SHA1)) ||
-		    tip_oids_contain(&tip_oids, unmatched, newlist,
-				     &ref->old_oid)) {
+		    tip_oids_contain(&tip_oids, &ref->old_oid)) {
 			ref->match_status = REF_MATCHED;
 			*newtail = copy_ref(ref);
 			newtail = &(*newtail)->next;
@@ -614,7 +623,7 @@ static void filter_refs(struct fetch_pack_args *args,
 		}
 	}
 
-	oidset_clear(&tip_oids);
+	oidset_clear(&tip_oids.oids);
 	for (ref = unmatched; ref; ref = next) {
 		next = ref->next;
 		free(ref);
René Scharfe Oct. 4, 2018, 3:05 p.m. UTC | #5
On 04.10.2018 at 08:48, Jeff King wrote:
> On Thu, Oct 04, 2018 at 07:56:44AM +0200, René Scharfe wrote:
> 
>>> As the comment above notes, I think we're really looking at the case
>>> where this gets populated on the first call, but not subsequent ones. It
>>> might be less hacky to use a "static int initialized" here. Or if we
>>> want to avoid hidden globals, put the logic into filter_refs() to decide
>>> when to populate.
>>
>> Right.  I'd prefer the latter, but was unable to find a nice way that
>> still populates the oidset lazily.  It's certainly worth another look,
>> and a separate series.
> 
> It's a little awkward because the lazy load happens in a conditional.
> You can fully encapsulate it like the patch below, but I actually don't
> think it's really helping readability.

*Shudder*

>>> It might be nice if these functions could hide inside oidset.c (and just
>>> declare the struct here). It looks like we might be able to do that with
>>> __KHASH_TYPE(), but the double-underscore implies that we're not
>>> supposed to. ;)
>>>
>>> I guess we also use a few of them in our inlines here. I'm not 100% sure
>>> that oidset_* needs to be inlined either, but this is at least a pretty
>>> faithful conversion of the original.
>>
>> We could inline all of the oidset functions, following the spirit of
>> klib/khash.h.
>>
>> Or we could uninline all of them, and then we might be able to clean up
>> oidset.h by using KHASH_DECLARE.  Perhaps we'd need to guard with an
>> "#ifndef THIS_IS_OIDSET_C" or similar to avoid a clash with KHASH_INIT.
>>
>> Not sure if any of that would be a worthwhile improvement...
> 
> Unless we know something is a performance win to inline, I'd generally
> prefer not to.

Agreed.

> For a case like this with auto-generated functions, I'm mostly worried
> about bloating the compiled code. Either with a bunch of inlined
> non-trivial functions, or cases where the compiler says "this is too big
> to inline" and generates an anonymous file-scope function, but we end up
> with a bunch of duplicates, because we're generating the same functions
> in a bunch of C files.

The _iter_ functions look harmless in this regard, as they only use small
functions that are not even type-specific.

oidset_init() would be better moved to oidset.c, as the code for resizing
is quite big.
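
The move itself would be mechanical; a sketch, reusing the body from
this patch unchanged:

  /* oidset.h */
  void oidset_init(struct oidset *set, size_t initial_size);

  /* oidset.c */
  void oidset_init(struct oidset *set, size_t initial_size)
  {
  	memset(&set->set, 0, sizeof(set->set));
  	if (initial_size)
  		kh_resize_oid(&set->set, initial_size);
  }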

René
Patch

diff --git a/fetch-pack.c b/fetch-pack.c
index 75047a4b2a..a839315726 100644
--- a/fetch-pack.c
+++ b/fetch-pack.c
@@ -536,7 +536,7 @@  static int tip_oids_contain(struct oidset *tip_oids,
 	 * add to "newlist" between calls, the additions will always be for
 	 * oids that are already in the set.
 	 */
-	if (!tip_oids->map.map.tablesize) {
+	if (!tip_oids->set.n_buckets) {
 		add_refs_to_oidset(tip_oids, unmatched);
 		add_refs_to_oidset(tip_oids, newlist);
 	}
diff --git a/oidset.c b/oidset.c
index 454c54f933..9836d427ef 100644
--- a/oidset.c
+++ b/oidset.c
@@ -3,38 +3,28 @@ 
 
 int oidset_contains(const struct oidset *set, const struct object_id *oid)
 {
-	if (!set->map.map.tablesize)
-		return 0;
-	return !!oidmap_get(&set->map, oid);
+	khiter_t pos = kh_get_oid(&set->set, *oid);
+	return pos != kh_end(&set->set);
 }
 
 int oidset_insert(struct oidset *set, const struct object_id *oid)
 {
-	struct oidmap_entry *entry;
-
-	if (!set->map.map.tablesize)
-		oidmap_init(&set->map, 0);
-	else if (oidset_contains(set, oid))
-		return 1;
-
-	entry = xmalloc(sizeof(*entry));
-	oidcpy(&entry->oid, oid);
-
-	oidmap_put(&set->map, entry);
-	return 0;
+	int added;
+	kh_put_oid(&set->set, *oid, &added);
+	return !added;
 }
 
 int oidset_remove(struct oidset *set, const struct object_id *oid)
 {
-	struct oidmap_entry *entry;
-
-	entry = oidmap_remove(&set->map, oid);
-	free(entry);
-
-	return (entry != NULL);
+	khiter_t pos = kh_get_oid(&set->set, *oid);
+	if (pos == kh_end(&set->set))
+		return 0;
+	kh_del_oid(&set->set, pos);
+	return 1;
 }
 
 void oidset_clear(struct oidset *set)
 {
-	oidmap_free(&set->map, 1);
+	kh_release_oid(&set->set);
+	oidset_init(set, 0);
 }
diff --git a/oidset.h b/oidset.h
index 40ec5f87fe..4b90540cd4 100644
--- a/oidset.h
+++ b/oidset.h
@@ -1,7 +1,8 @@ 
 #ifndef OIDSET_H
 #define OIDSET_H
 
-#include "oidmap.h"
+#include "hashmap.h"
+#include "khash.h"
 
 /**
  * This API is similar to sha1-array, in that it maintains a set of object ids
@@ -15,19 +16,33 @@ 
  *      table overhead.
  */
 
+static inline unsigned int oid_hash(struct object_id oid)
+{
+	return sha1hash(oid.hash);
+}
+
+static inline int oid_equal(struct object_id a, struct object_id b)
+{
+	return oideq(&a, &b);
+}
+
+KHASH_INIT(oid, struct object_id, int, 0, oid_hash, oid_equal)
+
 /**
  * A single oidset; should be zero-initialized (or use OIDSET_INIT).
  */
 struct oidset {
-	struct oidmap map;
+	kh_oid_t set;
 };
 
-#define OIDSET_INIT { OIDMAP_INIT }
+#define OIDSET_INIT { { 0 } }
 
 
 static inline void oidset_init(struct oidset *set, size_t initial_size)
 {
-	oidmap_init(&set->map, initial_size);
+	memset(&set->set, 0, sizeof(set->set));
+	if (initial_size)
+		kh_resize_oid(&set->set, initial_size);
 }
 
 /**
@@ -58,19 +73,24 @@  int oidset_remove(struct oidset *set, const struct object_id *oid);
 void oidset_clear(struct oidset *set);
 
 struct oidset_iter {
-	struct oidmap_iter m_iter;
+	kh_oid_t *set;
+	khiter_t iter;
 };
 
 static inline void oidset_iter_init(struct oidset *set,
 				    struct oidset_iter *iter)
 {
-	oidmap_iter_init(&set->map, &iter->m_iter);
+	iter->set = &set->set;
+	iter->iter = kh_begin(iter->set);
 }
 
 static inline struct object_id *oidset_iter_next(struct oidset_iter *iter)
 {
-	struct oidmap_entry *e = oidmap_iter_next(&iter->m_iter);
-	return e ? &e->oid : NULL;
+	for (; iter->iter != kh_end(iter->set); iter->iter++) {
+		if (kh_exist(iter->set, iter->iter))
+			return &kh_key(iter->set, iter->iter++);
+	}
+	return NULL;
 }
 
 static inline struct object_id *oidset_iter_first(struct oidset *set,