Message ID | 20240126220655.395093-2-kent.overstreet@linux.dev (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [1/5] mean and variance: Promote to lib/math | expand |
On Fri, Jan 26, 2024 at 05:06:52PM -0500, Kent Overstreet wrote: > eytzinger trees are a faster alternative to binary search. They're a bit > more expensive to setup, but lookups perform much better assuming the > tree isn't entirely in cache. > > Binary search is a worst case scenario for branch prediction and > prefetching, but eytzinger trees have children adjacent in memory and > thus we can prefetch before knowing the result of a comparison. > > An eytzinger tree is a binary tree laid out in an array, with the same > geometry as the usual binary heap construction, but used as a search > tree instead. > > Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev> This looks more or less like what I remember of building heaps and squinting at my horrible handwritten notes about eytzinger trees from back in the day. Reviewed-by: Darrick J. Wong <djwong@kernel.org> --D > --- > fs/bcachefs/bset.c | 2 +- > fs/bcachefs/journal_seq_blacklist.c | 6 +- > fs/bcachefs/replicas.c | 17 ++- > fs/bcachefs/replicas.h | 3 +- > fs/bcachefs/super-io.h | 2 +- > fs/bcachefs/util.c | 145 +-------------------- > fs/bcachefs/util.h | 4 - > {fs/bcachefs => include/linux}/eytzinger.h | 56 ++++---- > lib/sort.c | 85 ++++++++++++ > 9 files changed, 136 insertions(+), 184 deletions(-) > rename {fs/bcachefs => include/linux}/eytzinger.h (78%) > > diff --git a/fs/bcachefs/bset.c b/fs/bcachefs/bset.c > index 3fd1085b6c61..1d77aa55d641 100644 > --- a/fs/bcachefs/bset.c > +++ b/fs/bcachefs/bset.c > @@ -9,12 +9,12 @@ > #include "bcachefs.h" > #include "btree_cache.h" > #include "bset.h" > -#include "eytzinger.h" > #include "trace.h" > #include "util.h" > > #include <asm/unaligned.h> > #include <linux/console.h> > +#include <linux/eytzinger.h> > #include <linux/random.h> > #include <linux/prefetch.h> > > diff --git a/fs/bcachefs/journal_seq_blacklist.c b/fs/bcachefs/journal_seq_blacklist.c > index 0200e299cfbb..024c9b1b323f 100644 > --- a/fs/bcachefs/journal_seq_blacklist.c > +++ b/fs/bcachefs/journal_seq_blacklist.c > @@ -2,10 +2,11 @@ > > #include "bcachefs.h" > #include "btree_iter.h" > -#include "eytzinger.h" > #include "journal_seq_blacklist.h" > #include "super-io.h" > > +#include <linux/eytzinger.h> > + > /* > * journal_seq_blacklist machinery: > * > @@ -119,8 +120,7 @@ int bch2_journal_seq_blacklist_add(struct bch_fs *c, u64 start, u64 end) > return ret ?: bch2_blacklist_table_initialize(c); > } > > -static int journal_seq_blacklist_table_cmp(const void *_l, > - const void *_r, size_t size) > +static int journal_seq_blacklist_table_cmp(const void *_l, const void *_r) > { > const struct journal_seq_blacklist_table_entry *l = _l; > const struct journal_seq_blacklist_table_entry *r = _r; > diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c > index cc2672c12031..75fdce373f76 100644 > --- a/fs/bcachefs/replicas.c > +++ b/fs/bcachefs/replicas.c > @@ -6,12 +6,15 @@ > #include "replicas.h" > #include "super-io.h" > > +#include <linux/sort.h> > + > static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *, > struct bch_replicas_cpu *); > > /* Some (buggy!) compilers don't allow memcmp to be passed as a pointer */ > -static int bch2_memcmp(const void *l, const void *r, size_t size) > +static int bch2_memcmp(const void *l, const void *r, const void *priv) > { > + size_t size = (size_t) priv; > return memcmp(l, r, size); > } > > @@ -39,7 +42,8 @@ void bch2_replicas_entry_sort(struct bch_replicas_entry_v1 *e) > > static void bch2_cpu_replicas_sort(struct bch_replicas_cpu *r) > { > - eytzinger0_sort(r->entries, r->nr, r->entry_size, bch2_memcmp, NULL); > + eytzinger0_sort_r(r->entries, r->nr, r->entry_size, > + bch2_memcmp, NULL, (void *)(size_t)r->entry_size); > } > > static void bch2_replicas_entry_v0_to_text(struct printbuf *out, > @@ -824,10 +828,11 @@ static int bch2_cpu_replicas_validate(struct bch_replicas_cpu *cpu_r, > { > unsigned i; > > - sort_cmp_size(cpu_r->entries, > - cpu_r->nr, > - cpu_r->entry_size, > - bch2_memcmp, NULL); > + sort_r(cpu_r->entries, > + cpu_r->nr, > + cpu_r->entry_size, > + bch2_memcmp, NULL, > + (void *)(size_t)cpu_r->entry_size); > > for (i = 0; i < cpu_r->nr; i++) { > struct bch_replicas_entry_v1 *e = > diff --git a/fs/bcachefs/replicas.h b/fs/bcachefs/replicas.h > index 654a4b26d3a3..983cce782ac2 100644 > --- a/fs/bcachefs/replicas.h > +++ b/fs/bcachefs/replicas.h > @@ -3,9 +3,10 @@ > #define _BCACHEFS_REPLICAS_H > > #include "bkey.h" > -#include "eytzinger.h" > #include "replicas_types.h" > > +#include <linux/eytzinger.h> > + > void bch2_replicas_entry_sort(struct bch_replicas_entry_v1 *); > void bch2_replicas_entry_to_text(struct printbuf *, > struct bch_replicas_entry_v1 *); > diff --git a/fs/bcachefs/super-io.h b/fs/bcachefs/super-io.h > index 95e80e06316b..f37620919e11 100644 > --- a/fs/bcachefs/super-io.h > +++ b/fs/bcachefs/super-io.h > @@ -3,12 +3,12 @@ > #define _BCACHEFS_SUPER_IO_H > > #include "extents.h" > -#include "eytzinger.h" > #include "super_types.h" > #include "super.h" > #include "sb-members.h" > > #include <asm/byteorder.h> > +#include <linux/eytzinger.h> > > static inline bool bch2_version_compatible(u16 version) > { > diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c > index d7ea95abb9df..c7cf9c6fcf9a 100644 > --- a/fs/bcachefs/util.c > +++ b/fs/bcachefs/util.c > @@ -11,6 +11,7 @@ > #include <linux/console.h> > #include <linux/ctype.h> > #include <linux/debugfs.h> > +#include <linux/eytzinger.h> > #include <linux/freezer.h> > #include <linux/kthread.h> > #include <linux/log2.h> > @@ -24,7 +25,6 @@ > #include <linux/sched/clock.h> > #include <linux/mean_and_variance.h> > > -#include "eytzinger.h" > #include "util.h" > > static const char si_units[] = "?kMGTPEZY"; > @@ -863,149 +863,6 @@ void memcpy_from_bio(void *dst, struct bio *src, struct bvec_iter src_iter) > } > } > > -static int alignment_ok(const void *base, size_t align) > -{ > - return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) || > - ((unsigned long)base & (align - 1)) == 0; > -} > - > -static void u32_swap(void *a, void *b, size_t size) > -{ > - u32 t = *(u32 *)a; > - *(u32 *)a = *(u32 *)b; > - *(u32 *)b = t; > -} > - > -static void u64_swap(void *a, void *b, size_t size) > -{ > - u64 t = *(u64 *)a; > - *(u64 *)a = *(u64 *)b; > - *(u64 *)b = t; > -} > - > -static void generic_swap(void *a, void *b, size_t size) > -{ > - char t; > - > - do { > - t = *(char *)a; > - *(char *)a++ = *(char *)b; > - *(char *)b++ = t; > - } while (--size > 0); > -} > - > -static inline int do_cmp(void *base, size_t n, size_t size, > - int (*cmp_func)(const void *, const void *, size_t), > - size_t l, size_t r) > -{ > - return cmp_func(base + inorder_to_eytzinger0(l, n) * size, > - base + inorder_to_eytzinger0(r, n) * size, > - size); > -} > - > -static inline void do_swap(void *base, size_t n, size_t size, > - void (*swap_func)(void *, void *, size_t), > - size_t l, size_t r) > -{ > - swap_func(base + inorder_to_eytzinger0(l, n) * size, > - base + inorder_to_eytzinger0(r, n) * size, > - size); > -} > - > -void eytzinger0_sort(void *base, size_t n, size_t size, > - int (*cmp_func)(const void *, const void *, size_t), > - void (*swap_func)(void *, void *, size_t)) > -{ > - int i, c, r; > - > - if (!swap_func) { > - if (size == 4 && alignment_ok(base, 4)) > - swap_func = u32_swap; > - else if (size == 8 && alignment_ok(base, 8)) > - swap_func = u64_swap; > - else > - swap_func = generic_swap; > - } > - > - /* heapify */ > - for (i = n / 2 - 1; i >= 0; --i) { > - for (r = i; r * 2 + 1 < n; r = c) { > - c = r * 2 + 1; > - > - if (c + 1 < n && > - do_cmp(base, n, size, cmp_func, c, c + 1) < 0) > - c++; > - > - if (do_cmp(base, n, size, cmp_func, r, c) >= 0) > - break; > - > - do_swap(base, n, size, swap_func, r, c); > - } > - } > - > - /* sort */ > - for (i = n - 1; i > 0; --i) { > - do_swap(base, n, size, swap_func, 0, i); > - > - for (r = 0; r * 2 + 1 < i; r = c) { > - c = r * 2 + 1; > - > - if (c + 1 < i && > - do_cmp(base, n, size, cmp_func, c, c + 1) < 0) > - c++; > - > - if (do_cmp(base, n, size, cmp_func, r, c) >= 0) > - break; > - > - do_swap(base, n, size, swap_func, r, c); > - } > - } > -} > - > -void sort_cmp_size(void *base, size_t num, size_t size, > - int (*cmp_func)(const void *, const void *, size_t), > - void (*swap_func)(void *, void *, size_t size)) > -{ > - /* pre-scale counters for performance */ > - int i = (num/2 - 1) * size, n = num * size, c, r; > - > - if (!swap_func) { > - if (size == 4 && alignment_ok(base, 4)) > - swap_func = u32_swap; > - else if (size == 8 && alignment_ok(base, 8)) > - swap_func = u64_swap; > - else > - swap_func = generic_swap; > - } > - > - /* heapify */ > - for ( ; i >= 0; i -= size) { > - for (r = i; r * 2 + size < n; r = c) { > - c = r * 2 + size; > - if (c < n - size && > - cmp_func(base + c, base + c + size, size) < 0) > - c += size; > - if (cmp_func(base + r, base + c, size) >= 0) > - break; > - swap_func(base + r, base + c, size); > - } > - } > - > - /* sort */ > - for (i = n - size; i > 0; i -= size) { > - swap_func(base, base + i, size); > - for (r = 0; r * 2 + size < i; r = c) { > - c = r * 2 + size; > - if (c < i - size && > - cmp_func(base + c, base + c + size, size) < 0) > - c += size; > - if (cmp_func(base + r, base + c, size) >= 0) > - break; > - swap_func(base + r, base + c, size); > - } > - } > -} > - > static void mempool_free_vp(void *element, void *pool_data) > { > size_t size = (size_t) pool_data; > diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h > index 0059481995ef..c3b11c3d24ea 100644 > --- a/fs/bcachefs/util.h > +++ b/fs/bcachefs/util.h > @@ -737,10 +737,6 @@ static inline void memset_u64s_tail(void *s, int c, unsigned bytes) > memset(s + bytes, c, rem); > } > > -void sort_cmp_size(void *base, size_t num, size_t size, > - int (*cmp_func)(const void *, const void *, size_t), > - void (*swap_func)(void *, void *, size_t)); > - > /* just the memmove, doesn't update @_nr */ > #define __array_insert_item(_array, _nr, _pos) \ > memmove(&(_array)[(_pos) + 1], \ > diff --git a/fs/bcachefs/eytzinger.h b/include/linux/eytzinger.h > similarity index 78% > rename from fs/bcachefs/eytzinger.h > rename to include/linux/eytzinger.h > index b04750dbf870..9565a5c26cd5 100644 > --- a/fs/bcachefs/eytzinger.h > +++ b/include/linux/eytzinger.h > @@ -1,27 +1,37 @@ > /* SPDX-License-Identifier: GPL-2.0 */ > -#ifndef _EYTZINGER_H > -#define _EYTZINGER_H > +#ifndef _LINUX_EYTZINGER_H > +#define _LINUX_EYTZINGER_H > > #include <linux/bitops.h> > #include <linux/log2.h> > > -#include "util.h" > +#ifdef EYTZINGER_DEBUG > +#define EYTZINGER_BUG_ON(cond) BUG_ON(cond) > +#else > +#define EYTZINGER_BUG_ON(cond) > +#endif > > /* > * Traversal for trees in eytzinger layout - a full binary tree layed out in an > - * array > - */ > - > -/* > - * One based indexing version: > + * array. > * > - * With one based indexing each level of the tree starts at a power of two - > - * good for cacheline alignment: > + * Consider using an eytzinger tree any time you would otherwise be doing binary > + * search over an array. Binary search is a worst case scenario for branch > + * prediction and prefetching, but in an eytzinger tree every node's children > + * are adjacent in memory, thus we can prefetch children before knowing the > + * result of the comparison, assuming multiple nodes fit on a cacheline. > + * > + * Two variants are provided, for one based indexing and zero based indexing. > + * > + * Zero based indexing is more convenient, but one based indexing has better > + * alignment and thus better performance because each new level of the tree > + * starts at a power of two, and thus if element 0 was cacheline aligned, each > + * new level will be as well. > */ > > static inline unsigned eytzinger1_child(unsigned i, unsigned child) > { > - EBUG_ON(child > 1); > + EYTZINGER_BUG_ON(child > 1); > > return (i << 1) + child; > } > @@ -58,7 +68,7 @@ static inline unsigned eytzinger1_last(unsigned size) > > static inline unsigned eytzinger1_next(unsigned i, unsigned size) > { > - EBUG_ON(i > size); > + EYTZINGER_BUG_ON(i > size); > > if (eytzinger1_right_child(i) <= size) { > i = eytzinger1_right_child(i); > @@ -74,7 +84,7 @@ static inline unsigned eytzinger1_next(unsigned i, unsigned size) > > static inline unsigned eytzinger1_prev(unsigned i, unsigned size) > { > - EBUG_ON(i > size); > + EYTZINGER_BUG_ON(i > size); > > if (eytzinger1_left_child(i) <= size) { > i = eytzinger1_left_child(i) + 1; > @@ -101,7 +111,7 @@ static inline unsigned __eytzinger1_to_inorder(unsigned i, unsigned size, > unsigned shift = __fls(size) - b; > int s; > > - EBUG_ON(!i || i > size); > + EYTZINGER_BUG_ON(!i || i > size); > > i ^= 1U << b; > i <<= 1; > @@ -126,7 +136,7 @@ static inline unsigned __inorder_to_eytzinger1(unsigned i, unsigned size, > unsigned shift; > int s; > > - EBUG_ON(!i || i > size); > + EYTZINGER_BUG_ON(!i || i > size); > > /* > * sign bit trick: > @@ -164,7 +174,7 @@ static inline unsigned inorder_to_eytzinger1(unsigned i, unsigned size) > > static inline unsigned eytzinger0_child(unsigned i, unsigned child) > { > - EBUG_ON(child > 1); > + EYTZINGER_BUG_ON(child > 1); > > return (i << 1) + 1 + child; > } > @@ -231,11 +241,9 @@ static inline unsigned inorder_to_eytzinger0(unsigned i, unsigned size) > (_i) != -1; \ > (_i) = eytzinger0_next((_i), (_size))) > > -typedef int (*eytzinger_cmp_fn)(const void *l, const void *r, size_t size); > - > /* return greatest node <= @search, or -1 if not found */ > static inline ssize_t eytzinger0_find_le(void *base, size_t nr, size_t size, > - eytzinger_cmp_fn cmp, const void *search) > + cmp_func_t cmp, const void *search) > { > unsigned i, n = 0; > > @@ -244,7 +252,7 @@ static inline ssize_t eytzinger0_find_le(void *base, size_t nr, size_t size, > > do { > i = n; > - n = eytzinger0_child(i, cmp(search, base + i * size, size) >= 0); > + n = eytzinger0_child(i, cmp(search, base + i * size) >= 0); > } while (n < nr); > > if (n & 1) { > @@ -274,8 +282,8 @@ static inline ssize_t eytzinger0_find_le(void *base, size_t nr, size_t size, > _i; \ > }) > > -void eytzinger0_sort(void *, size_t, size_t, > - int (*cmp_func)(const void *, const void *, size_t), > - void (*swap_func)(void *, void *, size_t)); > +void eytzinger0_sort_r(void *, size_t, size_t, > + cmp_r_func_t, swap_r_func_t, const void *); > +void eytzinger0_sort(void *, size_t, size_t, cmp_func_t, swap_func_t); > > -#endif /* _EYTZINGER_H */ > +#endif /* _LINUX_EYTZINGER_H */ > diff --git a/lib/sort.c b/lib/sort.c > index b399bf10d675..3dfa83d86bbb 100644 > --- a/lib/sort.c > +++ b/lib/sort.c > @@ -290,3 +290,88 @@ void sort(void *base, size_t num, size_t size, > return sort_r(base, num, size, _CMP_WRAPPER, SWAP_WRAPPER, &w); > } > EXPORT_SYMBOL(sort); > + > +#include <linux/eytzinger.h> > + > +static inline int eytzinger0_do_cmp(void *base, size_t n, size_t size, > + cmp_r_func_t cmp_func, const void *priv, > + size_t l, size_t r) > +{ > + return do_cmp(base + inorder_to_eytzinger0(l, n) * size, > + base + inorder_to_eytzinger0(r, n) * size, > + cmp_func, priv); > +} > + > +static inline void eytzinger0_do_swap(void *base, size_t n, size_t size, > + swap_r_func_t swap_func, const void *priv, > + size_t l, size_t r) > +{ > + do_swap(base + inorder_to_eytzinger0(l, n) * size, > + base + inorder_to_eytzinger0(r, n) * size, > + size, swap_func, priv); > +} > + > +void eytzinger0_sort_r(void *base, size_t n, size_t size, > + cmp_r_func_t cmp_func, > + swap_r_func_t swap_func, > + const void *priv) > +{ > + int i, c, r; > + > + if (!swap_func) { > + if (is_aligned(base, size, 8)) > + swap_func = SWAP_WORDS_64; > + else if (is_aligned(base, size, 4)) > + swap_func = SWAP_WORDS_32; > + else > + swap_func = SWAP_BYTES; > + } > + > + /* heapify */ > + for (i = n / 2 - 1; i >= 0; --i) { > + for (r = i; r * 2 + 1 < n; r = c) { > + c = r * 2 + 1; > + > + if (c + 1 < n && > + eytzinger0_do_cmp(base, n, size, cmp_func, priv, c, c + 1) < 0) > + c++; > + > + if (eytzinger0_do_cmp(base, n, size, cmp_func, priv, r, c) >= 0) > + break; > + > + eytzinger0_do_swap(base, n, size, swap_func, priv, r, c); > + } > + } > + > + /* sort */ > + for (i = n - 1; i > 0; --i) { > + eytzinger0_do_swap(base, n, size, swap_func, priv, 0, i); > + > + for (r = 0; r * 2 + 1 < i; r = c) { > + c = r * 2 + 1; > + > + if (c + 1 < i && > + eytzinger0_do_cmp(base, n, size, cmp_func, priv, c, c + 1) < 0) > + c++; > + > + if (eytzinger0_do_cmp(base, n, size, cmp_func, priv, r, c) >= 0) > + break; > + > + eytzinger0_do_swap(base, n, size, swap_func, priv, r, c); > + } > + } > +} > +EXPORT_SYMBOL_GPL(eytzinger0_sort_r); > + > +void eytzinger0_sort(void *base, size_t n, size_t size, > + cmp_func_t cmp_func, > + swap_func_t swap_func) > +{ > + struct wrapper w = { > + .cmp = cmp_func, > + .swap = swap_func, > + }; > + > + return eytzinger0_sort_r(base, n, size, _CMP_WRAPPER, SWAP_WRAPPER, &w); > +} > +EXPORT_SYMBOL_GPL(eytzinger0_sort); > -- > 2.43.0 > >
diff --git a/fs/bcachefs/bset.c b/fs/bcachefs/bset.c index 3fd1085b6c61..1d77aa55d641 100644 --- a/fs/bcachefs/bset.c +++ b/fs/bcachefs/bset.c @@ -9,12 +9,12 @@ #include "bcachefs.h" #include "btree_cache.h" #include "bset.h" -#include "eytzinger.h" #include "trace.h" #include "util.h" #include <asm/unaligned.h> #include <linux/console.h> +#include <linux/eytzinger.h> #include <linux/random.h> #include <linux/prefetch.h> diff --git a/fs/bcachefs/journal_seq_blacklist.c b/fs/bcachefs/journal_seq_blacklist.c index 0200e299cfbb..024c9b1b323f 100644 --- a/fs/bcachefs/journal_seq_blacklist.c +++ b/fs/bcachefs/journal_seq_blacklist.c @@ -2,10 +2,11 @@ #include "bcachefs.h" #include "btree_iter.h" -#include "eytzinger.h" #include "journal_seq_blacklist.h" #include "super-io.h" +#include <linux/eytzinger.h> + /* * journal_seq_blacklist machinery: * @@ -119,8 +120,7 @@ int bch2_journal_seq_blacklist_add(struct bch_fs *c, u64 start, u64 end) return ret ?: bch2_blacklist_table_initialize(c); } -static int journal_seq_blacklist_table_cmp(const void *_l, - const void *_r, size_t size) +static int journal_seq_blacklist_table_cmp(const void *_l, const void *_r) { const struct journal_seq_blacklist_table_entry *l = _l; const struct journal_seq_blacklist_table_entry *r = _r; diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c index cc2672c12031..75fdce373f76 100644 --- a/fs/bcachefs/replicas.c +++ b/fs/bcachefs/replicas.c @@ -6,12 +6,15 @@ #include "replicas.h" #include "super-io.h" +#include <linux/sort.h> + static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *, struct bch_replicas_cpu *); /* Some (buggy!) compilers don't allow memcmp to be passed as a pointer */ -static int bch2_memcmp(const void *l, const void *r, size_t size) +static int bch2_memcmp(const void *l, const void *r, const void *priv) { + size_t size = (size_t) priv; return memcmp(l, r, size); } @@ -39,7 +42,8 @@ void bch2_replicas_entry_sort(struct bch_replicas_entry_v1 *e) static void bch2_cpu_replicas_sort(struct bch_replicas_cpu *r) { - eytzinger0_sort(r->entries, r->nr, r->entry_size, bch2_memcmp, NULL); + eytzinger0_sort_r(r->entries, r->nr, r->entry_size, + bch2_memcmp, NULL, (void *)(size_t)r->entry_size); } static void bch2_replicas_entry_v0_to_text(struct printbuf *out, @@ -824,10 +828,11 @@ static int bch2_cpu_replicas_validate(struct bch_replicas_cpu *cpu_r, { unsigned i; - sort_cmp_size(cpu_r->entries, - cpu_r->nr, - cpu_r->entry_size, - bch2_memcmp, NULL); + sort_r(cpu_r->entries, + cpu_r->nr, + cpu_r->entry_size, + bch2_memcmp, NULL, + (void *)(size_t)cpu_r->entry_size); for (i = 0; i < cpu_r->nr; i++) { struct bch_replicas_entry_v1 *e = diff --git a/fs/bcachefs/replicas.h b/fs/bcachefs/replicas.h index 654a4b26d3a3..983cce782ac2 100644 --- a/fs/bcachefs/replicas.h +++ b/fs/bcachefs/replicas.h @@ -3,9 +3,10 @@ #define _BCACHEFS_REPLICAS_H #include "bkey.h" -#include "eytzinger.h" #include "replicas_types.h" +#include <linux/eytzinger.h> + void bch2_replicas_entry_sort(struct bch_replicas_entry_v1 *); void bch2_replicas_entry_to_text(struct printbuf *, struct bch_replicas_entry_v1 *); diff --git a/fs/bcachefs/super-io.h b/fs/bcachefs/super-io.h index 95e80e06316b..f37620919e11 100644 --- a/fs/bcachefs/super-io.h +++ b/fs/bcachefs/super-io.h @@ -3,12 +3,12 @@ #define _BCACHEFS_SUPER_IO_H #include "extents.h" -#include "eytzinger.h" #include "super_types.h" #include "super.h" #include "sb-members.h" #include <asm/byteorder.h> +#include <linux/eytzinger.h> static inline bool bch2_version_compatible(u16 version) { diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c index d7ea95abb9df..c7cf9c6fcf9a 100644 --- a/fs/bcachefs/util.c +++ b/fs/bcachefs/util.c @@ -11,6 +11,7 @@ #include <linux/console.h> #include <linux/ctype.h> #include <linux/debugfs.h> +#include <linux/eytzinger.h> #include <linux/freezer.h> #include <linux/kthread.h> #include <linux/log2.h> @@ -24,7 +25,6 @@ #include <linux/sched/clock.h> #include <linux/mean_and_variance.h> -#include "eytzinger.h" #include "util.h" static const char si_units[] = "?kMGTPEZY"; @@ -863,149 +863,6 @@ void memcpy_from_bio(void *dst, struct bio *src, struct bvec_iter src_iter) } } -static int alignment_ok(const void *base, size_t align) -{ - return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) || - ((unsigned long)base & (align - 1)) == 0; -} - -static void u32_swap(void *a, void *b, size_t size) -{ - u32 t = *(u32 *)a; - *(u32 *)a = *(u32 *)b; - *(u32 *)b = t; -} - -static void u64_swap(void *a, void *b, size_t size) -{ - u64 t = *(u64 *)a; - *(u64 *)a = *(u64 *)b; - *(u64 *)b = t; -} - -static void generic_swap(void *a, void *b, size_t size) -{ - char t; - - do { - t = *(char *)a; - *(char *)a++ = *(char *)b; - *(char *)b++ = t; - } while (--size > 0); -} - -static inline int do_cmp(void *base, size_t n, size_t size, - int (*cmp_func)(const void *, const void *, size_t), - size_t l, size_t r) -{ - return cmp_func(base + inorder_to_eytzinger0(l, n) * size, - base + inorder_to_eytzinger0(r, n) * size, - size); -} - -static inline void do_swap(void *base, size_t n, size_t size, - void (*swap_func)(void *, void *, size_t), - size_t l, size_t r) -{ - swap_func(base + inorder_to_eytzinger0(l, n) * size, - base + inorder_to_eytzinger0(r, n) * size, - size); -} - -void eytzinger0_sort(void *base, size_t n, size_t size, - int (*cmp_func)(const void *, const void *, size_t), - void (*swap_func)(void *, void *, size_t)) -{ - int i, c, r; - - if (!swap_func) { - if (size == 4 && alignment_ok(base, 4)) - swap_func = u32_swap; - else if (size == 8 && alignment_ok(base, 8)) - swap_func = u64_swap; - else - swap_func = generic_swap; - } - - /* heapify */ - for (i = n / 2 - 1; i >= 0; --i) { - for (r = i; r * 2 + 1 < n; r = c) { - c = r * 2 + 1; - - if (c + 1 < n && - do_cmp(base, n, size, cmp_func, c, c + 1) < 0) - c++; - - if (do_cmp(base, n, size, cmp_func, r, c) >= 0) - break; - - do_swap(base, n, size, swap_func, r, c); - } - } - - /* sort */ - for (i = n - 1; i > 0; --i) { - do_swap(base, n, size, swap_func, 0, i); - - for (r = 0; r * 2 + 1 < i; r = c) { - c = r * 2 + 1; - - if (c + 1 < i && - do_cmp(base, n, size, cmp_func, c, c + 1) < 0) - c++; - - if (do_cmp(base, n, size, cmp_func, r, c) >= 0) - break; - - do_swap(base, n, size, swap_func, r, c); - } - } -} - -void sort_cmp_size(void *base, size_t num, size_t size, - int (*cmp_func)(const void *, const void *, size_t), - void (*swap_func)(void *, void *, size_t size)) -{ - /* pre-scale counters for performance */ - int i = (num/2 - 1) * size, n = num * size, c, r; - - if (!swap_func) { - if (size == 4 && alignment_ok(base, 4)) - swap_func = u32_swap; - else if (size == 8 && alignment_ok(base, 8)) - swap_func = u64_swap; - else - swap_func = generic_swap; - } - - /* heapify */ - for ( ; i >= 0; i -= size) { - for (r = i; r * 2 + size < n; r = c) { - c = r * 2 + size; - if (c < n - size && - cmp_func(base + c, base + c + size, size) < 0) - c += size; - if (cmp_func(base + r, base + c, size) >= 0) - break; - swap_func(base + r, base + c, size); - } - } - - /* sort */ - for (i = n - size; i > 0; i -= size) { - swap_func(base, base + i, size); - for (r = 0; r * 2 + size < i; r = c) { - c = r * 2 + size; - if (c < i - size && - cmp_func(base + c, base + c + size, size) < 0) - c += size; - if (cmp_func(base + r, base + c, size) >= 0) - break; - swap_func(base + r, base + c, size); - } - } -} - static void mempool_free_vp(void *element, void *pool_data) { size_t size = (size_t) pool_data; diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h index 0059481995ef..c3b11c3d24ea 100644 --- a/fs/bcachefs/util.h +++ b/fs/bcachefs/util.h @@ -737,10 +737,6 @@ static inline void memset_u64s_tail(void *s, int c, unsigned bytes) memset(s + bytes, c, rem); } -void sort_cmp_size(void *base, size_t num, size_t size, - int (*cmp_func)(const void *, const void *, size_t), - void (*swap_func)(void *, void *, size_t)); - /* just the memmove, doesn't update @_nr */ #define __array_insert_item(_array, _nr, _pos) \ memmove(&(_array)[(_pos) + 1], \ diff --git a/fs/bcachefs/eytzinger.h b/include/linux/eytzinger.h similarity index 78% rename from fs/bcachefs/eytzinger.h rename to include/linux/eytzinger.h index b04750dbf870..9565a5c26cd5 100644 --- a/fs/bcachefs/eytzinger.h +++ b/include/linux/eytzinger.h @@ -1,27 +1,37 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _EYTZINGER_H -#define _EYTZINGER_H +#ifndef _LINUX_EYTZINGER_H +#define _LINUX_EYTZINGER_H #include <linux/bitops.h> #include <linux/log2.h> -#include "util.h" +#ifdef EYTZINGER_DEBUG +#define EYTZINGER_BUG_ON(cond) BUG_ON(cond) +#else +#define EYTZINGER_BUG_ON(cond) +#endif /* * Traversal for trees in eytzinger layout - a full binary tree layed out in an - * array - */ - -/* - * One based indexing version: + * array. * - * With one based indexing each level of the tree starts at a power of two - - * good for cacheline alignment: + * Consider using an eytzinger tree any time you would otherwise be doing binary + * search over an array. Binary search is a worst case scenario for branch + * prediction and prefetching, but in an eytzinger tree every node's children + * are adjacent in memory, thus we can prefetch children before knowing the + * result of the comparison, assuming multiple nodes fit on a cacheline. + * + * Two variants are provided, for one based indexing and zero based indexing. + * + * Zero based indexing is more convenient, but one based indexing has better + * alignment and thus better performance because each new level of the tree + * starts at a power of two, and thus if element 0 was cacheline aligned, each + * new level will be as well. */ static inline unsigned eytzinger1_child(unsigned i, unsigned child) { - EBUG_ON(child > 1); + EYTZINGER_BUG_ON(child > 1); return (i << 1) + child; } @@ -58,7 +68,7 @@ static inline unsigned eytzinger1_last(unsigned size) static inline unsigned eytzinger1_next(unsigned i, unsigned size) { - EBUG_ON(i > size); + EYTZINGER_BUG_ON(i > size); if (eytzinger1_right_child(i) <= size) { i = eytzinger1_right_child(i); @@ -74,7 +84,7 @@ static inline unsigned eytzinger1_next(unsigned i, unsigned size) static inline unsigned eytzinger1_prev(unsigned i, unsigned size) { - EBUG_ON(i > size); + EYTZINGER_BUG_ON(i > size); if (eytzinger1_left_child(i) <= size) { i = eytzinger1_left_child(i) + 1; @@ -101,7 +111,7 @@ static inline unsigned __eytzinger1_to_inorder(unsigned i, unsigned size, unsigned shift = __fls(size) - b; int s; - EBUG_ON(!i || i > size); + EYTZINGER_BUG_ON(!i || i > size); i ^= 1U << b; i <<= 1; @@ -126,7 +136,7 @@ static inline unsigned __inorder_to_eytzinger1(unsigned i, unsigned size, unsigned shift; int s; - EBUG_ON(!i || i > size); + EYTZINGER_BUG_ON(!i || i > size); /* * sign bit trick: @@ -164,7 +174,7 @@ static inline unsigned inorder_to_eytzinger1(unsigned i, unsigned size) static inline unsigned eytzinger0_child(unsigned i, unsigned child) { - EBUG_ON(child > 1); + EYTZINGER_BUG_ON(child > 1); return (i << 1) + 1 + child; } @@ -231,11 +241,9 @@ static inline unsigned inorder_to_eytzinger0(unsigned i, unsigned size) (_i) != -1; \ (_i) = eytzinger0_next((_i), (_size))) -typedef int (*eytzinger_cmp_fn)(const void *l, const void *r, size_t size); - /* return greatest node <= @search, or -1 if not found */ static inline ssize_t eytzinger0_find_le(void *base, size_t nr, size_t size, - eytzinger_cmp_fn cmp, const void *search) + cmp_func_t cmp, const void *search) { unsigned i, n = 0; @@ -244,7 +252,7 @@ static inline ssize_t eytzinger0_find_le(void *base, size_t nr, size_t size, do { i = n; - n = eytzinger0_child(i, cmp(search, base + i * size, size) >= 0); + n = eytzinger0_child(i, cmp(search, base + i * size) >= 0); } while (n < nr); if (n & 1) { @@ -274,8 +282,8 @@ static inline ssize_t eytzinger0_find_le(void *base, size_t nr, size_t size, _i; \ }) -void eytzinger0_sort(void *, size_t, size_t, - int (*cmp_func)(const void *, const void *, size_t), - void (*swap_func)(void *, void *, size_t)); +void eytzinger0_sort_r(void *, size_t, size_t, + cmp_r_func_t, swap_r_func_t, const void *); +void eytzinger0_sort(void *, size_t, size_t, cmp_func_t, swap_func_t); -#endif /* _EYTZINGER_H */ +#endif /* _LINUX_EYTZINGER_H */ diff --git a/lib/sort.c b/lib/sort.c index b399bf10d675..3dfa83d86bbb 100644 --- a/lib/sort.c +++ b/lib/sort.c @@ -290,3 +290,88 @@ void sort(void *base, size_t num, size_t size, return sort_r(base, num, size, _CMP_WRAPPER, SWAP_WRAPPER, &w); } EXPORT_SYMBOL(sort); + +#include <linux/eytzinger.h> + +static inline int eytzinger0_do_cmp(void *base, size_t n, size_t size, + cmp_r_func_t cmp_func, const void *priv, + size_t l, size_t r) +{ + return do_cmp(base + inorder_to_eytzinger0(l, n) * size, + base + inorder_to_eytzinger0(r, n) * size, + cmp_func, priv); +} + +static inline void eytzinger0_do_swap(void *base, size_t n, size_t size, + swap_r_func_t swap_func, const void *priv, + size_t l, size_t r) +{ + do_swap(base + inorder_to_eytzinger0(l, n) * size, + base + inorder_to_eytzinger0(r, n) * size, + size, swap_func, priv); +} + +void eytzinger0_sort_r(void *base, size_t n, size_t size, + cmp_r_func_t cmp_func, + swap_r_func_t swap_func, + const void *priv) +{ + int i, c, r; + + if (!swap_func) { + if (is_aligned(base, size, 8)) + swap_func = SWAP_WORDS_64; + else if (is_aligned(base, size, 4)) + swap_func = SWAP_WORDS_32; + else + swap_func = SWAP_BYTES; + } + + /* heapify */ + for (i = n / 2 - 1; i >= 0; --i) { + for (r = i; r * 2 + 1 < n; r = c) { + c = r * 2 + 1; + + if (c + 1 < n && + eytzinger0_do_cmp(base, n, size, cmp_func, priv, c, c + 1) < 0) + c++; + + if (eytzinger0_do_cmp(base, n, size, cmp_func, priv, r, c) >= 0) + break; + + eytzinger0_do_swap(base, n, size, swap_func, priv, r, c); + } + } + + /* sort */ + for (i = n - 1; i > 0; --i) { + eytzinger0_do_swap(base, n, size, swap_func, priv, 0, i); + + for (r = 0; r * 2 + 1 < i; r = c) { + c = r * 2 + 1; + + if (c + 1 < i && + eytzinger0_do_cmp(base, n, size, cmp_func, priv, c, c + 1) < 0) + c++; + + if (eytzinger0_do_cmp(base, n, size, cmp_func, priv, r, c) >= 0) + break; + + eytzinger0_do_swap(base, n, size, swap_func, priv, r, c); + } + } +} +EXPORT_SYMBOL_GPL(eytzinger0_sort_r); + +void eytzinger0_sort(void *base, size_t n, size_t size, + cmp_func_t cmp_func, + swap_func_t swap_func) +{ + struct wrapper w = { + .cmp = cmp_func, + .swap = swap_func, + }; + + return eytzinger0_sort_r(base, n, size, _CMP_WRAPPER, SWAP_WRAPPER, &w); +} +EXPORT_SYMBOL_GPL(eytzinger0_sort);
eytzinger trees are a faster alternative to binary search. They're a bit more expensive to setup, but lookups perform much better assuming the tree isn't entirely in cache. Binary search is a worst case scenario for branch prediction and prefetching, but eytzinger trees have children adjacent in memory and thus we can prefetch before knowing the result of a comparison. An eytzinger tree is a binary tree laid out in an array, with the same geometry as the usual binary heap construction, but used as a search tree instead. Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev> --- fs/bcachefs/bset.c | 2 +- fs/bcachefs/journal_seq_blacklist.c | 6 +- fs/bcachefs/replicas.c | 17 ++- fs/bcachefs/replicas.h | 3 +- fs/bcachefs/super-io.h | 2 +- fs/bcachefs/util.c | 145 +-------------------- fs/bcachefs/util.h | 4 - {fs/bcachefs => include/linux}/eytzinger.h | 56 ++++---- lib/sort.c | 85 ++++++++++++ 9 files changed, 136 insertions(+), 184 deletions(-) rename {fs/bcachefs => include/linux}/eytzinger.h (78%)