@@ -839,6 +839,7 @@ LIB_OBJS += base85.o
LIB_OBJS += bisect.o
LIB_OBJS += blame.o
LIB_OBJS += blob.o
+LIB_OBJS += bloom-filter.o
LIB_OBJS += branch.o
LIB_OBJS += bulk-checkin.o
LIB_OBJS += bundle.o
new file mode 100644
@@ -0,0 +1,91 @@
+#include "bloom-filter.h"
+
+void bloom_filter_init(struct bloom_filter *bf, unsigned int nr_hashes,
+ uint32_t nr_elements)
+{
+ /* n * k / ln(2) ≈ n * k / 0.69315 ≈ n * k * 10 / 7 */
+ uint32_t nr_bits = st_mult(st_mult(nr_elements, nr_hashes), 10) / 7;
+ uint32_t nr_bytes = st_add(nr_bits, 7) / 8;
+ /*
+ * But we round up to fully utilize all bytes, thus lowering the
+ * probability of false positives a bit.
+ */
+ bf->nr_bits = nr_bytes * 8;
+ bf->bits = xcalloc(nr_bytes, sizeof(*(bf->bits)));
+}
+
+void bloom_filter_init_with_size(struct bloom_filter *bf, uint32_t nr_bits)
+{
+ uint32_t nr_bytes = st_add(nr_bits, 7) / 8;
+ bf->nr_bits = nr_bits;
+ bf->bits = xcalloc(nr_bytes, sizeof(*(bf->bits)));
+}
+
+void bloom_filter_free(struct bloom_filter *bf)
+{
+ FREE_AND_NULL(bf->bits);
+ bf->nr_bits = 0;
+}
+
+uint32_t bloom_filter_bytes(struct bloom_filter *bf)
+{
+ return (bf->nr_bits + 7) / 8;
+}
+
+void bloom_filter_clear_all_bits(struct bloom_filter *bf)
+{
+ memset(bf->bits, 0, bloom_filter_bytes(bf));
+}
+
+void bloom_filter_set_all_bits(struct bloom_filter *bf)
+{
+ memset(bf->bits, 0xff, bloom_filter_bytes(bf));
+}
+
+static inline uint32_t bit_offset(uint32_t nr_bits, uint32_t hash)
+{
+ return hash % nr_bits;
+}
+
+static inline uint32_t byte_offset(uint32_t nr_bits, uint32_t bit_offset)
+{
+ return (nr_bits - 1) / 8 - bit_offset / 8;
+}
+
+static inline uint8_t byte_mask(uint32_t bit_offset)
+{
+ return 1 << (bit_offset % 8);
+}
+
+static inline void bloom_filter_set_one_bit(struct bloom_filter *bf,
+ uint32_t hash)
+{
+ uint32_t offset = bit_offset(bf->nr_bits, hash);
+ bf->bits[byte_offset(bf->nr_bits, offset)] |= byte_mask(offset);
+}
+
+void bloom_filter_set_bits(struct bloom_filter *bf, const uint32_t *hashes,
+ unsigned int nr_hashes)
+{
+ unsigned int i;
+ for (i = 0; i < nr_hashes; i++)
+ bloom_filter_set_one_bit(bf, hashes[i]);
+}
+
+static inline int bloom_filter_check_one_bit(struct bloom_filter *bf,
+ uint32_t hash)
+{
+ uint32_t offset = bit_offset(bf->nr_bits, hash);
+ return bf->bits[byte_offset(bf->nr_bits, offset)] & byte_mask(offset);
+}
+
+enum bloom_result bloom_filter_check_bits(struct bloom_filter *bf,
+ const uint32_t *hashes,
+ unsigned int nr_hashes)
+{
+ unsigned int i;
+ for (i = 0; i < nr_hashes; i++)
+ if (!bloom_filter_check_one_bit(bf, hashes[i]))
+ return BLOOM_DEFINITELY_NOT;
+ return BLOOM_POSSIBLY_YES;
+}
new file mode 100644
@@ -0,0 +1,47 @@
+#ifndef BLOOM_FILTER_H
+#define BLOOM_FILTER_H
+
+#include "git-compat-util.h"
+
+enum bloom_result {
+ /*
+ * BLOOM_CANT_TELL is a value that a caller can use to report that
+ * a Bloom filter is not available; bloom_filter_check_bits() will
+ * never return it.
+ */
+ BLOOM_CANT_TELL = -1,
+ BLOOM_DEFINITELY_NOT = 0,
+ BLOOM_POSSIBLY_YES = 1
+};
+
+struct bloom_filter {
+ uint32_t nr_bits;
+ uint8_t *bits;
+};
+
+/*
+ * Initialize a Bloom filter with the number of bits that is (close to)
+ * optimal to hold the given number of elements using the given number
+ * of hashes per element.
+ */
+void bloom_filter_init(struct bloom_filter *bf, uint32_t nr_hashes,
+ uint32_t nr_elements);
+
+/* Initialize a Bloom filter with the given number of bits */
+void bloom_filter_init_with_size(struct bloom_filter *bf, uint32_t nr_bits);
+
+void bloom_filter_free(struct bloom_filter *bf);
+
+/* Return the size of the Bloom filter's bit array in bytes */
+uint32_t bloom_filter_bytes(struct bloom_filter *bf);
+
+void bloom_filter_clear_all_bits(struct bloom_filter *bf);
+void bloom_filter_set_all_bits(struct bloom_filter *bf);
+
+void bloom_filter_set_bits(struct bloom_filter *bf, const uint32_t *hashes,
+ unsigned int nr_hashes);
+enum bloom_result bloom_filter_check_bits(struct bloom_filter *bf,
+ const uint32_t *hashes,
+ unsigned int nr_hashes);
+
+#endif