@@ -918,10 +918,16 @@ enum bloom_result check_modified_path_bloom_filter(struct repository *r,
for (i = 0; i < pathspec->nr; i++) {
struct pathspec_item *pi = &pathspec->items[i];
- if (bloom_filter_check_bits(&bf,
+ if (bf.nr_bits == GRAPH_MODIFIED_PATH_BLOOM_FILTER_EMBEDDED_NR_BITS) {
+ /* FIXME: reads bf.bits through a cast to const uint64_t *; the access may be misaligned. */
+ if ((*((const uint64_t*)bf.bits) & pi->modified_path_bloom_mask) == pi->modified_path_bloom_mask)
+ return BLOOM_POSSIBLY_YES;
+ } else {
+ if (bloom_filter_check_bits(&bf,
pi->modified_path_bloom_hashes,
pi->modified_path_bloom_hashes_nr))
- return BLOOM_POSSIBLY_YES;
+ return BLOOM_POSSIBLY_YES;
+ }
}
return BLOOM_DEFINITELY_NOT;
@@ -981,6 +987,7 @@ void init_pathspec_bloom_fields(struct repository *r,
size_t len = pi->len;
int path_component_nr = 0, j;
uint32_t *hashes;
+ struct bloom_filter embedded_bf;
/*
* Pathspec parsing has normalized away any consecutive
@@ -1012,6 +1019,12 @@ void init_pathspec_bloom_fields(struct repository *r,
graph->num_modified_path_bloom_hashes,
hashes);
}
+
+ embedded_bf.nr_bits = GRAPH_MODIFIED_PATH_BLOOM_FILTER_EMBEDDED_NR_BITS;
+ embedded_bf.bits = (uint8_t*) &pi->modified_path_bloom_mask;
+ bloom_filter_set_bits(&embedded_bf,
+ pi->modified_path_bloom_hashes,
+ pi->modified_path_bloom_hashes_nr);
}
pathspec->can_use_modified_path_bloom_filters = 1;
@@ -408,6 +408,7 @@ static void init_pathspec_item(struct pathspec_item *item, unsigned flags,
item->attr_match_nr = 0;
item->modified_path_bloom_hashes_nr = 0;
item->modified_path_bloom_hashes = NULL;
+ item->modified_path_bloom_mask = 0;
/* PATHSPEC_LITERAL_PATH ignores magic */
if (flags & PATHSPEC_LITERAL_PATH) {
@@ -64,6 +64,7 @@ struct pathspec {
*/
uint32_t modified_path_bloom_hashes_nr;
uint32_t *modified_path_bloom_hashes;
+ uint64_t modified_path_bloom_mask;
} *items;
};
Since the previous patch, we check the presence of all leading directories of a pathspec in modified path Bloom filters to significantly lower the false positive rate. This means that we are checking a lot of bit positions in the Bloom filters. However, as shown earlier in this series, a significant portion of commits have embedded modified path Bloom filters, all with the same size of 63 bits. Use a 64-bit mask to check all relevant bits in embedded modified path Bloom filters in one step. I don't have benchmark results at hand, but as far as I can recall from some old results, this reduces the time spent loading and checking modified path Bloom filters by up to 10%. Checking Bloom filters is, of course, only a small part of the whole runtime of a pathspec-limited revision walk, so the overall improvement is only about 1-2%. Signed-off-by: SZEDER Gábor <szeder.dev@gmail.com> --- commit-graph.c | 17 +++++++++++++++-- pathspec.c | 1 + pathspec.h | 1 + 3 files changed, 17 insertions(+), 2 deletions(-)