@@ -816,6 +816,7 @@ TEST_BUILTINS_OBJS += test-lazy-init-name-hash.o
TEST_BUILTINS_OBJS += test-match-trees.o
TEST_BUILTINS_OBJS += test-mergesort.o
TEST_BUILTINS_OBJS += test-mktemp.o
+TEST_BUILTINS_OBJS += test-name-hash.o
TEST_BUILTINS_OBJS += test-online-cpus.o
TEST_BUILTINS_OBJS += test-pack-mtimes.o
TEST_BUILTINS_OBJS += test-parse-options.o
new file mode 100644
@@ -0,0 +1,24 @@
+/*
+ * test-name-hash.c: Read a list of paths over stdin and report on their
+ * name-hash and full name-hash.
+ */
+
+#include "test-tool.h"
+#include "git-compat-util.h"
+#include "pack-objects.h"
+#include "strbuf.h"
+
+int cmd__name_hash(int argc UNUSED, const char **argv UNUSED)
+{
+	struct strbuf path = STRBUF_INIT;
+	uint32_t hash, full;
+
+	/* One row per stdin line: name-hash, full name-hash, then the path. */
+	while (strbuf_getline(&path, stdin) == 0) {
+		hash = pack_name_hash(path.buf);
+		full = pack_full_name_hash(path.buf);
+		printf("%10"PRIu32"\t%10"PRIu32"\t%s\n", hash, full, path.buf);
+	}
+	strbuf_release(&path);
+	return 0;
+}
@@ -44,6 +44,7 @@ static struct test_cmd cmds[] = {
{ "match-trees", cmd__match_trees },
{ "mergesort", cmd__mergesort },
{ "mktemp", cmd__mktemp },
+ { "name-hash", cmd__name_hash },
{ "online-cpus", cmd__online_cpus },
{ "pack-mtimes", cmd__pack_mtimes },
{ "parse-options", cmd__parse_options },
@@ -37,6 +37,7 @@ int cmd__lazy_init_name_hash(int argc, const char **argv);
int cmd__match_trees(int argc, const char **argv);
int cmd__mergesort(int argc, const char **argv);
int cmd__mktemp(int argc, const char **argv);
+int cmd__name_hash(int argc, const char **argv);
int cmd__online_cpus(int argc, const char **argv);
int cmd__pack_mtimes(int argc, const char **argv);
int cmd__parse_options(int argc, const char **argv);
new file mode 100755
@@ -0,0 +1,41 @@
+#!/bin/sh
+
+test_description='Tests name-hash collision rates for the paths at HEAD'
+. ./perf-lib.sh
+
+GIT_TEST_PASSING_SANITIZE_LEAK=0
+export GIT_TEST_PASSING_SANITIZE_LEAK
+
+test_perf_large_repo
+
+# path-list: every path in the tree at HEAD, one per line.
+test_size 'paths at head' '
+	git ls-tree -r --name-only HEAD >path-list &&
+	wc -l <path-list
+'
+
+# name-hashes: "<name-hash> <full-name-hash> <path>" per path; the *-count
+# files hold "uniq -c" output, i.e. "<multiplicity> <hash>" per distinct hash.
+test_size 'number of distinct name-hashes' '
+	test-tool name-hash <path-list >name-hashes &&
+	awk "{ print \$1; }" <name-hashes | sort -n | uniq -c >name-hash-count &&
+	wc -l <name-hash-count
+'
+
+# Same as above, but for the full-name-hash in column 2.
+test_size 'number of distinct full-name-hashes' '
+	awk "{ print \$2; }" <name-hashes | sort -n | uniq -c >full-name-hash-count &&
+	wc -l <full-name-hash-count
+'
+
+# The largest "uniq -c" count is the size of the worst collision bucket.
+test_size 'maximum multiplicity of name-hashes' '
+	sort -nr <name-hash-count | head -n 1 | awk "{ print \$1; }"
+'
+
+# Worst collision bucket for the full-name-hash.
+test_size 'maximum multiplicity of full-name-hashes' '
+	sort -nr <full-name-hash-count | head -n 1 | awk "{ print \$1; }"
+'
+
+test_done
@@ -27,6 +27,32 @@ has_any () {
grep -Ff "$1" "$2"
}
+# The name-hash values are stored in the .bitmap files, so check that
+# the name-hash calculations are stable across versions. This is not
+# exhaustive, but these hashing algorithms would be hard to change
+# without causing deviations here.
+test_expect_success 'name-hash value stability' '
+	cat >names <<-\EOF &&
+	first
+	second
+	third
+	one-long-enough-for-collisions
+	two-long-enough-for-collisions
+	EOF
+
+	test-tool name-hash <names >out &&
+
+	cat >expect <<-\EOF &&
+	2582249472	3109209818	first
+	2289942528	3781118409	second
+	2300837888	3028707182	third
+	2544516325	3241327563	one-long-enough-for-collisions
+	2544516325	4207880830	two-long-enough-for-collisions
+	EOF
+
+	test_cmp expect out
+'
+
test_bitmap_cases () {
writeLookupTable=false
for i in "$@"