@@ -827,6 +827,7 @@ TEST_BUILTINS_OBJS += test-parse-pathspec-file.o
TEST_BUILTINS_OBJS += test-partial-clone.o
TEST_BUILTINS_OBJS += test-path-utils.o
TEST_BUILTINS_OBJS += test-path-walk.o
+TEST_BUILTINS_OBJS += test-pathspec.o
TEST_BUILTINS_OBJS += test-pcre2-config.o
TEST_BUILTINS_OBJS += test-pkt-line.o
TEST_BUILTINS_OBJS += test-proc-receive.o
@@ -17,6 +17,125 @@
#include "quote.h"
#include "wildmatch.h"
+/*
+ * This is basically a strcmp, but we do not want the caller
+ * to have to terminate "a", so we pretend as if it had a NUL.
+ */
+static int pathspec_trie_cmp(const char *a, size_t a_len,
+ const char *b)
+{
+ int ret = strncmp(a, b, a_len);
+ return ret ?
+ ret :
+ (unsigned char)0 - (unsigned char )b[a_len];
+}
+
+static struct pathspec_trie *pathspec_trie_alloc(const char *path, size_t len)
+{
+ struct pathspec_trie *ret = xcalloc(1, sizeof(*ret) + len);
+ memcpy(ret->path, path, len);
+ return ret;
+}
+
+/*
+ * Add "path" to the trie rooted at "t".
+ */
+static void pathspec_trie_add(struct pathspec_trie *t, const char *path)
+{
+ /*
+ * Special case the empty path (i.e., "."), as our splitting algorithm
+ * below assumes at least one component.
+ */
+ if (!*path) {
+ t->terminal = 1;
+ return;
+ }
+
+ while (1) {
+ const char *slash = strchrnul(path, '/');
+ size_t len = slash - path;
+ int pos;
+
+ pos = pathspec_trie_lookup(t, path, len);
+ if (pos < 0) {
+ ALLOC_GROW(t->entries, t->nr + 1, t->alloc);
+
+ pos = -pos - 1;
+ if (pos < t->nr)
+ memmove(t->entries + pos + 1,
+ t->entries + pos,
+ sizeof(*t->entries) * (t->nr - pos));
+ t->entries[pos] = pathspec_trie_alloc(path, len);
+ t->nr++;
+ }
+
+ t = t->entries[pos];
+ path += len;
+
+ if (!*path) {
+ t->must_be_dir = 0;
+ t->terminal = 1;
+ return;
+ }
+
+ while (*path == '/')
+ path++;
+ if (!*path) {
+ /*
+ * if we were already a terminal, then do not set
+ * must_be_dir; we are "foo/", but we already had a
+ * pathspec "foo", which is a superset of us.
+ */
+ if (!t->terminal)
+ t->must_be_dir = 1;
+ t->terminal = 1;
+ return;
+ }
+ }
+}
+
+struct pathspec_trie *pathspec_trie_build(const struct pathspec *pathspec)
+{
+ struct pathspec_trie *ret;
+ int i;
+
+ /* we only make a trie for plain-vanilla pathspecs */
+ if (pathspec->has_wildcard || (pathspec->magic & ~PATHSPEC_LITERAL))
+ return NULL;
+
+ ret = pathspec_trie_alloc("", 0);
+
+ /*
+ * XXX we could construct the trie more efficiently by creating each
+ * node with all of its entries in sorted order. But this is much
+ * simpler, and since we only do this once at the start of a traversal,
+ * it's probably fast enough.
+ */
+ for (i = 0; i < pathspec->nr; i++)
+ pathspec_trie_add(ret, pathspec->items[i].match);
+
+ return ret;
+}
+
+int pathspec_trie_lookup(const struct pathspec_trie *parent,
+ const char *path, size_t len)
+{
+ int lo = 0, hi = parent->nr;
+ while (lo < hi) {
+ int mi = lo + ((hi - lo) / 2);
+ int cmp;
+
+ cmp = pathspec_trie_cmp(path, len, parent->entries[mi]->path);
+ if (!cmp)
+ return mi;
+ if (cmp < 0)
+ hi = mi;
+ else
+ lo = mi + 1;
+ }
+ return -lo - 1;
+}
+
/*
* Finds which of the given pathspecs match items in the index.
*
@@ -2,6 +2,7 @@
#define PATHSPEC_H
struct index_state;
+struct pathspec;
/* Pathspec magic */
#define PATHSPEC_FROMTOP (1<<0)
@@ -22,6 +23,28 @@ struct index_state;
#define PATHSPEC_ONESTAR 1 /* the pathspec pattern satisfies GFNM_ONESTAR */
+struct pathspec_trie {
+ struct pathspec_trie **entries;
+ int nr, alloc;
+ unsigned terminal:1,
+ must_be_dir:1;
+ char path[FLEX_ARRAY];
+};
+
+/*
+ * Build a pathspec_trie for the given pathspec.
+ */
+struct pathspec_trie *pathspec_trie_build(const struct pathspec *);
+
+/*
+ * Do a binary search on one level of the pathspec_trie. If found,
+ * returns the offset of the item in the entry list. If not found,
+ * return a negative value encoding the offset where it would be inserted
+ * (you can recover the true offset with "-pos - 1").
+ */
+int pathspec_trie_lookup(const struct pathspec_trie *pst,
+ const char *path, size_t len);
+
/**
* See glossary-content.txt for the syntax of pathspec.
* In memory, a pathspec set is represented by "struct pathspec" and is
@@ -42,6 +42,7 @@ test_tool_sources = [
'test-partial-clone.c',
'test-path-utils.c',
'test-path-walk.c',
+ 'test-pathspec.c',
'test-pcre2-config.c',
'test-pkt-line.c',
'test-proc-receive.c',
new file mode 100644
@@ -0,0 +1,98 @@
+#include "test-tool.h"
+#include "pathspec.h"
+
+static const char usage_msg[] =
+"test-tool pathspec trie [pathspecs...] -- [paths....]";
+
+/*
+ * XXX Yuck. This is a lot of complicated code specific to our test. Even if it
+ * runs correctly, we have no real guarantee that the actual trie users are
+ * doing it right. And reusing their code is tough, because it happens as part
+ * of their own traversals (e.g., we walk the pathspec trie while walking the
+ * tree objects themselves).
+ *
+ * This whole test program should probably go away in favor of directly testing
+ * the tree-diff code.
+ */
+static int trie_match(const struct pathspec_trie *pst,
+ const char *path)
+{
+ int pathlen = strlen(path);
+ int is_dir = 0;
+
+ if (pathlen > 0 && path[pathlen-1] == '/') {
+ is_dir = 1;
+ pathlen--;
+ }
+
+ while (pathlen) {
+ const char *slash = memchr(path, '/', pathlen);
+ int component_len;
+ int pos;
+
+ if (slash)
+ component_len = slash - path;
+ else
+ component_len = pathlen;
+
+ pos = pathspec_trie_lookup(pst, path, component_len);
+ if (pos < 0)
+ return 0;
+
+ pst = pst->entries[pos];
+ path += component_len;
+ pathlen -= component_len;
+
+ while (pathlen && *path == '/') {
+ path++;
+ pathlen--;
+ }
+
+ if (pst->terminal) {
+ if (!pst->must_be_dir)
+ return 1;
+ if (pathlen)
+ return 1;
+ return is_dir;
+ }
+ }
+ return 0;
+}
+
+static int cmd_trie(const char **argv)
+{
+ const char **specs, **paths;
+ struct pathspec pathspec;
+ struct pathspec_trie *trie;
+
+ paths = specs = argv;
+ while (*paths && strcmp(*paths, "--"))
+ paths++;
+ if (*paths)
+ *paths++ = NULL;
+
+ parse_pathspec(&pathspec, 0, 0, "", specs);
+ trie = pathspec_trie_build(&pathspec);
+ if (!trie)
+ die("unable to make trie from pathspec");
+
+ for (; *paths; paths++) {
+ if (trie_match(trie, *paths))
+ printf("yes\n");
+ else
+ printf("no\n");
+ }
+ return 0;
+}
+
+int cmd__pathspec(int argc UNUSED, const char **argv)
+{
+ const char *cmd = argv[1];
+
+ if (!cmd)
+ usage(usage_msg);
+ else if (!strcmp(cmd, "trie"))
+ return cmd_trie(argv + 2);
+ else
+ die("unknown cmd: %s", cmd);
+}
@@ -54,6 +54,7 @@ static struct test_cmd cmds[] = {
{ "partial-clone", cmd__partial_clone },
{ "path-utils", cmd__path_utils },
{ "path-walk", cmd__path_walk },
+ { "pathspec", cmd__pathspec },
{ "pcre2-config", cmd__pcre2_config },
{ "pkt-line", cmd__pkt_line },
{ "proc-receive", cmd__proc_receive },
@@ -46,6 +46,7 @@ int cmd__parse_options_flags(int argc, const char **argv);
int cmd__parse_pathspec_file(int argc, const char** argv);
int cmd__parse_subcommand(int argc, const char **argv);
int cmd__partial_clone(int argc, const char **argv);
+int cmd__pathspec(int argc, const char **argv);
int cmd__path_utils(int argc, const char **argv);
int cmd__path_walk(int argc, const char **argv);
int cmd__pcre2_config(int argc, const char **argv);
@@ -788,6 +788,7 @@ integration_tests = [
't6134-pathspec-in-submodule.sh',
't6135-pathspec-with-attrs.sh',
't6136-pathspec-in-bare.sh',
+ 't6137-pathspec-trie.sh',
't6200-fmt-merge-msg.sh',
't6300-for-each-ref.sh',
't6301-for-each-ref-errors.sh',
new file mode 100755
@@ -0,0 +1,57 @@
+#!/bin/sh
+
+test_description='test optimized pathspec trie lookup'
+. ./test-lib.sh
+
+# This is a basic lookup test; the offsets are there to provide
+# some variation in where we land in our binary search.
+ps="faa fzz foo/bar foo/baa foo/bzz"
+for offset in a "a b" "a b c"; do
+ test_expect_success "lookups with ps=$offset $ps" "
+ cat >expect <<-\\EOF &&
+ no
+ yes
+ yes
+ no
+ EOF
+ test-tool pathspec trie $offset $ps -- f faa foo/bar foo/baz >actual &&
+ test_cmp expect actual
+ "
+done
+
+test_expect_success 'pathspecs match by prefix' '
+ cat >expect <<-\EOF &&
+ yes
+ yes
+ yes
+ EOF
+ test-tool pathspec trie foo -- foo foo/bar foo/with/deep/subdirs >actual &&
+ test_cmp expect actual
+'
+
+test_expect_success 'trailing slash sets must_be_dir' '
+ cat >expect <<-\EOF &&
+ no
+ yes
+ yes
+ EOF
+ test-tool pathspec trie dir/ -- dir dir/ dir/foo
+'
+
+test_expect_success 'overlapping pathspecs allow the "loose" side' '
+ echo yes >expect &&
+ test-tool pathspec trie foo foo/ -- foo >actual &&
+ test_cmp expect actual &&
+ test-tool pathspec trie foo/ foo -- foo >actual &&
+ test_cmp expect actual
+'
+
+test_expect_success '"." at the root matches everything' '
+ cat >expect <<-\EOF &&
+ yes
+ yes
+ EOF
+ test-tool pathspec trie . -- foo foo/bar
+'
+
+test_done