@@ -599,6 +599,109 @@ void parse_path_pattern(const char **pattern,
*patternlen = len;
}
+/*
+ * Comparison callback for the cone-pattern hashmaps.
+ *
+ * `a` and `b` point to struct pattern_entry values; `unused_cmp_data` and
+ * `key` are ignored. Returns 0 when both entries hold the same pattern
+ * (same length and same bytes) and non-zero otherwise.
+ *
+ * Lengths are compared first: comparing only the shorter length would make
+ * a pattern equal to any longer pattern that starts with it ("/a" vs "/ab").
+ */
+static int pl_hashmap_cmp(const void *unused_cmp_data,
+			  const void *a, const void *b, const void *key)
+{
+	const struct pattern_entry *ee1 = a;
+	const struct pattern_entry *ee2 = b;
+
+	if (ee1->patternlen != ee2->patternlen)
+		return 1;
+
+	return strncmp(ee1->pattern, ee2->pattern, ee1->patternlen);
+}
+
+/*
+ * Record `given` in the cone-mode hashsets of `pl`.
+ *
+ * Does nothing when cone patterns are disabled for `pl` or when the pattern
+ * is exactly "/*". Otherwise:
+ *  - a negative pattern ending in "/*" moves its prefix (the pattern without
+ *    the trailing "/*") from recursive_hashmap to parent_hashmap; the prefix
+ *    must already be present in recursive_hashmap;
+ *  - a positive pattern not ending in "/*" is added to recursive_hashmap and
+ *    any identical entry in parent_hashmap is dropped (with a warning);
+ *  - anything else is not a cone pattern: a warning is printed, both
+ *    hashmaps are freed and pl->use_cone_patterns is cleared.
+ *
+ * Entries placed in the hashmaps are heap-allocated and own their `pattern`
+ * string.
+ */
+static void add_pattern_to_hashsets(struct pattern_list *pl, struct path_pattern *given)
+{
+	struct pattern_entry *translated;
+	struct pattern_entry *removed;
+	char *truncated;
+
+	if (!pl->use_cone_patterns)
+		return;
+
+	if (!strcmp(given->pattern, "/*"))
+		return;
+
+	if (given->patternlen > 2 &&
+	    !strcmp(given->pattern + given->patternlen - 2, "/*")) {
+		if (!(given->flags & PATTERN_FLAG_NEGATIVE)) {
+			/* Not a cone pattern. */
+			warning(_("unrecognized pattern: '%s'"), given->pattern);
+			goto clear_hashmaps;
+		}
+
+		/* Strip the trailing "/*" to get the directory being narrowed. */
+		truncated = xstrdup(given->pattern);
+		truncated[given->patternlen - 2] = 0;
+
+		translated = xmalloc(sizeof(*translated));
+		translated->pattern = truncated;
+		translated->patternlen = given->patternlen - 2;
+		hashmap_entry_init(translated,
+				   memhash(translated->pattern, translated->patternlen));
+
+		if (!hashmap_get(&pl->recursive_hashmap, translated, NULL)) {
+			/* We did not see the "parent" included */
+			warning(_("unrecognized negative pattern: '%s'"),
+				given->pattern);
+			free(truncated);
+			free(translated);
+			goto clear_hashmaps;
+		}
+
+		hashmap_add(&pl->parent_hashmap, translated);
+
+		/*
+		 * hashmap_remove() hands back the entry it unlinked; that
+		 * entry and its pattern string are ours to release.
+		 */
+		removed = hashmap_remove(&pl->recursive_hashmap, translated, NULL);
+		if (removed) {
+			free(removed->pattern);
+			free(removed);
+		}
+		return;
+	}
+
+	if (given->flags & PATTERN_FLAG_NEGATIVE) {
+		warning(_("unrecognized negative pattern: '%s'"),
+			given->pattern);
+		goto clear_hashmaps;
+	}
+
+	translated = xmalloc(sizeof(*translated));
+	translated->pattern = xstrdup(given->pattern);
+	translated->patternlen = given->patternlen;
+	hashmap_entry_init(translated,
+			   memhash(translated->pattern, translated->patternlen));
+
+	hashmap_add(&pl->recursive_hashmap, translated);
+
+	if (hashmap_get(&pl->parent_hashmap, translated, NULL)) {
+		/* we already included this at the parent level */
+		warning(_("your sparse-checkout file may have issues: pattern '%s' is repeated"),
+			given->pattern);
+
+		/*
+		 * Release the entry taken out of parent_hashmap, not
+		 * `translated`: the latter now belongs to recursive_hashmap.
+		 */
+		removed = hashmap_remove(&pl->parent_hashmap, translated, NULL);
+		if (removed) {
+			free(removed->pattern);
+			free(removed);
+		}
+	}
+
+	return;
+
+clear_hashmaps:
+	warning(_("disabling cone pattern matching"));
+	/*
+	 * NOTE(review): the pattern strings owned by the entries do not
+	 * appear to be released here - confirm whether hashmap_free() with
+	 * free_entries set covers them.
+	 */
+	hashmap_free(&pl->parent_hashmap, 1);
+	hashmap_free(&pl->recursive_hashmap, 1);
+	pl->use_cone_patterns = 0;
+}
+
+/*
+ * Return 1 if `map` has an entry that matches the current contents of
+ * `pattern` (as decided by the map's comparison function), 0 otherwise.
+ */
+static int hashmap_contains_path(struct hashmap *map,
+				 struct strbuf *pattern)
+{
+	struct pattern_entry key;
+
+	/* Probe with a stack key that borrows the strbuf's buffer. */
+	key.patternlen = pattern->len;
+	key.pattern = pattern->buf;
+	hashmap_entry_init(&key, memhash(key.pattern, key.patternlen));
+
+	return hashmap_get(map, &key, NULL) != NULL;
+}
+
void add_pattern(const char *string, const char *base,
int baselen, struct pattern_list *pl, int srcpos)
{
@@ -623,6 +726,8 @@ void add_pattern(const char *string, const char *base,
ALLOC_GROW(pl->patterns, pl->nr + 1, pl->alloc);
pl->patterns[pl->nr++] = pattern;
pattern->pl = pl;
+
+ add_pattern_to_hashsets(pl, pattern);
}
static int read_skip_worktree_file_from_index(const struct index_state *istate,
@@ -848,6 +953,10 @@ static int add_patterns_from_buffer(char *buf, size_t size,
int i, lineno = 1;
char *entry;
+ pl->use_cone_patterns = core_sparse_checkout_cone;
+ hashmap_init(&pl->recursive_hashmap, pl_hashmap_cmp, NULL, 0);
+ hashmap_init(&pl->parent_hashmap, pl_hashmap_cmp, NULL, 0);
+
pl->filebuf = buf;
if (skip_utf8_bom(&buf, size))
@@ -1084,16 +1193,64 @@ enum pattern_match_result path_matches_pattern_list(
struct index_state *istate)
{
struct path_pattern *pattern;
- pattern = last_matching_pattern_from_list(pathname, pathlen, basename,
- dtype, pl, istate);
- if (pattern) {
- if (pattern->flags & PATTERN_FLAG_NEGATIVE)
- return NOT_MATCHED;
- else
- return MATCHED;
+ struct strbuf parent_pathname = STRBUF_INIT;
+ int result = NOT_MATCHED;
+ const char *slash_pos;
+
+ if (!pl->use_cone_patterns) {
+ pattern = last_matching_pattern_from_list(pathname, pathlen, basename,
+ dtype, pl, istate);
+ if (pattern) {
+ if (pattern->flags & PATTERN_FLAG_NEGATIVE)
+ return NOT_MATCHED;
+ else
+ return MATCHED;
+ }
+
+ return UNDECIDED;
}
- return UNDECIDED;
+ strbuf_addch(&parent_pathname, '/');
+ strbuf_add(&parent_pathname, pathname, pathlen);
+
+ if (hashmap_contains_path(&pl->recursive_hashmap,
+ &parent_pathname)) {
+ result = MATCHED;
+ goto done;
+ }
+
+ slash_pos = strrchr(parent_pathname.buf, '/');
+
+ if (slash_pos == parent_pathname.buf) {
+ /* include every file in root */
+ result = MATCHED;
+ goto done;
+ }
+
+ strbuf_setlen(&parent_pathname, slash_pos - parent_pathname.buf);
+
+ if (hashmap_contains_path(&pl->parent_hashmap, &parent_pathname)) {
+ result = MATCHED;
+ goto done;
+ }
+
+ while (parent_pathname.len) {
+ if (hashmap_contains_path(&pl->recursive_hashmap,
+ &parent_pathname)) {
+ result = UNDECIDED;
+ goto done;
+ }
+
+ slash_pos = strrchr(parent_pathname.buf, '/');
+ if (slash_pos == parent_pathname.buf)
+ break;
+
+ strbuf_setlen(&parent_pathname, slash_pos - parent_pathname.buf);
+ }
+
+done:
+ strbuf_release(&parent_pathname);
+ return result;
}
static struct path_pattern *last_matching_pattern_from_lists(
@@ -4,6 +4,7 @@
/* See Documentation/technical/api-directory-listing.txt */
#include "cache.h"
+#include "hashmap.h"
#include "strbuf.h"
struct dir_entry {
@@ -37,6 +38,13 @@ struct path_pattern {
int srcpos;
};
+/*
+ * Entry type for the cone-pattern hashmaps (recursive_hashmap and
+ * parent_hashmap in struct pattern_list). Also used on the stack as a
+ * lookup key.
+ */
+struct pattern_entry {
+ struct hashmap_entry ent; /* keep first: the struct pointer itself is passed as the hashmap entry */
+ char *pattern; /* pattern text; a heap copy for stored entries, a borrowed buffer for lookup keys */
+ size_t patternlen; /* length of pattern in bytes; used for hashing and comparison */
+};
+
/*
* Each excludes file will be parsed into a fresh exclude_list which
* is appended to the relevant exclude_list_group (either EXC_DIRS or
@@ -55,6 +63,25 @@ struct pattern_list {
const char *src;
struct path_pattern **patterns;
+
+ /*
+ * While scanning the excludes, we attempt to match the patterns
+ * with a more restricted set that allows us to use hashsets for
+ * matching logic, which is faster than the linear lookup in the
+ * excludes array above. If non-zero, that check succeeded.
+ */
+ unsigned use_cone_patterns;
+
+ /*
+ * Stores paths where everything starting with those paths
+ * is included.
+ */
+ struct hashmap recursive_hashmap;
+
+ /*
+ * Used to check single-level parents of blobs.
+ */
+ struct hashmap parent_hashmap;
};
/*
@@ -143,7 +143,8 @@ test_expect_success 'set sparse-checkout using --stdin' '
test_expect_success 'cone mode: match patterns' '
git -C repo config --worktree core.sparseCheckoutCone true &&
rm -rf repo/a repo/folder1 repo/folder2 &&
- git -C repo read-tree -mu HEAD &&
+ git -C repo read-tree -mu HEAD 2>err &&
+ test_i18ngrep ! "disabling cone pattern matching" err &&
git -C repo reset --hard &&
ls repo >dir &&
cat >expect <<-EOF &&
@@ -154,6 +155,14 @@ test_expect_success 'cone mode: match patterns' '
test_cmp expect dir
'
+# Append a negative pattern to the sparse-checkout file and check that
+# read-tree reports it as unrecognized on stderr. The original file is
+# copied aside first and moved back when the test finishes.
+# NOTE(review): presumably /deep/deeper is not listed in the existing
+# sparse-checkout file - confirm against the earlier tests.
+test_expect_success 'cone mode: warn on bad pattern' '
+ test_when_finished mv sparse-checkout repo/.git/info/ &&
+ cp repo/.git/info/sparse-checkout . &&
+ echo "!/deep/deeper/*" >>repo/.git/info/sparse-checkout &&
+ git -C repo read-tree -mu HEAD 2>err &&
+ test_i18ngrep "unrecognized negative pattern" err
+'
+
test_expect_success 'sparse-checkout disable' '
git -C repo sparse-checkout disable &&
test_path_is_missing repo/.git/info/sparse-checkout &&