[7/8] sparse-checkout: properly match escaped characters
diff mbox series

Message ID 09dbe1f9029d173e579b74442b02bc20e1a82437.1579029963.git.gitgitgadget@gmail.com
State New
Headers show
Series
  • Harden the sparse-checkout builtin
Related show

Commit Message

Sibi Siddharthan via GitGitGadget Jan. 14, 2020, 7:26 p.m. UTC
From: Derrick Stolee <dstolee@microsoft.com>

In cone mode, the sparse-checkout feature uses hashset containment
queries to match paths. Make this algorithm respect escaped asterisk
(*) and backslash (\) characters.

Create dup_and_filter_pattern() method to convert a pattern by
removing escape characters and dropping an optional "/*" at the end.
This method is available in dir.h as we will use it in
builtin/sparse-chekcout.c in a later change.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 dir.c                              | 31 +++++++++++++++++++++++++++---
 dir.h                              |  1 +
 t/t1091-sparse-checkout-builtin.sh | 22 +++++++++++++++++----
 3 files changed, 47 insertions(+), 7 deletions(-)

Comments

Jeff King Jan. 14, 2020, 9:21 p.m. UTC | #1
On Tue, Jan 14, 2020 at 07:26:01PM +0000, Derrick Stolee via GitGitGadget wrote:

> From: Derrick Stolee <dstolee@microsoft.com>
> 
> In cone mode, the sparse-checkout feature uses hashset containment
> queries to match paths. Make this algorithm respect escaped asterisk
> (*) and backslash (\) characters.
> 
> Create dup_and_filter_pattern() method to convert a pattern by
> removing escape characters and dropping an optional "/*" at the end.
> This method is available in dir.h as we will use it in
> builtin/sparse-chekcout.c in a later change.

s/chekcout/checkout/

It took me a minute to understand the problem here, but I think it's: if
a path in the sparse-checkout file has "\*" in it, we'd try to match a
literal "\*" in the hash, not "*"?

But we wouldn't run into that yet because we don't properly _write_ the
escaped names until patch 8.

Is that right?

-Peff
Derrick Stolee Jan. 14, 2020, 10:08 p.m. UTC | #2
On 1/14/2020 4:21 PM, Jeff King wrote:
> On Tue, Jan 14, 2020 at 07:26:01PM +0000, Derrick Stolee via GitGitGadget wrote:
> 
>> From: Derrick Stolee <dstolee@microsoft.com>
>>
>> In cone mode, the sparse-checkout feature uses hashset containment
>> queries to match paths. Make this algorithm respect escaped asterisk
>> (*) and backslash (\) characters.
>>
>> Create dup_and_filter_pattern() method to convert a pattern by
>> removing escape characters and dropping an optional "/*" at the end.
>> This method is available in dir.h as we will use it in
>> builtin/sparse-chekcout.c in a later change.
> 
> s/chekcout/checkout/

Thanks.

> It took me a minute to understand the problem here, but I think it's: if
> a path in the sparse-checkout file has "\*" in it, we'd try to match a
> literal "\*" in the hash, not "*"?

Yes, the hashset would have the string "\*" instead of the string "*". This
would lead to missing directories when cone mode is enabled compared to
cone mode not being enabled.
 
> But we wouldn't run into that yet because we don't properly _write_ the
> escaped names until patch 8.

We wouldn't run into it when using the builtin, but also a user could
edit their sparse-checkout file manually OR figure out how to get the
"right" pattern by running "git sparse-checkout set "my\\*dir" (where the
escaped backslash is collapsed by the shell and Git sees "my\*dir".

Thanks,
-Stolee

Patch
diff mbox series

diff --git a/dir.c b/dir.c
index 150c05f4de..2840bacd40 100644
--- a/dir.c
+++ b/dir.c
@@ -630,6 +630,32 @@  int pl_hashmap_cmp(const void *unused_cmp_data,
 	return strncmp(ee1->pattern, ee2->pattern, min_len);
 }
 
+char *dup_and_filter_pattern(const char *pattern)
+{
+	char *set, *read;
+	char *result = xstrdup(pattern);
+
+	set = result;
+	read = result;
+
+	while (*read) {
+		/* skip escape characters (once) */
+		if (*read == '\\')
+			read++;
+
+		*set = *read;
+
+		set++;
+		read++;
+	}
+	*set = 0;
+
+	if (*(read - 2) == '/' && *(read - 1) == '*')
+		*(read - 2) = 0;
+
+	return result;
+}
+
 static void add_pattern_to_hashsets(struct pattern_list *pl, struct path_pattern *given)
 {
 	struct pattern_entry *translated;
@@ -698,8 +724,7 @@  static void add_pattern_to_hashsets(struct pattern_list *pl, struct path_pattern
 			goto clear_hashmaps;
 		}
 
-		truncated = xstrdup(given->pattern);
-		truncated[given->patternlen - 2] = 0;
+		truncated = dup_and_filter_pattern(given->pattern);
 
 		translated = xmalloc(sizeof(struct pattern_entry));
 		translated->pattern = truncated;
@@ -733,7 +758,7 @@  static void add_pattern_to_hashsets(struct pattern_list *pl, struct path_pattern
 
 	translated = xmalloc(sizeof(struct pattern_entry));
 
-	translated->pattern = xstrdup(given->pattern);
+	translated->pattern = dup_and_filter_pattern(given->pattern);
 	translated->patternlen = given->patternlen;
 	hashmap_entry_init(&translated->ent,
 			   ignore_case ?
diff --git a/dir.h b/dir.h
index 77a43dbf89..6dcd9d33e7 100644
--- a/dir.h
+++ b/dir.h
@@ -304,6 +304,7 @@  int pl_hashmap_cmp(const void *unused_cmp_data,
 		   const struct hashmap_entry *a,
 		   const struct hashmap_entry *b,
 		   const void *key);
+char *dup_and_filter_pattern(const char *pattern);
 int hashmap_contains_parent(struct hashmap *map,
 			    const char *path,
 			    struct strbuf *buffer);
diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh
index 5b50be53a4..051c1f3bf2 100755
--- a/t/t1091-sparse-checkout-builtin.sh
+++ b/t/t1091-sparse-checkout-builtin.sh
@@ -366,13 +366,27 @@  test_expect_success 'pattern-checks: starting "*"' '
 	check_read_tree_errors repo "a deep" "disabling cone pattern matching"
 '
 
-test_expect_success 'pattern-checks: escaped "*"' '
-	cat >repo/.git/info/sparse-checkout <<-\EOF &&
+test_expect_success BSLASHPSPEC 'pattern-checks: escaped "*"' '
+	git clone repo escaped &&
+	TREEOID=$(git -C escaped rev-parse HEAD:folder1) &&
+	NEWTREE=$(git -C escaped mktree <<-EOF
+	$(git -C escaped ls-tree HEAD)
+	040000 tree $TREEOID	zbad\\dir
+	040000 tree $TREEOID	zdoes*exist
+	EOF
+	) &&
+	COMMIT=$(git -C escaped commit-tree $NEWTREE -p HEAD) &&
+	git -C escaped reset --hard $COMMIT &&
+	check_files escaped "a deep folder1 folder2 zbad\\dir zdoes*exist" &&
+	git -C escaped sparse-checkout init --cone &&
+	cat >escaped/.git/info/sparse-checkout <<-\EOF &&
 	/*
 	!/*/
-	/does\*not\*exist/
+	/zbad\\dir/
+	/zdoes\*not\*exist/
+	/zdoes\*exist/
 	EOF
-	check_read_tree_errors repo "a" ""
+	check_read_tree_errors escaped "a zbad\\dir zdoes*exist"
 '
 
 test_done