diff mbox series

[09/16] mktree: validate paths more carefully

Message ID 4f9f77e693cfc4fbe72a2ae739bc7e236a3b82d3.1718130288.git.gitgitgadget@gmail.com (mailing list archive)
State New, archived
Headers show
Series mktree: support more flexible usage | expand

Commit Message

Victoria Dye June 11, 2024, 6:24 p.m. UTC
From: Victoria Dye <vdye@github.com>

Use 'verify_path' to validate the paths provided as tree entries, ensuring
we do not create entries with paths not allowed in trees (e.g., .git). Also,
remove trailing slashes on directories before validating, allowing users to
provide 'folder-name/' as the path for a tree object entry.

Signed-off-by: Victoria Dye <vdye@github.com>
---
 builtin/mktree.c  | 20 +++++++++++++++++---
 t/t1010-mktree.sh | 33 +++++++++++++++++++++++++++++++++
 2 files changed, 50 insertions(+), 3 deletions(-)

Comments

Junio C Hamano June 12, 2024, 2:26 a.m. UTC | #1
"Victoria Dye via GitGitGadget" <gitgitgadget@gmail.com> writes:

> From: Victoria Dye <vdye@github.com>
>
> Use 'verify_path' to validate the paths provided as tree entries, ensuring
> we do not create entries with paths not allowed in trees (e.g.,
> .git).

Sensible.

> Also,
> remove trailing slashes on directories before validating, allowing users to
> provide 'folder-name/' as the path for a tree object entry.

Is that a good idea for a plumbing like this command?  We would
silently accept these after silently stripping the trailing slash?

040000 tree 82a33d5150d9316378ef1955a49f2a5bf21aaeb2    templates/
100644 blob 1f89ffab4c32bc02b5d955851401628a5b9a540e    thread-utils.c/

The former _might_ count as "usability improvement", but if we are
doing the same for the latter we might be going a bit too lenient.

Let's see what really happens in the code.

> @@ -49,10 +50,23 @@ static void append_to_tree(unsigned mode, struct object_id *oid, const char *pat
>  {
>  	struct tree_entry *ent;
>  	size_t len = strlen(path);
> -	if (!literally && strchr(path, '/'))
> -		die("path %s contains slash", path);
>  
> -	FLEX_ALLOC_MEM(ent, name, path, len);
> +	if (literally) {
> +		FLEX_ALLOC_MEM(ent, name, path, len);
> +	} else {
> +		/* Normalize and validate entry path */
> +		if (S_ISDIR(mode)) {
> +			while(len > 0 && is_dir_sep(path[len - 1]))
> +				len--;
> +		}

Leave a single SP after "while", please.

We do this only to subtree entries, and all trailing slashes, not
just a single one.  OK, but I am not sure if the extra leniency is a
good idea to begin with.  "ls-tree" output does not have such a
trailing slashes, so it is unclear whom we are trying to be extra
nice with this.

> +		FLEX_ALLOC_MEM(ent, name, path, len);
> +
> +		if (!verify_path(ent->name, mode))
> +			die(_("invalid path '%s'"), path);

This is the crux of the change.  And it is so simple.  Very nice.

> +		if (strchr(ent->name, '/'))
> +			die("path %s contains slash", path);
> +	}

> diff --git a/t/t1010-mktree.sh b/t/t1010-mktree.sh
> index e0687cb529f..e0263cb2bf8 100755
> --- a/t/t1010-mktree.sh
> +++ b/t/t1010-mktree.sh
> @@ -173,4 +173,37 @@ test_expect_success '--literally can create invalid trees' '
>  	grep "not properly sorted" err
>  '
>  
> +test_expect_success 'mktree validates path' '
> +	tree_oid="$(cat tree)" &&
> +	blob_oid="$(git rev-parse $tree_oid:a/one)" &&
> +	head_oid="$(git rev-parse HEAD)" &&
> +
> +	# Valid: tree with or without trailing slash, blob without trailing slash
> +	{
> +		printf "040000 tree $tree_oid\tfolder1/\n" &&
> +		printf "040000 tree $tree_oid\tfolder2\n" &&
> +		printf "100644 blob $blob_oid\tfile.txt\n"
> +	} | git mktree >actual &&
> +
> +	# Invalid: blob with trailing slash
> +	printf "100644 blob $blob_oid\ttest/" |
> +	test_must_fail git mktree 2>err &&
> +	grep "invalid path ${SQ}test/${SQ}" err &&
> +
> +	# Invalid: dotdot
> +	printf "040000 tree $tree_oid\t../" |
> +	test_must_fail git mktree 2>err &&
> +	grep "invalid path ${SQ}../${SQ}" err &&
> +
> +	# Invalid: dot
> +	printf "040000 tree $tree_oid\t." |
> +	test_must_fail git mktree 2>err &&
> +	grep "invalid path ${SQ}.${SQ}" err &&
> +
> +	# Invalid: .git
> +	printf "040000 tree $tree_oid\t.git/" |
> +	test_must_fail git mktree 2>err &&
> +	grep "invalid path ${SQ}.git/${SQ}" err
> +'
> +
>  test_done
Victoria Dye June 12, 2024, 7:01 p.m. UTC | #2
Junio C Hamano wrote:
> "Victoria Dye via GitGitGadget" <gitgitgadget@gmail.com> writes:
> 
>> Also,
>> remove trailing slashes on directories before validating, allowing users to
>> provide 'folder-name/' as the path for a tree object entry.
> 
> Is that a good idea for a plumbing like this command?  We would
> silently accept these after silently stripping the trailing slash?
> 
> 040000 tree 82a33d5150d9316378ef1955a49f2a5bf21aaeb2    templates/
> 100644 blob 1f89ffab4c32bc02b5d955851401628a5b9a540e    thread-utils.c/
> 
> The former _might_ count as "usability improvement", but if we are
> doing the same for the latter we might be going a bit too lenient.

The trailing slashes are only ignored on tree entries (with mode 040000), so
the latter case would not be allowed (and triggers a 'die()' as it would
today).

> 
> Let's see what really happens in the code.
> 
>> @@ -49,10 +50,23 @@ static void append_to_tree(unsigned mode, struct object_id *oid, const char *pat
>>  {
>>  	struct tree_entry *ent;
>>  	size_t len = strlen(path);
>> -	if (!literally && strchr(path, '/'))
>> -		die("path %s contains slash", path);
>>  
>> -	FLEX_ALLOC_MEM(ent, name, path, len);
>> +	if (literally) {
>> +		FLEX_ALLOC_MEM(ent, name, path, len);
>> +	} else {
>> +		/* Normalize and validate entry path */
>> +		if (S_ISDIR(mode)) {
>> +			while(len > 0 && is_dir_sep(path[len - 1]))
>> +				len--;
>> +		}
> 
> Leave a single SP after "while", please.

Ah, sorry about that, thanks for catching it.

> We do this only to subtree entries, and all trailing slashes, not
> just a single one.  OK, but I am not sure if the extra leniency is a
> good idea to begin with.  "ls-tree" output does not have such a
> trailing slashes, so it is unclear whom we are trying to be extra
> nice with this.

It might be a bit niche, but 'git ls-files -s --sparse' does print
directories with a trailing slash, and in a format that is otherwise
accepted by the command after switching to 'read_index_info' for input
parsing.
Junio C Hamano June 12, 2024, 7:45 p.m. UTC | #3
Victoria Dye <vdye@github.com> writes:

> It might be a bit niche, but 'git ls-files -s --sparse' does print
> directories with a trailing slash, ...

OK.
diff mbox series

Patch

diff --git a/builtin/mktree.c b/builtin/mktree.c
index 48019448c1f..29e9dc6ce69 100644
--- a/builtin/mktree.c
+++ b/builtin/mktree.c
@@ -8,6 +8,7 @@ 
 #include "hex.h"
 #include "index-info.h"
 #include "quote.h"
+#include "read-cache-ll.h"
 #include "strbuf.h"
 #include "tree.h"
 #include "parse-options.h"
@@ -49,10 +50,23 @@  static void append_to_tree(unsigned mode, struct object_id *oid, const char *pat
 {
 	struct tree_entry *ent;
 	size_t len = strlen(path);
-	if (!literally && strchr(path, '/'))
-		die("path %s contains slash", path);
 
-	FLEX_ALLOC_MEM(ent, name, path, len);
+	if (literally) {
+		FLEX_ALLOC_MEM(ent, name, path, len);
+	} else {
+		/* Normalize and validate entry path */
+		if (S_ISDIR(mode)) {
+			while(len > 0 && is_dir_sep(path[len - 1]))
+				len--;
+		}
+		FLEX_ALLOC_MEM(ent, name, path, len);
+
+		if (!verify_path(ent->name, mode))
+			die(_("invalid path '%s'"), path);
+		if (strchr(ent->name, '/'))
+			die("path %s contains slash", path);
+	}
+
 	ent->mode = mode;
 	ent->len = len;
 	oidcpy(&ent->oid, oid);
diff --git a/t/t1010-mktree.sh b/t/t1010-mktree.sh
index e0687cb529f..e0263cb2bf8 100755
--- a/t/t1010-mktree.sh
+++ b/t/t1010-mktree.sh
@@ -173,4 +173,37 @@  test_expect_success '--literally can create invalid trees' '
 	grep "not properly sorted" err
 '
 
+test_expect_success 'mktree validates path' '
+	tree_oid="$(cat tree)" &&
+	blob_oid="$(git rev-parse $tree_oid:a/one)" &&
+	head_oid="$(git rev-parse HEAD)" &&
+
+	# Valid: tree with or without trailing slash, blob without trailing slash
+	{
+		printf "040000 tree $tree_oid\tfolder1/\n" &&
+		printf "040000 tree $tree_oid\tfolder2\n" &&
+		printf "100644 blob $blob_oid\tfile.txt\n"
+	} | git mktree >actual &&
+
+	# Invalid: blob with trailing slash
+	printf "100644 blob $blob_oid\ttest/" |
+	test_must_fail git mktree 2>err &&
+	grep "invalid path ${SQ}test/${SQ}" err &&
+
+	# Invalid: dotdot
+	printf "040000 tree $tree_oid\t../" |
+	test_must_fail git mktree 2>err &&
+	grep "invalid path ${SQ}../${SQ}" err &&
+
+	# Invalid: dot
+	printf "040000 tree $tree_oid\t." |
+	test_must_fail git mktree 2>err &&
+	grep "invalid path ${SQ}.${SQ}" err &&
+
+	# Invalid: .git
+	printf "040000 tree $tree_oid\t.git/" |
+	test_must_fail git mktree 2>err &&
+	grep "invalid path ${SQ}.git/${SQ}" err
+'
+
 test_done