diff mbox series

[v2,4/4] submodule-config.c: strengthen URL fsck check

Message ID b79b1a7178076be1d1a80212dfabd3c37587d443.1705542918.git.gitgitgadget@gmail.com (mailing list archive)
State Accepted
Commit 8430b438f628f2f0df08622a550e750158167f28
Headers show
Series Strengthen fsck checks for submodule URLs | expand

Commit Message

Victoria Dye Jan. 18, 2024, 1:55 a.m. UTC
From: Victoria Dye <vdye@github.com>

Update the validation of "curl URL" submodule URLs (i.e. those that specify
an "http[s]" or "ftp[s]" protocol) in 'check_submodule_url()' to catch more
invalid URLs. The existing validation using 'credential_from_url_gently()'
parses certain URLs incorrectly, leading to invalid submodule URLs passing
'git fsck' checks. Conversely, 'url_normalize()' - used to validate remote
URLs in 'remote_get()' - correctly identifies the invalid URLs missed by
'credential_from_url_gently()'.

To catch more invalid cases, replace 'credential_from_url_gently()' with
'url_normalize()' followed by a 'url_decode()' and a check for newlines
(mirroring 'check_url_component()' in the 'credential_from_url_gently()'
validation).

Signed-off-by: Victoria Dye <vdye@github.com>
---
 submodule-config.c          | 16 +++++++++++-----
 t/t7450-bad-git-dotfiles.sh | 11 +----------
 2 files changed, 12 insertions(+), 15 deletions(-)
diff mbox series

Patch

diff --git a/submodule-config.c b/submodule-config.c
index 3b295e9f89c..54130f6a385 100644
--- a/submodule-config.c
+++ b/submodule-config.c
@@ -15,7 +15,7 @@ 
 #include "thread-utils.h"
 #include "tree-walk.h"
 #include "url.h"
-#include "credential.h"
+#include "urlmatch.h"
 
 /*
  * submodule cache lookup structure
@@ -350,12 +350,18 @@  int check_submodule_url(const char *url)
 	}
 
 	else if (url_to_curl_url(url, &curl_url)) {
-		struct credential c = CREDENTIAL_INIT;
 		int ret = 0;
-		if (credential_from_url_gently(&c, curl_url, 1) ||
-		    !*c.host)
+		char *normalized = url_normalize(curl_url, NULL);
+		if (normalized) {
+			char *decoded = url_decode(normalized);
+			if (strchr(decoded, '\n'))
+				ret = -1;
+			free(normalized);
+			free(decoded);
+		} else {
 			ret = -1;
-		credential_clear(&c);
+		}
+
 		return ret;
 	}
 
diff --git a/t/t7450-bad-git-dotfiles.sh b/t/t7450-bad-git-dotfiles.sh
index c73b1c92ecc..46d4fb0354b 100755
--- a/t/t7450-bad-git-dotfiles.sh
+++ b/t/t7450-bad-git-dotfiles.sh
@@ -63,6 +63,7 @@  test_expect_success 'check urls' '
 	./%0ahost=example.com/foo.git
 	https://one.example.com/evil?%0ahost=two.example.com
 	https:///example.com/foo.git
+	http://example.com:test/foo.git
 	https::example.com/foo.git
 	http:::example.com/foo.git
 	EOF
@@ -70,16 +71,6 @@  test_expect_success 'check urls' '
 	test_cmp expect actual
 '
 
-# NEEDSWORK: the URL checked here is not valid (and will not work as a remote if
-# a user attempts to clone it), but the fsck check passes.
-test_expect_failure 'url check misses invalid cases' '
-	test-tool submodule check-url >actual <<-\EOF &&
-	http://example.com:test/foo.git
-	EOF
-
-	test_must_be_empty actual
-'
-
 test_expect_success 'create innocent subrepo' '
 	git init innocent &&
 	git -C innocent commit --allow-empty -m foo