diff mbox series

[v5] http: do not ignore proxy path

Message ID pull.1767.v5.git.1722576007398.gitgitgadget@gmail.com (mailing list archive)
State New
Headers show
Series [v5] http: do not ignore proxy path | expand

Commit Message

Ryan Hendrickson Aug. 2, 2024, 5:20 a.m. UTC
From: Ryan Hendrickson <ryan.hendrickson@alum.mit.edu>

The documentation for `http.proxy` describes that option, and the
environment variables it overrides, as supporting "the syntax understood
by curl". curl allows SOCKS proxies to use a path to a Unix domain
socket, like `socks5h://localhost/path/to/socket.sock`. Git should
therefore include, if present, the path part of the proxy URL in what it
passes to libcurl.

Co-authored-by: Jeff King <peff@peff.net>
Signed-off-by: Ryan Hendrickson <ryan.hendrickson@alum.mit.edu>
Signed-off-by: Jeff King <peff@peff.net>
---
    http: do not ignore proxy path

Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-1767%2Frhendric%2Frhendric%2Fhttp-proxy-path-v5
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-1767/rhendric/rhendric/http-proxy-path-v5
Pull-Request: https://github.com/gitgitgadget/git/pull/1767

Range-diff vs v4:

 1:  507fb75c1a6 ! 1:  fa101a3b264 http: do not ignore proxy path
     @@ Commit message
      
          Co-authored-by: Jeff King <peff@peff.net>
          Signed-off-by: Ryan Hendrickson <ryan.hendrickson@alum.mit.edu>
     +    Signed-off-by: Jeff King <peff@peff.net>
      
       ## Documentation/config/http.txt ##
      @@ Documentation/config/http.txt: http.proxy::
     @@ t/t5564-http-proxy.sh: test_expect_success 'clone can prompt for proxy password'
      +
      +test_atexit 'test ! -e "$TRASH_DIRECTORY/socks.pid" || kill "$(cat "$TRASH_DIRECTORY/socks.pid")"'
      +
     ++# The below tests morally ought to be gated on a prerequisite that Git is
     ++# linked with a libcurl that supports Unix socket paths for proxies (7.84 or
     ++# later), but this is not easy to test right now. Instead, we || the tests with
     ++# this function.
     ++old_libcurl_error() {
     ++	grep -Fx "fatal: libcurl 7.84 or later is required to support paths in proxy URLs" "$1"
     ++}
     ++
      +test_expect_success SOCKS_PROXY 'clone via Unix socket' '
      +	test_when_finished "rm -rf clone" &&
      +	test_config_global http.proxy "socks4://localhost$PWD/%2530.sock" && {
      +		{
      +			GIT_TRACE_CURL=$PWD/trace git clone "$HTTPD_URL/smart/repo.git" clone 2>err &&
     -+			grep -i "SOCKS4 request granted." trace
     ++			grep -i "SOCKS4 request granted" trace
      +		} ||
     -+		grep "^fatal: libcurl 7\.84 or later" err
     ++		old_libcurl_error err
      +	}
      +'
      +
     -+test_expect_success 'Unix socket requires socks*:' '
     ++test_expect_success 'Unix socket requires socks*:' - <<\EOT
      +	! git clone -c http.proxy=localhost/path https://example.com/repo.git 2>err && {
     -+		grep "^fatal: Invalid proxy URL '\''localhost/path'\'': only SOCKS proxies support paths" err ||
     -+		grep "^fatal: libcurl 7\.84 or later" err
     ++		grep -Fx "fatal: Invalid proxy URL 'localhost/path': only SOCKS proxies support paths" err ||
     ++		old_libcurl_error err
      +	}
     -+'
     ++EOT
      +
     -+test_expect_success 'Unix socket requires localhost' '
     ++test_expect_success 'Unix socket requires localhost' - <<\EOT
      +	! git clone -c http.proxy=socks4://127.0.0.1/path https://example.com/repo.git 2>err && {
     -+		grep "^fatal: Invalid proxy URL '\''socks4://127\.0\.0\.1/path'\'': host must be localhost if a path is present" err ||
     -+		grep "^fatal: libcurl 7\.84 or later" err
     ++		grep -Fx "fatal: Invalid proxy URL 'socks4://127.0.0.1/path': host must be localhost if a path is present" err ||
     ++		old_libcurl_error err
      +	}
     -+'
     ++EOT
      +
       test_done


 Documentation/config/http.txt |  4 +--
 http.c                        | 24 ++++++++++++++++-
 t/socks4-proxy.pl             | 48 ++++++++++++++++++++++++++++++++++
 t/t5564-http-proxy.sh         | 49 +++++++++++++++++++++++++++++++++++
 4 files changed, 122 insertions(+), 3 deletions(-)
 create mode 100644 t/socks4-proxy.pl


base-commit: 406f326d271e0bacecdb00425422c5fa3f314930

Comments

Junio C Hamano Aug. 2, 2024, 3:52 p.m. UTC | #1
"Ryan Hendrickson via GitGitGadget" <gitgitgadget@gmail.com> writes:

> From: Ryan Hendrickson <ryan.hendrickson@alum.mit.edu>
>
> The documentation for `http.proxy` describes that option, and the
> environment variables it overrides, as supporting "the syntax understood
> by curl". curl allows SOCKS proxies to use a path to a Unix domain
> socket, like `socks5h://localhost/path/to/socket.sock`. Git should
> therefore include, if present, the path part of the proxy URL in what it
> passes to libcurl.
>
> Co-authored-by: Jeff King <peff@peff.net>
> Signed-off-by: Ryan Hendrickson <ryan.hendrickson@alum.mit.edu>
> Signed-off-by: Jeff King <peff@peff.net>
> ---

The trailer lines should be ordered in chronological order to record
the provenance of the patch.  The last two entries make it look as
if what you assembled and signed-off was relayed by peff, with or
without further modification, with his sign-off to me, to become the
final version, but that is not the story you want to tell.

I'd swap the two lines (i.e., sign-offs) while queuing.

> +			if (!starts_with(proxy_auth.protocol, "socks"))
> +				die("Invalid proxy URL '%s': only SOCKS proxies support paths",
> +				    curl_http_proxy);

Our error messages that are prefixed with "fatal:" do not typically
begin with a capital letter.

But I'll let it pass, as this copies an existing message in this
file.  #leftoverbits clean-up needs to correct the ones added by
this patch and existing "Invalid proxy URL '%s'" by downcasing
"Invalid", possibly enclose the message in _() for i18n, and also
downcase "C" in two "Could not set SSL ..."  messages.

> +			if (strcasecmp(proxy_auth.host, "localhost"))
> +				die("Invalid proxy URL '%s': host must be localhost if a path is present",
> +				    curl_http_proxy);

Ditto.

Or instead of leaving this for a later clean-up after the dust
settles, we could have a separate "preliminary clean-up" patch to
address existing ones first.  Either is fine, but taking "clean-up
after the dust settles" and leaving it a problem for other people is
probably easier.

> diff --git a/t/t5564-http-proxy.sh b/t/t5564-http-proxy.sh
> index bb35b87071d..0d6cfebbfab 100755
> --- a/t/t5564-http-proxy.sh
> +++ b/t/t5564-http-proxy.sh
> @@ -39,4 +39,53 @@ test_expect_success 'clone can prompt for proxy password' '
>  	expect_askpass pass proxuser
>  '
>  
> +start_socks() {
> +	mkfifo socks_output &&
> +	{
> +		"$PERL_PATH" "$TEST_DIRECTORY/socks4-proxy.pl" "$1" >socks_output &
> +		echo $! > "$TRASH_DIRECTORY/socks.pid"
> +	} &&
> +	read line <socks_output &&
> +	test "$line" = ready
> +}
> +
> +# The %30 tests that the correct amount of percent-encoding is applied to the
> +# proxy string passed to curl.
> +test_lazy_prereq SOCKS_PROXY 'test_have_prereq PERL && start_socks "$TRASH_DIRECTORY/%30.sock"'
> +
> +test_atexit 'test ! -e "$TRASH_DIRECTORY/socks.pid" || kill "$(cat "$TRASH_DIRECTORY/socks.pid")"'

Let's line-wrap these overly long ones.

        # The %30 tests that the correct amount of percent-encoding is
        # applied to the proxy string passed to curl.
        test_lazy_prereq SOCKS_PROXY '
                test_have_prereq PERL &&
                start_socks "$TRASH_DIRECTORY/%30.sock"
        '

        test_atexit '
                test ! -e "$TRASH_DIRECTORY/socks.pid" ||
                kill "$(cat "$TRASH_DIRECTORY/socks.pid")"
        '

> +# The below tests morally ought to be gated on a prerequisite that Git is
> +# linked with a libcurl that supports Unix socket paths for proxies (7.84 or
> +# later), but this is not easy to test right now. Instead, we || the tests with
> +# this function.
> +old_libcurl_error() {
> +	grep -Fx "fatal: libcurl 7.84 or later is required to support paths in proxy URLs" "$1"
> +}

Cute.  

This fixes the polarity of the result from the whole "{ test } ||
this helper" construct.  Even if the test proper fails, we will
allow such a failure only when the reason of the failure is due to
this message.

If I were writing this, I would shorten to look for a bit fuzzier
pattern like

    grep "^fatal: .* is required to support paths in proxy URLs" "$1"

as that would allow us to fix the code later without needing to
update the pattern, if we discover reasons, other than being older
than libcURL 7.84, why paths in proxy URLs cannot be supported.

Other than that, very nicely done.

Thanks.
Ryan Hendrickson Aug. 2, 2024, 4:43 p.m. UTC | #2
At 2024-08-02 08:52-0700, Junio C Hamano <gitster@pobox.com> sent:

> I'd swap the two lines (i.e., sign-offs) while queuing.

Okay.

> Our error messages that are prefixed with "fatal:" do not typically
> begin with a capital letter.
>
> But I'll let it pass, as this copies an existing message in this
> file.  #leftoverbits clean-up needs to correct the ones added by
> this patch and existing "Invalid proxy URL '%s'" by downcasing
> "Invalid", possibly enclose the message in _() for i18n, and also
> downcase "C" in two "Could not set SSL ..."  messages.
>
> [ . . . ]
>
> Or instead of leaving this for a later clean-up after the dust
> settles, we could have a separate "preliminary clean-up" patch to
> address existing ones first.  Either is fine, but taking "clean-up
> after the dust settles" and leaving it a problem for other people is
> probably easier.

Hmm. I'd be inclined to take the preliminary clean-up approach, but some 
of the existing strings (there are also two "Unsupported ..."/"Supported 
..." strings near the "Could not set..."s) are going through gettext, and 
I'm reluctant to interfere with the l10n process.

Would a partial clean-up that downcased only the "Invalid proxy URL" 
message be acceptable, or worse than leaving this as #leftoverbits?

> Let's line-wrap these overly long ones.

Okay.

> If I were writing this, I would shorten to look for a bit fuzzier
> pattern like
>
>    grep "^fatal: .* is required to support paths in proxy URLs" "$1"
>
> as that would allow us to fix the code later without needing to
> update the pattern, if we discover reasons, other than being older
> than libcURL 7.84, why paths in proxy URLs cannot be supported.

Is this blocking feedback? This strikes me as speculative 
over-engineering; it would not be difficult to generalize the message, and 
possibly then choose a new, more appropriate function name and update the 
explanatory comment, when and if such reasons arise.

R
Junio C Hamano Aug. 2, 2024, 5:10 p.m. UTC | #3
Ryan Hendrickson <ryan.hendrickson@alum.mit.edu> writes:

> Hmm. I'd be inclined to take the preliminary clean-up approach, but
> some of the existing strings (there are also two "Unsupported
> ..."/"Supported ..." strings near the "Could not set..."s) are going
> through gettext, and I'm reluctant to interfere with the l10n process.

I do not see what you mean by interfering with the localization.

If we are updating text to be translated anyway, giving translators
the strings that need to be translated _earlier_ rather than later
would be more helpful to them, no?

>> If I were writing this, I would shorten to look for a bit fuzzier
>> pattern like
>>
>>    grep "^fatal: .* is required to support paths in proxy URLs" "$1"
>>
>> as that would allow us to fix the code later without needing to
>> update the pattern, if we discover reasons, other than being older
>> than libcURL 7.84, why paths in proxy URLs cannot be supported.
>
> Is this blocking feedback? This strikes me as speculative
> over-engineering

No, it is loosening a pattern that is overly tight and as a side
effect shortening the line to more readable length ;-).
Ryan Hendrickson Aug. 2, 2024, 6:03 p.m. UTC | #4
At 2024-08-02 10:10-0700, Junio C Hamano <gitster@pobox.com> sent:

> Ryan Hendrickson <ryan.hendrickson@alum.mit.edu> writes:
>
>> Hmm. I'd be inclined to take the preliminary clean-up approach, but
>> some of the existing strings (there are also two "Unsupported
>> ..."/"Supported ..." strings near the "Could not set..."s) are going
>> through gettext, and I'm reluctant to interfere with the l10n process.
>
> I do not see what you mean by interfering with the localization.
>
> If we are updating text to be translated anyway, giving translators
> the strings that need to be translated _earlier_ rather than later
> would be more helpful to them, no?

Probably true, but as a new contributor I don't know whether changing 
msgids means more people need to review the patch, more files need to be 
changed, a translation team needs to be notified, the change needs to be 
pushed a different branch... whatever your process is. Localized strings 
are generally more of a headache for drive-by contributors, in my 
experience across different projects.

>> Is this blocking feedback? This strikes me as speculative
>> over-engineering
>
> No, it is loosening a pattern that is overly tight and as a side
> effect shortening the line to more readable length ;-).

Blocking or not?

R
Junio C Hamano Aug. 2, 2024, 7:28 p.m. UTC | #5
Ryan Hendrickson <ryan.hendrickson@alum.mit.edu> writes:

> At 2024-08-02 10:10-0700, Junio C Hamano <gitster@pobox.com> sent:
>
>> Ryan Hendrickson <ryan.hendrickson@alum.mit.edu> writes:
>>
>>> Hmm. I'd be inclined to take the preliminary clean-up approach, but
>>> some of the existing strings (there are also two "Unsupported
>>> ..."/"Supported ..." strings near the "Could not set..."s) are going
>>> through gettext, and I'm reluctant to interfere with the l10n process.
>>
>> I do not see what you mean by interfering with the localization.
>>
>> If we are updating text to be translated anyway, giving translators
>> the strings that need to be translated _earlier_ rather than later
>> would be more helpful to them, no?
>
> Probably true, but as a new contributor I don't know whether changing
> msgids means more people need to review the patch, more files need to
> be changed, a translation team needs to be notified, the change needs
> to be pushed a different branch... whatever your process is. Localized
> strings are generally more of a headache for drive-by contributors, in
> my experience across different projects.
>
>>> Is this blocking feedback? This strikes me as speculative
>>> over-engineering
>>
>> No, it is loosening a pattern that is overly tight and as a side
>> effect shortening the line to more readable length ;-).
>
> Blocking or not?

If we are updating anyway, that question is irrelevant, no?  This
version may hit 'seen' but until the next version comes it will not
advance to 'next'.
Ryan Hendrickson Aug. 2, 2024, 7:39 p.m. UTC | #6
At 2024-08-02 12:28-0700, Junio C Hamano <gitster@pobox.com> sent:

>>>> Is this blocking feedback? This strikes me as speculative
>>>> over-engineering
>>>
>>> No, it is loosening a pattern that is overly tight and as a side
>>> effect shortening the line to more readable length ;-).
>>
>> Blocking or not?
>
> If we are updating anyway, that question is irrelevant, no?  This
> version may hit 'seen' but until the next version comes it will not
> advance to 'next'.

I can't figure out what you mean by this so I am going to proceed as if 
you had simply said ‘non-blocking’.

R
Junio C Hamano Aug. 2, 2024, 9:13 p.m. UTC | #7
Ryan Hendrickson <ryan.hendrickson@alum.mit.edu> writes:

> At 2024-08-02 12:28-0700, Junio C Hamano <gitster@pobox.com> sent:
>
>>>>> Is this blocking feedback? This strikes me as speculative
>>>>> over-engineering
>>>>
>>>> No, it is loosening a pattern that is overly tight and as a side
>>>> effect shortening the line to more readable length ;-).
>>>
>>> Blocking or not?
>>
>> If we are updating anyway, that question is irrelevant, no?  This
>> version may hit 'seen' but until the next version comes it will not
>> advance to 'next'.
>
> I can't figure out what you mean by this so I am going to proceed as
> if you had simply said ‘non-blocking’.

It does not make much sense to ask if a suggestion is "blocking" or
"non-blocking".  If you respond with a reasonable explanation why
you do not want to take a suggestion, I may (or may not) say that
your reasoning makes sense.  IOW, making me say "it is blocking"
means you want to me to say that I won't listen to you no matter
what you say.  That is rarely be the case.

In this case, I do not think it makes sense to insist with -Fx that
the error message has the exact message.  And I do not think your
"strikes me as" qualifies as a "reasonable explanation".
Ryan Hendrickson Aug. 2, 2024, 9:26 p.m. UTC | #8
At 2024-08-02 14:13-0700, Junio C Hamano <gitster@pobox.com> sent:

>>>>>> Is this blocking feedback? This strikes me as speculative
>>>>>> over-engineering
>>>>>
>>>>> No, it is loosening a pattern that is overly tight and as a side
>>>>> effect shortening the line to more readable length ;-).
>>>>
>>>> Blocking or not?
>>>
>>> If we are updating anyway, that question is irrelevant, no?  This
>>> version may hit 'seen' but until the next version comes it will not
>>> advance to 'next'.
>>
>> I can't figure out what you mean by this so I am going to proceed as
>> if you had simply said ‘non-blocking’.
>
> It does not make much sense to ask if a suggestion is "blocking" or
> "non-blocking".  If you respond with a reasonable explanation why
> you do not want to take a suggestion, I may (or may not) say that
> your reasoning makes sense.  IOW, making me say "it is blocking"
> means you want to me to say that I won't listen to you no matter
> what you say.  That is rarely be the case.

I want you to do what Documentation/ReviewingGuidelines.txt says reviewers 
should do:

- When providing a recommendation, be as clear as possible about whether you
   consider it "blocking" (the code would be broken or otherwise made worse if an
   issue isn't fixed) or "non-blocking" (the patch could be made better by taking
   the recommendation, but acceptance of the series does not require it).
   Non-blocking recommendations can be particularly ambiguous when they are
   related to - but outside the scope of - a series ("nice-to-have"s), or when
   they represent only stylistic differences between the author and reviewer.

Because I would rather not have what is likely to be a highly subjective 
argument about this particular choice in a test script if we don't have 
to have one.

I would also rather not put time into figuring out how best to rename the 
function "old_curl_version" if it no longer checks for the particular 
error produced when the curl version is too old, nor would I want to think 
ahead about whether it is correct for these tests that use this function 
to continue to pass if different variations on this error are raised. 
There is one qualifying error currently, and that's what the test matches 
against. Matching against hypothetical future errors is speculative 
overengineering if it is not obvious how the errors will vary and what it 
may mean if they do.

R
Junio C Hamano Aug. 2, 2024, 9:43 p.m. UTC | #9
Ryan Hendrickson <ryan.hendrickson@alum.mit.edu> writes:

> I would also rather not put time into figuring out how best to rename
> the function "old_curl_version" if it no longer checks for the
> particular error produced when the curl version is too old,

Now, that is a good argument to make sure the "libcurl 7.84 or later"
I suggested to omit for the sake of brevity is in the pattern.

Thanks.
Junio C Hamano Aug. 2, 2024, 9:47 p.m. UTC | #10
Ryan Hendrickson <ryan.hendrickson@alum.mit.edu> writes:

> what the test matches against. Matching against hypothetical future
> errors is speculative overengineering if it is not obvious how the
> errors will vary and what it may mean if they do.

The easiest you can imagine is that it turns out the cut-off version
of cURL for the feature turned out to be not 7.84, or versions of
cURL shipped by some distros have backports of the feature to an
older version that explicitly naming 7.84 causes more confusion than
naming the exact feature we rely on, and we end up rephrasing the
error message.  We have done such changes in the past, so it is not
really speculating "hypothetical future errors", but applying common
sense gained over years working on this project.
Ryan Hendrickson Aug. 2, 2024, 10:14 p.m. UTC | #11
At 2024-08-02 14:47-0700, Junio C Hamano <gitster@pobox.com> sent:

> Ryan Hendrickson <ryan.hendrickson@alum.mit.edu> writes:
>
>> what the test matches against. Matching against hypothetical future
>> errors is speculative overengineering if it is not obvious how the
>> errors will vary and what it may mean if they do.
>
> The easiest you can imagine is that it turns out the cut-off version
> of cURL for the feature turned out to be not 7.84, or versions of
> cURL shipped by some distros have backports of the feature to an
> older version that explicitly naming 7.84 causes more confusion than
> naming the exact feature we rely on, and we end up rephrasing the
> error message.  We have done such changes in the past, so it is not
> really speculating "hypothetical future errors", but applying common
> sense gained over years working on this project.

Okay, so is it a problem to also change the message in the test if that 
happens? My concern is that if I pick some fragment of the message to 
elide in the test, the message could still get reworded in a way that 
requires the test to be changed anyway. Even if not, the comment above it 
would likely need revision too; and if the test doesn't fail, whoever 
amends the message is unlikely to notice this.

The way the test is written in the current version of the patch, there is 
no ambiguity about what is being tested and what would need to change if 
the message changes. Future contributors can grep the code for references 
to that error message and find this test regardless of which part and how 
much of the message they choose to grep for. Shaving a dozen or so 
characters out of line is not more important than those considerations, is 
it?

R
diff mbox series

Patch

diff --git a/Documentation/config/http.txt b/Documentation/config/http.txt
index 162b33fc52f..a14371b5c96 100644
--- a/Documentation/config/http.txt
+++ b/Documentation/config/http.txt
@@ -5,8 +5,8 @@  http.proxy::
 	proxy string with a user name but no password, in which case git will
 	attempt to acquire one in the same way it does for other credentials. See
 	linkgit:gitcredentials[7] for more information. The syntax thus is
-	'[protocol://][user[:password]@]proxyhost[:port]'. This can be overridden
-	on a per-remote basis; see remote.<name>.proxy
+	'[protocol://][user[:password]@]proxyhost[:port][/path]'. This can be
+	overridden on a per-remote basis; see remote.<name>.proxy
 +
 Any proxy, however configured, must be completely transparent and must not
 modify, transform, or buffer the request or response in any way.  Proxies which
diff --git a/http.c b/http.c
index 623ed234891..6c6cc5c822a 100644
--- a/http.c
+++ b/http.c
@@ -1227,6 +1227,8 @@  static CURL *get_curl_handle(void)
 		 */
 		curl_easy_setopt(result, CURLOPT_PROXY, "");
 	} else if (curl_http_proxy) {
+		struct strbuf proxy = STRBUF_INIT;
+
 		if (starts_with(curl_http_proxy, "socks5h"))
 			curl_easy_setopt(result,
 				CURLOPT_PROXYTYPE, CURLPROXY_SOCKS5_HOSTNAME);
@@ -1265,7 +1267,27 @@  static CURL *get_curl_handle(void)
 		if (!proxy_auth.host)
 			die("Invalid proxy URL '%s'", curl_http_proxy);
 
-		curl_easy_setopt(result, CURLOPT_PROXY, proxy_auth.host);
+		strbuf_addstr(&proxy, proxy_auth.host);
+		if (proxy_auth.path) {
+			curl_version_info_data *ver = curl_version_info(CURLVERSION_NOW);
+
+			if (ver->version_num < 0x075400)
+				die("libcurl 7.84 or later is required to support paths in proxy URLs");
+
+			if (!starts_with(proxy_auth.protocol, "socks"))
+				die("Invalid proxy URL '%s': only SOCKS proxies support paths",
+				    curl_http_proxy);
+
+			if (strcasecmp(proxy_auth.host, "localhost"))
+				die("Invalid proxy URL '%s': host must be localhost if a path is present",
+				    curl_http_proxy);
+
+			strbuf_addch(&proxy, '/');
+			strbuf_add_percentencode(&proxy, proxy_auth.path, 0);
+		}
+		curl_easy_setopt(result, CURLOPT_PROXY, proxy.buf);
+		strbuf_release(&proxy);
+
 		var_override(&curl_no_proxy, getenv("NO_PROXY"));
 		var_override(&curl_no_proxy, getenv("no_proxy"));
 		curl_easy_setopt(result, CURLOPT_NOPROXY, curl_no_proxy);
diff --git a/t/socks4-proxy.pl b/t/socks4-proxy.pl
new file mode 100644
index 00000000000..4c3a35c0083
--- /dev/null
+++ b/t/socks4-proxy.pl
@@ -0,0 +1,48 @@ 
+use strict;
+use IO::Select;
+use IO::Socket::UNIX;
+use IO::Socket::INET;
+
+my $path = shift;
+
+unlink($path);
+my $server = IO::Socket::UNIX->new(Listen => 1, Local => $path)
+	or die "unable to listen on $path: $!";
+
+$| = 1;
+print "ready\n";
+
+while (my $client = $server->accept()) {
+	sysread $client, my $buf, 8;
+	my ($version, $cmd, $port, $ip) = unpack 'CCnN', $buf;
+	next unless $version == 4; # socks4
+	next unless $cmd == 1; # TCP stream connection
+
+	# skip NUL-terminated id
+	while (sysread $client, my $char, 1) {
+		last unless ord($char);
+	}
+
+	# version(0), reply(5a == granted), port (ignored), ip (ignored)
+	syswrite $client, "\x00\x5a\x00\x00\x00\x00\x00\x00";
+
+	my $remote = IO::Socket::INET->new(PeerHost => $ip, PeerPort => $port)
+		or die "unable to connect to $ip/$port: $!";
+
+	my $io = IO::Select->new($client, $remote);
+	while ($io->count) {
+		for my $fh ($io->can_read(0)) {
+			for my $pair ([$client, $remote], [$remote, $client]) {
+				my ($from, $to) = @$pair;
+				next unless $fh == $from;
+
+				my $r = sysread $from, my $buf, 1024;
+				if (!defined $r || $r <= 0) {
+					$io->remove($from);
+					next;
+				}
+				syswrite $to, $buf;
+			}
+		}
+	}
+}
diff --git a/t/t5564-http-proxy.sh b/t/t5564-http-proxy.sh
index bb35b87071d..0d6cfebbfab 100755
--- a/t/t5564-http-proxy.sh
+++ b/t/t5564-http-proxy.sh
@@ -39,4 +39,53 @@  test_expect_success 'clone can prompt for proxy password' '
 	expect_askpass pass proxuser
 '
 
+start_socks() {
+	mkfifo socks_output &&
+	{
+		"$PERL_PATH" "$TEST_DIRECTORY/socks4-proxy.pl" "$1" >socks_output &
+		echo $! > "$TRASH_DIRECTORY/socks.pid"
+	} &&
+	read line <socks_output &&
+	test "$line" = ready
+}
+
+# The %30 tests that the correct amount of percent-encoding is applied to the
+# proxy string passed to curl.
+test_lazy_prereq SOCKS_PROXY 'test_have_prereq PERL && start_socks "$TRASH_DIRECTORY/%30.sock"'
+
+test_atexit 'test ! -e "$TRASH_DIRECTORY/socks.pid" || kill "$(cat "$TRASH_DIRECTORY/socks.pid")"'
+
+# The below tests morally ought to be gated on a prerequisite that Git is
+# linked with a libcurl that supports Unix socket paths for proxies (7.84 or
+# later), but this is not easy to test right now. Instead, we || the tests with
+# this function.
+old_libcurl_error() {
+	grep -Fx "fatal: libcurl 7.84 or later is required to support paths in proxy URLs" "$1"
+}
+
+test_expect_success SOCKS_PROXY 'clone via Unix socket' '
+	test_when_finished "rm -rf clone" &&
+	test_config_global http.proxy "socks4://localhost$PWD/%2530.sock" && {
+		{
+			GIT_TRACE_CURL=$PWD/trace git clone "$HTTPD_URL/smart/repo.git" clone 2>err &&
+			grep -i "SOCKS4 request granted" trace
+		} ||
+		old_libcurl_error err
+	}
+'
+
+test_expect_success 'Unix socket requires socks*:' - <<\EOT
+	! git clone -c http.proxy=localhost/path https://example.com/repo.git 2>err && {
+		grep -Fx "fatal: Invalid proxy URL 'localhost/path': only SOCKS proxies support paths" err ||
+		old_libcurl_error err
+	}
+EOT
+
+test_expect_success 'Unix socket requires localhost' - <<\EOT
+	! git clone -c http.proxy=socks4://127.0.0.1/path https://example.com/repo.git 2>err && {
+		grep -Fx "fatal: Invalid proxy URL 'socks4://127.0.0.1/path': host must be localhost if a path is present" err ||
+		old_libcurl_error err
+	}
+EOT
+
 test_done