diff mbox series

[v2,6/6] t5556-http-auth: add test for HTTP auth hdr logic

Message ID f3f13ed8c8238f396163dd0e6a3d6c948c2b879b.1666372083.git.gitgitgadget@gmail.com (mailing list archive)
State New, archived
Headers show
Series Enhance credential helper protocol to include auth headers | expand

Commit Message

Matthew John Cheetham Oct. 21, 2022, 5:08 p.m. UTC
From: Matthew John Cheetham <mjcheetham@outlook.com>

Add a series of tests to exercise the HTTP authentication header parsing
and the interop with credential helpers. Credential helpers can respond
to requests that contain WWW-Authenticate information with the ability
to select the response Authenticate header scheme.

Introduce a mini HTTP server helper that provides a frontend for the
git-http-backend, with support for arbitrary authentication schemes.
The test-http-server is based heavily on the git-daemon, and forwards
all successfully authenticated requests to the http-backend.

Signed-off-by: Matthew John Cheetham <mjcheetham@outlook.com>
---
 Makefile                                  |    2 +
 contrib/buildsystems/CMakeLists.txt       |   13 +
 t/helper/.gitignore                       |    1 +
 t/helper/test-credential-helper-replay.sh |   14 +
 t/helper/test-http-server.c               | 1134 +++++++++++++++++++++
 t/t5556-http-auth.sh                      |  260 +++++
 6 files changed, 1424 insertions(+)
 create mode 100755 t/helper/test-credential-helper-replay.sh
 create mode 100644 t/helper/test-http-server.c
 create mode 100755 t/t5556-http-auth.sh

Comments

Derrick Stolee Oct. 28, 2022, 3:08 p.m. UTC | #1
On 10/21/22 1:08 PM, Matthew John Cheetham via GitGitGadget wrote:
> From: Matthew John Cheetham <mjcheetham@outlook.com>

> @@ -1500,6 +1500,8 @@ else
>  	endif
>  	BASIC_CFLAGS += $(CURL_CFLAGS)
>  
> +	TEST_PROGRAMS_NEED_X += test-http-server
> +
>  	REMOTE_CURL_PRIMARY = git-remote-http$X
>  	REMOTE_CURL_ALIASES = git-remote-https$X git-remote-ftp$X git-remote-ftps$X
>  	REMOTE_CURL_NAMES = $(REMOTE_CURL_PRIMARY) $(REMOTE_CURL_ALIASES)

This hunk is in the "else" block of "ifdef NO_CURL",
so this makes sense for why TEST_PROGRAMS_NEED_X is
augmented here, away from other instances.

> diff --git a/contrib/buildsystems/CMakeLists.txt b/contrib/buildsystems/CMakeLists.txt
> index 787738e6fa3..45251695ce0 100644
> --- a/contrib/buildsystems/CMakeLists.txt
> +++ b/contrib/buildsystems/CMakeLists.txt
> @@ -989,6 +989,19 @@ set(wrapper_scripts
>  set(wrapper_test_scripts
>  	test-fake-ssh test-tool)
>  
> +if(CURL_FOUND)
> +       list(APPEND wrapper_test_scripts test-http-server)
> +
> +       add_executable(test-http-server ${CMAKE_SOURCE_DIR}/t/helper/test-http-server.c)
> +       target_link_libraries(test-http-server common-main)
> +
> +       if(MSVC)
> +               set_target_properties(test-http-server
> +                                       PROPERTIES RUNTIME_OUTPUT_DIRECTORY_DEBUG ${CMAKE_BINARY_DIR}/t/helper)
> +               set_target_properties(test-http-server
> +                                       PROPERTIES RUNTIME_OUTPUT_DIRECTORY_RELEASE ${CMAKE_BINARY_DIR}/t/helper)
> +       endif()
> +endif()

And this file has the pattern of many "if(CURL_FOUND)"
blocks with isolated purposes, so it makes sense to
have this be an isolated change instead of grouped with
a different case.

> diff --git a/t/helper/.gitignore b/t/helper/.gitignore
> index 8c2ddcce95f..1a94ab6eed5 100644
> --- a/t/helper/.gitignore
> +++ b/t/helper/.gitignore
> @@ -1,2 +1,3 @@
>  /test-tool
>  /test-fake-ssh
> +test-http-server

Should this start with a "/" like the other entries?

> diff --git a/t/helper/test-credential-helper-replay.sh b/t/helper/test-credential-helper-replay.sh
> new file mode 100755
> index 00000000000..03e5e63dad6
> --- /dev/null
> +++ b/t/helper/test-credential-helper-replay.sh
> @@ -0,0 +1,14 @@
> +cmd=$1
> +teefile=$cmd-actual.cred
> +catfile=$cmd-response.cred
> +rm -f $teefile
> +while read line;
> +do
> +	if test -z "$line"; then
> +		break;
> +	fi
> +	echo "$line" >> $teefile
> +done
> +if test "$cmd" = "get"; then
> +	cat $catfile
> +fi

Should this be a helper method within another script, such
as t/lib-credential.sh or t/lib-httpd.sh? The read over
stdin will still work, as in this example:

read_chunk() {
	while read line; do
		case "$line" in
		--) break ;;
		*) echo "$line" ;;
		esac
	done
}

> diff --git a/t/helper/test-http-server.c b/t/helper/test-http-server.c

> @@ -0,0 +1,1134 @@
> +#include "config.h"
> +#include "run-command.h"
> +#include "strbuf.h"
> +#include "string-list.h"
> +#include "trace2.h"
> +#include "version.h"
> +#include "dir.h"
> +#include "date.h"
> +
> +#define TR2_CAT "test-http-server"
> +
> +static const char *pid_file;
> +static int verbose;
> +static int reuseaddr;
> +
> +static const char test_http_auth_usage[] =
> +"http-server [--verbose]\n"
> +"           [--timeout=<n>] [--init-timeout=<n>] [--max-connections=<n>]\n"
> +"           [--reuseaddr] [--pid-file=<file>]\n"
> +"           [--listen=<host_or_ipaddr>]* [--port=<n>]\n"
> +"           [--anonymous-allowed]\n"
> +"           [--auth=<scheme>[:<params>] [--auth-token=<scheme>:<token>]]*\n"
> +;

These are a lot of options to implement all at once. They are probably
simple enough, but depending on the implementation and tests, it might
be helpful to split this patch into smaller ones that introduce these
options along with the tests that exercise each. That will help
verify that they are being tested properly instead of needing to track
back and forth across the patch for each one.

> +
> +/* Timeout, and initial timeout */
> +static unsigned int timeout;
> +static unsigned int init_timeout;
> +
> +static void logreport(const char *label, const char *err, va_list params)
> +{
> +	struct strbuf msg = STRBUF_INIT;
> +
> +	strbuf_addf(&msg, "[%"PRIuMAX"] %s: ", (uintmax_t)getpid(), label);
> +	strbuf_vaddf(&msg, err, params);
> +	strbuf_addch(&msg, '\n');
> +
> +	fwrite(msg.buf, sizeof(char), msg.len, stderr);
> +	fflush(stderr);
> +
> +	strbuf_release(&msg);
> +}
> +
> +__attribute__((format (printf, 1, 2)))
> +static void logerror(const char *err, ...)
> +{
> +	va_list params;
> +	va_start(params, err);
> +	logreport("error", err, params);
> +	va_end(params);
> +}
> +
> +__attribute__((format (printf, 1, 2)))
> +static void loginfo(const char *err, ...)
> +{
> +	va_list params;
> +	if (!verbose)
> +		return;
> +	va_start(params, err);
> +	logreport("info", err, params);
> +	va_end(params);
> +}

I wonder how much of this we need or is just a nice thing. I would
err on the side of making things as simple as possible, but being
able to debug this test server may be important based on your
experience.

> +static void set_keep_alive(int sockfd)
> +{
> +	int ka = 1;
> +
> +	if (setsockopt(sockfd, SOL_SOCKET, SO_KEEPALIVE, &ka, sizeof(ka)) < 0) {
> +		if (errno != ENOTSOCK)
> +			logerror("unable to set SO_KEEPALIVE on socket: %s",
> +				strerror(errno));
> +	}
> +}
> +
> +//////////////////////////////////////////////////////////////////
> +// The code in this section is used by "worker" instances to service
> +// a single connection from a client.  The worker talks to the client
> +// on 0 and 1.
> +//////////////////////////////////////////////////////////////////

Use /* */ style comments. You can repeat the asterisks to get a
similar visual block.

> +
> +enum worker_result {
> +	/*
> +	 * Operation successful.
> +	 * Caller *might* keep the socket open and allow keep-alive.
> +	 */
> +	WR_OK       = 0,
> +	/*
> +	 * Various errors while processing the request and/or the response.
> +	 * Close the socket and clean up.
> +	 * Exit child-process with non-zero status.
> +	 */
> +	WR_IO_ERROR = 1<<0,
> +	/*
> +	 * Close the socket and clean up.  Does not imply an error.
> +	 */
> +	WR_HANGUP   = 1<<1,

nit: add a whitespace line between an item and the next
item's comment.

> +
> +	WR_STOP_THE_MUSIC = (WR_IO_ERROR | WR_HANGUP),
> +};

(I read, but have no comments on the http-server boilerplate.)

> +
> +enum auth_result {
> +	AUTH_UNKNOWN = 0,
> +	AUTH_DENY = 1,
> +	AUTH_ALLOW = 2,
> +};
> +
> +struct auth_module {
> +	const char *scheme;
> +	const char *challenge_params;

Later, I notice that you set challenge_params using an
xstrdup() so this shouldn't be const and you should
free it in any freeing code.

> +	struct string_list *tokens;
> +};
> +
> +static int allow_anonymous;
> +static struct auth_module **auth_modules = NULL;
> +static size_t auth_modules_nr = 0;
> +static size_t auth_modules_alloc = 0;

So, we are setting up a number of potential auth modules,
each of which has a scheme to match a request to the module,
and a list of tokens that would be considered worthy of the
AUTH_ALLOW result. Otherwise, if the scheme matches but no
token matches, we get AUTH_DENY. Finally, if no scheme matches
we get AUTH_UNKNOWN.

This concept might be worth a comment here around the data
structures before we get into how that is implemented.

> +static struct auth_module *get_auth_module(struct strbuf *scheme)
> +{
> +	int i;
> +	struct auth_module *mod;
> +	for (i = 0; i < auth_modules_nr; i++) {
> +		mod = auth_modules[i];
> +		if (!strcasecmp(mod->scheme, scheme->buf))
> +			return mod;
> +	}
> +
> +	return NULL;
> +}

Matching the input scheme against the list of modules.

Only complaint: there is no reason that 'scheme' needs t
be a strbuf, but could be a 'const char *' here.

> +static void add_auth_module(struct auth_module *mod)
> +{
> +	ALLOC_GROW(auth_modules, auth_modules_nr + 1, auth_modules_alloc);
> +	auth_modules[auth_modules_nr++] = mod;
> +}

nit: this could be located earlier, next to the list
definition, or delayed until it is needed. That would
allow get_auth_module() to be closer to its first use.

> +static int is_authed(struct req *req, const char **user, enum worker_result *wr)
> +{
> +	enum auth_result result = AUTH_UNKNOWN;
> +	struct string_list hdrs = STRING_LIST_INIT_NODUP;
> +	struct auth_module *mod;
> +
> +	struct string_list_item *hdr;
> +	struct string_list_item *token;
> +	const char *v;
> +	struct strbuf **split = NULL;
> +	int i;
> +	char *challenge;
> +
> +	/* ask all auth modules to validate the request */
> +	for_each_string_list_item(hdr, &req->header_list) {
> +		if (skip_iprefix(hdr->string, "Authorization: ", &v)) {
> +			split = strbuf_split_str(v, ' ', 2);
> +			if (!split[0] || !split[1]) continue;

For each valid request header...

> +			// trim trailing space ' '
> +			strbuf_setlen(split[0], split[0]->len - 1);
> +
> +			mod = get_auth_module(split[0]);
> +			if (mod) {

...get an appropriate module, if it exists...

> +
> +				for_each_string_list_item(token, mod->tokens) {
> +					if (!strcmp(split[1]->buf, token->string)) {
> +						result = AUTH_ALLOW;
> +						goto done;
> +					}
> +				}
> +
> +				if (result != AUTH_UNKNOWN)
> +					goto done;

...and report if we find a valid token.

Here, it seems I was wrong in my expectation of AUTH_DENY:
if a matching module exists but no token exists in that
module, then we keep searching other modules. 

> +			}
> +		}
> +	}
> +
> +done:
> +	switch (result) {
> +	case AUTH_ALLOW:
> +		trace2_printf("%s: auth '%s' ALLOW", TR2_CAT, mod->scheme);
> +		*user = "VALID_TEST_USER";
> +		*wr = WR_OK;
> +		break;
> +
> +	case AUTH_DENY:
> +		trace2_printf("%s: auth '%s' DENY", TR2_CAT, mod->scheme);
> +		/* fall-through */

I'm not sure that I see a case where this is possible. Maybe
we should have a 'result = AUTH_DENY' at the start of the
"if (mod)" block, followed by a 'goto done' in all cases
instead of "if (result != AUTH_UNKNOWN)"?

> +	case AUTH_UNKNOWN:
> +		if (allow_anonymous)
> +			break;

If we do not require auth, then we want to continue if there
is no matching authentication.

> +		for (i = 0; i < auth_modules_nr; i++) {
> +			mod = auth_modules[i];
> +			if (mod->challenge_params)
> +				challenge = xstrfmt("WWW-Authenticate: %s %s",
> +						    mod->scheme,
> +						    mod->challenge_params);
> +			else
> +				challenge = xstrfmt("WWW-Authenticate: %s",
> +						    mod->scheme);
> +			string_list_append(&hdrs, challenge);
> +		}
> +		*wr = send_http_error(1, 401, "Unauthorized", -1, &hdrs, *wr);

However, here is the critical piece about how servers will
start to act with the new WWW-Authenticate header usage in
the Git credential helper interface. This will be critical
in the testing for Git to retry the credential helper while
passing these authentications schemes from the installed
modules.

> +	}
> +
> +	strbuf_list_free(split);
> +	string_list_clear(&hdrs, 0);
> +
> +	return result == AUTH_ALLOW ||
> +	      (result == AUTH_UNKNOWN && allow_anonymous);

Did it work? Or did it not need to work? I'm interested to
investigate the case that the client sent an authentication
header that matches a module but doesn't match any tokens,
but we allow anonymous access, anyway. Is that a 400? Or
is that a 401?

> +static enum worker_result dispatch(struct req *req)
> +{
> +	enum worker_result wr = WR_OK;
> +	const char *user = NULL;
> +
> +	if (!is_authed(req, &user, &wr))
> +		return wr;

If we are not authed, send the 401 response.

> +	if (is_git_request(req))
> +		return do__git(req, user);

If we are authed, then pass through to the Git response.

> +	return send_http_error(1, 501, "Not Implemented", -1, NULL,
> +			       WR_OK | WR_HANGUP);

If the Git request fails, we don't care. This is a test.
Just pass a 500-level error and the client will barf,
letting us know that something went wrong.

> +static void kill_some_child(void)

> +static void check_dead_children(void)

These technically sound methods have unfortunate names.
Using something like "connection" over "child" might
alleviate some of the horror. (I initially wanted to
suggest "subprocess" but you compare live_children to
max_connections in the next method, so connection seemed
appropriate.)

> +static struct strvec cld_argv = STRVEC_INIT;
> +static void handle(int incoming, struct sockaddr *addr, socklen_t addrlen)
> +{
> +	struct child_process cld = CHILD_PROCESS_INIT;
> +
> +	if (max_connections && live_children >= max_connections) {
> +		kill_some_child();
> +		sleep(1);  /* give it some time to die */
> +		check_dead_children();
> +		if (live_children >= max_connections) {
> +			close(incoming);
> +			logerror("Too many children, dropping connection");
> +			return;
> +		}
> +	}

Do we anticipate exercising concurrent requests in our
tests? Perhaps it's not worth putting a cap on the
connection count so we can keep the test helpers simple.

> +	if (addr->sa_family == AF_INET) {
> +		char buf[128] = "";
> +		struct sockaddr_in *sin_addr = (void *) addr;
> +		inet_ntop(addr->sa_family, &sin_addr->sin_addr, buf, sizeof(buf));
> +		strvec_pushf(&cld.env, "REMOTE_ADDR=%s", buf);
> +		strvec_pushf(&cld.env, "REMOTE_PORT=%d",
> +				 ntohs(sin_addr->sin_port));
> +#ifndef NO_IPV6
> +	} else if (addr->sa_family == AF_INET6) {
> +		char buf[128] = "";
> +		struct sockaddr_in6 *sin6_addr = (void *) addr;
> +		inet_ntop(AF_INET6, &sin6_addr->sin6_addr, buf, sizeof(buf));
> +		strvec_pushf(&cld.env, "REMOTE_ADDR=[%s]", buf);
> +		strvec_pushf(&cld.env, "REMOTE_PORT=%d",
> +				 ntohs(sin6_addr->sin6_port));
> +#endif
> +	}
> +
> +	strvec_pushv(&cld.args, cld_argv.v);
> +	cld.in = incoming;
> +	cld.out = dup(incoming);
> +
> +	if (cld.out < 0)
> +		logerror("could not dup() `incoming`");
> +	else if (start_command(&cld))
> +		logerror("unable to fork");
> +	else
> +		add_child(&cld, addr, addrlen);
> +}
> +

I scanned the socket creation code, but my eyes were
glazing over. I'm definitely in the camp of "if it works,
that's enough for our tests." If we start to rely on this
test harness in more places, we can improve any shortcomings
as they arise.

> +//////////////////////////////////////////////////////////////////
> +// This section is executed by both the primary instance and all
> +// worker instances.  So, yes, each child-process re-parses the
> +// command line argument and re-discovers how it should behave.
> +//////////////////////////////////////////////////////////////////
> +
> +int cmd_main(int argc, const char **argv)
> +{
> +	int listen_port = 0;
> +	struct string_list listen_addr = STRING_LIST_INIT_NODUP;
> +	int worker_mode = 0;
> +	int i;
> +	struct auth_module *mod = NULL;
> +
> +	trace2_cmd_name("test-http-server");
> +	setup_git_directory_gently(NULL);
> +
> +	for (i = 1; i < argc; i++) {
> +		const char *arg = argv[i];
> +		const char *v;
> +
> +		if (skip_prefix(arg, "--listen=", &v)) {
> +			string_list_append(&listen_addr, xstrdup_tolower(v));
> +			continue;
> +		}
> +		if (skip_prefix(arg, "--port=", &v)) {
> +			char *end;
> +			unsigned long n;
> +			n = strtoul(v, &end, 0);
> +			if (*v && !*end) {
> +				listen_port = n;
> +				continue;
> +			}
> +		}
> +		if (!strcmp(arg, "--worker")) {
> +			worker_mode = 1;
> +			trace2_cmd_mode("worker");
> +			continue;
> +		}
> +		if (!strcmp(arg, "--verbose")) {
> +			verbose = 1;
> +			continue;
> +		}
> +		if (skip_prefix(arg, "--timeout=", &v)) {
> +			timeout = atoi(v);
> +			continue;
> +		}
> +		if (skip_prefix(arg, "--init-timeout=", &v)) {
> +			init_timeout = atoi(v);
> +			continue;
> +		}
> +		if (skip_prefix(arg, "--max-connections=", &v)) {
> +			max_connections = atoi(v);
> +			if (max_connections < 0)
> +				max_connections = 0; /* unlimited */
> +			continue;
> +		}
> +		if (!strcmp(arg, "--reuseaddr")) {
> +			reuseaddr = 1;
> +			continue;
> +		}
> +		if (skip_prefix(arg, "--pid-file=", &v)) {
> +			pid_file = v;
> +			continue;
> +		}

ok, most of these arguments are actually about the per-connection
subprocesses.

> +		if (skip_prefix(arg, "--allow-anonymous", &v)) {
> +			allow_anonymous = 1;
> +			continue;
> +		}

Here is how we choose to allo anonymous access.

> +		if (skip_prefix(arg, "--auth=", &v)) {
> +			struct strbuf **p = strbuf_split_str(v, ':', 2);
> +
> +			if (!p[0]) {
> +				error("invalid argument '%s'", v);
> +				usage(test_http_auth_usage);
> +			}
> +
> +			// trim trailing ':'
> +			if (p[1])
> +				strbuf_setlen(p[0], p[0]->len - 1);
> +
> +			if (get_auth_module(p[0])) {
> +				error("duplicate auth scheme '%s'\n", p[0]->buf);
> +				usage(test_http_auth_usage);
> +			}
> +
> +			mod = xmalloc(sizeof(struct auth_module));
> +			mod->scheme = xstrdup(p[0]->buf);
> +			mod->challenge_params = p[1] ? xstrdup(p[1]->buf) : NULL;

Here, you xstrdup() into a 'const char *', but you are really
passing ownership so it shouldn't be conts.

> +			mod->tokens = xmalloc(sizeof(struct string_list));

nit: this could also be "CALLOC_ARRAY(mod->tokens, 1);"

> +			string_list_init_dup(mod->tokens);
> +
> +			add_auth_module(mod);
> +
> +			strbuf_list_free(p);
> +			continue;

Ok, we gain the auth schemes from the command line.

> +		}
> +		if (skip_prefix(arg, "--auth-token=", &v)) {
> +			struct strbuf **p = strbuf_split_str(v, ':', 2);
> +			if (!p[0]) {
> +				error("invalid argument '%s'", v);
> +				usage(test_http_auth_usage);
> +			}
> +
> +			if (!p[1]) {
> +				error("missing token value '%s'\n", v);
> +				usage(test_http_auth_usage);
> +			}
> +
> +			// trim trailing ':'

Use /* */ (Aside: I'm surprised we don't have a build option in
DEVELOPER=1 that catches the use of these comments.)

> +			strbuf_setlen(p[0], p[0]->len - 1);
> +
> +			mod = get_auth_module(p[0]);
> +			if (!mod) {
> +				error("auth scheme not defined '%s'\n", p[0]->buf);
> +				usage(test_http_auth_usage);
> +			}
> +
> +			string_list_append(mod->tokens, p[1]->buf);
> +			strbuf_list_free(p);
> +			continue;
> +		}

And the token lists. It is important that the scheme is added
before any token is added.

> +		fprintf(stderr, "error: unknown argument '%s'\n", arg);
> +		usage(test_http_auth_usage);
> +	}
> +
> +	/* avoid splitting a message in the middle */
> +	setvbuf(stderr, NULL, _IOFBF, 4096);
> +
> +	if (listen_port == 0)
> +		listen_port = DEFAULT_GIT_PORT;
> +
> +	/*
> +	 * If no --listen=<addr> args are given, the setup_named_sock()
> +	 * code will use receive a NULL address and set INADDR_ANY.
> +	 * This exposes both internal and external interfaces on the
> +	 * port.
> +	 *
> +	 * Disallow that and default to the internal-use-only loopback
> +	 * address.
> +	 */
> +	if (!listen_addr.nr)
> +		string_list_append(&listen_addr, "127.0.0.1");
> +
> +	/*
> +	 * worker_mode is set in our own child process instances
> +	 * (that are bound to a connected socket from a client).
> +	 */
> +	if (worker_mode)
> +		return worker();
> +
> +	/*
> +	 * `cld_argv` is a bit of a clever hack. The top-level instance
> +	 * of test-http-server does the normal bind/listen/accept stuff.
> +	 * For each incoming socket, the top-level process spawns
> +	 * a child instance of test-http-server *WITH* the additional
> +	 * `--worker` argument. This causes the child to set `worker_mode`
> +	 * and immediately call `worker()` using the connected socket (and
> +	 * without the usual need for fork() or threads).
> +	 *
> +	 * The magic here is made possible because `cld_argv` is static
> +	 * and handle() (called by service_loop()) knows about it.
> +	 */
> +	strvec_push(&cld_argv, argv[0]);
> +	strvec_push(&cld_argv, "--worker");
> +	for (i = 1; i < argc; ++i)
> +		strvec_push(&cld_argv, argv[i]);
> +
> +	/*
> +	 * Setup primary instance to listen for connections.
> +	 */
> +	return serve(&listen_addr, listen_port);
> +}

And complete the thing with some boilerplate.

This was a lot to read, and the interesting bits are all mixed in
with the http server code, which is less interesting to what we
are trying to accomplish. It would be beneficial to split this
into one or two patches before we actually introduce the tests.

The most important thing that I think would be helpful is to
isolate all the authentication behavior into its own patch so
we can see how those connections from the command-line arguments
affect the behavior of the server responses.

I think ideally we would have the following split:

 1. All server boilerblate. All requests 500 not-implemented.

 2. Add Git fall-through with no authentication. Add the tests
    that are intended to allow anonymous auth.

 3. Add authentication data structures read from command-line,
    but not processed at all in the logic.

 4. Act on the authentication data structures to alter the
    requests. Add the tests that use these authentication
    schemes.

I could easily see a case for combining 1&2 as well as 3&4,
for slightly larger but more completely-testable changes at
every step.

From what I read, I don't think there is much to change in
the end result of the code, but it definitely was hard to read
the important things when surrounded by many lines of
boilerplate.

> diff --git a/t/t5556-http-auth.sh b/t/t5556-http-auth.sh

I'm going to pause here and come back to the test script in
a separate reply.

Thanks,
-Stolee
Jeff Hostetler Oct. 28, 2022, 7:14 p.m. UTC | #2
On 10/28/22 11:08 AM, Derrick Stolee wrote:
> }
> 
>> diff --git a/t/helper/test-http-server.c b/t/helper/test-http-server.c
> 
>> @@ -0,0 +1,1134 @@
>> +#include "config.h"
>> +#include "run-command.h"
>> +#include "strbuf.h"
>> +#include "string-list.h"
>> +#include "trace2.h"
>> +#include "version.h"
>> +#include "dir.h"
>> +#include "date.h"
>> +
>> +#define TR2_CAT "test-http-server"
>> +
>> +static const char *pid_file;
>> +static int verbose;
>> +static int reuseaddr;
>> +
>> +static const char test_http_auth_usage[] =
>> +"http-server [--verbose]\n"
>> +"           [--timeout=<n>] [--init-timeout=<n>] [--max-connections=<n>]\n"
>> +"           [--reuseaddr] [--pid-file=<file>]\n"
>> +"           [--listen=<host_or_ipaddr>]* [--port=<n>]\n"
>> +"           [--anonymous-allowed]\n"
>> +"           [--auth=<scheme>[:<params>] [--auth-token=<scheme>:<token>]]*\n"
>> +;
> 
> These are a lot of options to implement all at once. They are probably
> simple enough, but depending on the implementation and tests, it might
> be helpful to split this patch into smaller ones that introduce these
> options along with the tests that exercise each. That will help
> verify that they are being tested properly instead of needing to track
> back and forth across the patch for each one.

how many of these options were inherited from test-gvfs-protocol or
from upstream git-daemon?  If most came from git-daemon, it's probably
easier to see that this was a cut-n-paste from it if it comes over in
one commit, since all of the OPT_ processing, usage(), and static global
state vars will come over together I would think -- rather than to build
up the arg parsing bit by bit.  More on this in a minute...


>> +
>> +/* Timeout, and initial timeout */
>> +static unsigned int timeout;
>> +static unsigned int init_timeout;
>> +
>> +static void logreport(const char *label, const char *err, va_list params)
>> +{
>> +	struct strbuf msg = STRBUF_INIT;
>> +
>> +	strbuf_addf(&msg, "[%"PRIuMAX"] %s: ", (uintmax_t)getpid(), label);
>> +	strbuf_vaddf(&msg, err, params);
>> +	strbuf_addch(&msg, '\n');
>> +
>> +	fwrite(msg.buf, sizeof(char), msg.len, stderr);
>> +	fflush(stderr);
>> +
>> +	strbuf_release(&msg);
>> +}
>> +
>> +__attribute__((format (printf, 1, 2)))
>> +static void logerror(const char *err, ...)
>> +{
>> +	va_list params;
>> +	va_start(params, err);
>> +	logreport("error", err, params);
>> +	va_end(params);
>> +}
>> +
>> +__attribute__((format (printf, 1, 2)))
>> +static void loginfo(const char *err, ...)
>> +{
>> +	va_list params;
>> +	if (!verbose)
>> +		return;
>> +	va_start(params, err);
>> +	logreport("info", err, params);
>> +	va_end(params);
>> +}

...Maybe it would be easier to see/diff this large new test server
if we copied `daemon.c` into this source file in 1 commit and then
converted it to what you have now in 1 commit -- so that only new
code shows up here.  For example, all of the above logreport, logerror,
and loginfo routines would show up as new in the copy commit, but not
in the edit commit.  However, that may lead to too much noise when
you actually get into the meat of the auth changes, maybe.


> I wonder how much of this we need or is just a nice thing. I would
> err on the side of making things as simple as possible, but being
> able to debug this test server may be important based on your
> experience.

i'd vote to keep it.

[...]
>> +static void kill_some_child(void)
> 
>> +static void check_dead_children(void)
> 
> These technically sound methods have unfortunate names.
> Using something like "connection" over "child" might
> alleviate some of the horror. (I initially wanted to
> suggest "subprocess" but you compare live_children to
> max_connections in the next method, so connection seemed
> appropriate.)

These names were inherited from `daemon.c` IIRC. I wouldn't change
them since it'll just introduce noise when diffing.  Especially,
if we do the copy commit first.


[...]
>> +static struct strvec cld_argv = STRVEC_INIT;
>> +static void handle(int incoming, struct sockaddr *addr, socklen_t addrlen)
>> +{
>> +	struct child_process cld = CHILD_PROCESS_INIT;
>> +
>> +	if (max_connections && live_children >= max_connections) {
>> +		kill_some_child();
>> +		sleep(1);  /* give it some time to die */
>> +		check_dead_children();
>> +		if (live_children >= max_connections) {
>> +			close(incoming);
>> +			logerror("Too many children, dropping connection");
>> +			return;
>> +		}
>> +	}
> 
> Do we anticipate exercising concurrent requests in our
> tests? Perhaps it's not worth putting a cap on the
> connection count so we can keep the test helpers simple.

again, this code was inherited from `daemon.c`, so we could leave it.

[...]
>> +			mod = xmalloc(sizeof(struct auth_module));
>> +			mod->scheme = xstrdup(p[0]->buf);
>> +			mod->challenge_params = p[1] ? xstrdup(p[1]->buf) : NULL;
> 
> Here, you xstrdup() into a 'const char *', but you are really
> passing ownership so it shouldn't be conts.

There is a strbuf_detach() that will let you steal the buffer from the
strbuf if that would help.


[...]
> This was a lot to read, and the interesting bits are all mixed in
> with the http server code, which is less interesting to what we
> are trying to accomplish. It would be beneficial to split this
> into one or two patches before we actually introduce the tests.

agreed. it is big, but it does make sense.  perhaps doing the
copy daemon.c commit and then see how this commit diffs from it
would make it more manageable. (not sure, but worth a try.)

[...]
>  From what I read, I don't think there is much to change in
> the end result of the code, but it definitely was hard to read
> the important things when surrounded by many lines of
> boilerplate.

agreed. i think the end result is good.

Thanks
Jeff
Matthew John Cheetham Nov. 1, 2022, 11:14 p.m. UTC | #3
On 2022-10-28 12:14, Jeff Hostetler wrote:
>
>
> On 10/28/22 11:08 AM, Derrick Stolee wrote:
>> }
>>
>>> diff --git a/t/helper/test-http-server.c b/t/helper/test-http-server.c
>>
>>> @@ -0,0 +1,1134 @@
>>> +#include "config.h"
>>> +#include "run-command.h"
>>> +#include "strbuf.h"
>>> +#include "string-list.h"
>>> +#include "trace2.h"
>>> +#include "version.h"
>>> +#include "dir.h"
>>> +#include "date.h"
>>> +
>>> +#define TR2_CAT "test-http-server"
>>> +
>>> +static const char *pid_file;
>>> +static int verbose;
>>> +static int reuseaddr;
>>> +
>>> +static const char test_http_auth_usage[] =
>>> +"http-server [--verbose]\n"
>>> +"           [--timeout=<n>] [--init-timeout=<n>] [--max-connections=<n>]\n"
>>> +"           [--reuseaddr] [--pid-file=<file>]\n"
>>> +"           [--listen=<host_or_ipaddr>]* [--port=<n>]\n"
>>> +"           [--anonymous-allowed]\n"
>>> +"           [--auth=<scheme>[:<params>] [--auth-token=<scheme>:<token>]]*\n"
>>> +;
>>
>> These are a lot of options to implement all at once. They are probably
>> simple enough, but depending on the implementation and tests, it might
>> be helpful to split this patch into smaller ones that introduce these
>> options along with the tests that exercise each. That will help
>> verify that they are being tested properly instead of needing to track
>> back and forth across the patch for each one.
>
> how many of these options were inherited from test-gvfs-protocol or
> from upstream git-daemon?  If most came from git-daemon, it's probably
> easier to see that this was a cut-n-paste from it if it comes over in
> one commit, since all of the OPT_ processing, usage(), and static global
> state vars will come over together I would think -- rather than to build
> up the arg parsing bit by bit.  More on this in a minute...
>

Only --anonymous-allowed, --auth and --auth-token are added over git-daemon.

>
>>> +
>>> +/* Timeout, and initial timeout */
>>> +static unsigned int timeout;
>>> +static unsigned int init_timeout;
>>> +
>>> +static void logreport(const char *label, const char *err, va_list params)
>>> +{
>>> +    struct strbuf msg = STRBUF_INIT;
>>> +
>>> +    strbuf_addf(&msg, "[%"PRIuMAX"] %s: ", (uintmax_t)getpid(), label);
>>> +    strbuf_vaddf(&msg, err, params);
>>> +    strbuf_addch(&msg, '\n');
>>> +
>>> +    fwrite(msg.buf, sizeof(char), msg.len, stderr);
>>> +    fflush(stderr);
>>> +
>>> +    strbuf_release(&msg);
>>> +}
>>> +
>>> +__attribute__((format (printf, 1, 2)))
>>> +static void logerror(const char *err, ...)
>>> +{
>>> +    va_list params;
>>> +    va_start(params, err);
>>> +    logreport("error", err, params);
>>> +    va_end(params);
>>> +}
>>> +
>>> +__attribute__((format (printf, 1, 2)))
>>> +static void loginfo(const char *err, ...)
>>> +{
>>> +    va_list params;
>>> +    if (!verbose)
>>> +        return;
>>> +    va_start(params, err);
>>> +    logreport("info", err, params);
>>> +    va_end(params);
>>> +}
>
> ...Maybe it would be easier to see/diff this large new test server
> if we copied `daemon.c` into this source file in 1 commit and then
> converted it to what you have now in 1 commit -- so that only new
> code shows up here.  For example, all of the above logreport, logerror,
> and loginfo routines would show up as new in the copy commit, but not
> in the edit commit.  However, that may lead to too much noise when
> you actually get into the meat of the auth changes, maybe.

I take from git-daemon and the test-gvfs-protocol helper from microsoft/git
fork, but then also delete lots of not required pieces too just as much as
I have added. Copying git-daemon.c, to then delete, and then add feels like
lots of noise.

>> I wonder how much of this we need or is just a nice thing. I would
>> err on the side of making things as simple as possible, but being
>> able to debug this test server may be important based on your
>> experience.
>
> i'd vote to keep it.
>
> [...]
>>> +static void kill_some_child(void)
>>
>>> +static void check_dead_children(void)
>>
>> These technically sound methods have unfortunate names.
>> Using something like "connection" over "child" might
>> alleviate some of the horror. (I initially wanted to
>> suggest "subprocess" but you compare live_children to
>> max_connections in the next method, so connection seemed
>> appropriate.)
>
> These names were inherited from `daemon.c` IIRC. I wouldn't change
> them since it'll just introduce noise when diffing.  Especially,
> if we do the copy commit first.

Indeed. These functions are untouched from daemon.c. I do plan to split
this mega-patch up however in to a single 'add the boilerplate' based on
git-daemon patch, then add the extra pieces like HTTP request parsing and
the auth pieces in a v3.

> [...]
>>> +static struct strvec cld_argv = STRVEC_INIT;
>>> +static void handle(int incoming, struct sockaddr *addr, socklen_t addrlen)
>>> +{
>>> +    struct child_process cld = CHILD_PROCESS_INIT;
>>> +
>>> +    if (max_connections && live_children >= max_connections) {
>>> +        kill_some_child();
>>> +        sleep(1);  /* give it some time to die */
>>> +        check_dead_children();
>>> +        if (live_children >= max_connections) {
>>> +            close(incoming);
>>> +            logerror("Too many children, dropping connection");
>>> +            return;
>>> +        }
>>> +    }
>>
>> Do we anticipate exercising concurrent requests in our
>> tests? Perhaps it's not worth putting a cap on the
>> connection count so we can keep the test helpers simple.
>
> again, this code was inherited from `daemon.c`, so we could leave it.
>
> [...]
>>> +            mod = xmalloc(sizeof(struct auth_module));
>>> +            mod->scheme = xstrdup(p[0]->buf);
>>> +            mod->challenge_params = p[1] ? xstrdup(p[1]->buf) : NULL;
>>
>> Here, you xstrdup() into a 'const char *', but you are really
>> passing ownership so it shouldn't be conts.
>
> There is a strbuf_detach() that will let you steal the buffer from the
> strbuf if that would help.

Will update in v3 to drop the const.

> [...]
>> This was a lot to read, and the interesting bits are all mixed in
>> with the http server code, which is less interesting to what we
>> are trying to accomplish. It would be beneficial to split this
>> into one or two patches before we actually introduce the tests.
>
> agreed. it is big, but it does make sense.  perhaps doing the
> copy daemon.c commit and then see how this commit diffs from it
> would make it more manageable. (not sure, but worth a try.)
>
> [...]
>>  From what I read, I don't think there is much to change in
>> the end result of the code, but it definitely was hard to read
>> the important things when surrounded by many lines of
>> boilerplate.
>
> agreed. i think the end result is good.
>
> Thanks
> Jeff
>
>
Matthew John Cheetham Nov. 1, 2022, 11:59 p.m. UTC | #4
On 2022-10-28 08:08, Derrick Stolee wrote:
> On 10/21/22 1:08 PM, Matthew John Cheetham via GitGitGadget wrote:
>> From: Matthew John Cheetham <mjcheetham@outlook.com>
> 
>> @@ -1500,6 +1500,8 @@ else
>>  	endif
>>  	BASIC_CFLAGS += $(CURL_CFLAGS)
>>  
>> +	TEST_PROGRAMS_NEED_X += test-http-server
>> +
>>  	REMOTE_CURL_PRIMARY = git-remote-http$X
>>  	REMOTE_CURL_ALIASES = git-remote-https$X git-remote-ftp$X git-remote-ftps$X
>>  	REMOTE_CURL_NAMES = $(REMOTE_CURL_PRIMARY) $(REMOTE_CURL_ALIASES)
> 
> This hunk is in the "else" block of "ifdef NO_CURL",
> so this makes sense for why TEST_PROGRAMS_NEED_X is
> augmented here, away from other instances.
> 
>> diff --git a/contrib/buildsystems/CMakeLists.txt b/contrib/buildsystems/CMakeLists.txt
>> index 787738e6fa3..45251695ce0 100644
>> --- a/contrib/buildsystems/CMakeLists.txt
>> +++ b/contrib/buildsystems/CMakeLists.txt
>> @@ -989,6 +989,19 @@ set(wrapper_scripts
>>  set(wrapper_test_scripts
>>  	test-fake-ssh test-tool)
>>  
>> +if(CURL_FOUND)
>> +       list(APPEND wrapper_test_scripts test-http-server)
>> +
>> +       add_executable(test-http-server ${CMAKE_SOURCE_DIR}/t/helper/test-http-server.c)
>> +       target_link_libraries(test-http-server common-main)
>> +
>> +       if(MSVC)
>> +               set_target_properties(test-http-server
>> +                                       PROPERTIES RUNTIME_OUTPUT_DIRECTORY_DEBUG ${CMAKE_BINARY_DIR}/t/helper)
>> +               set_target_properties(test-http-server
>> +                                       PROPERTIES RUNTIME_OUTPUT_DIRECTORY_RELEASE ${CMAKE_BINARY_DIR}/t/helper)
>> +       endif()
>> +endif()
> 
> And this file has the pattern of many "if(CURL_FOUND)"
> blocks with isolated purposes, so it makes sense to
> have this be an isolated change instead of grouped with
> a different case.
> 
>> diff --git a/t/helper/.gitignore b/t/helper/.gitignore
>> index 8c2ddcce95f..1a94ab6eed5 100644
>> --- a/t/helper/.gitignore
>> +++ b/t/helper/.gitignore
>> @@ -1,2 +1,3 @@
>>  /test-tool
>>  /test-fake-ssh
>> +test-http-server
> 
> Should this start with a "/" like the other entries?

That it probably should! Will update.

>> diff --git a/t/helper/test-credential-helper-replay.sh b/t/helper/test-credential-helper-replay.sh
>> new file mode 100755
>> index 00000000000..03e5e63dad6
>> --- /dev/null
>> +++ b/t/helper/test-credential-helper-replay.sh
>> @@ -0,0 +1,14 @@
>> +cmd=$1
>> +teefile=$cmd-actual.cred
>> +catfile=$cmd-response.cred
>> +rm -f $teefile
>> +while read line;
>> +do
>> +	if test -z "$line"; then
>> +		break;
>> +	fi
>> +	echo "$line" >> $teefile
>> +done
>> +if test "$cmd" = "get"; then
>> +	cat $catfile
>> +fi
> 
> Should this be a helper method within another script, such
> as t/lib-credential.sh or t/lib-httpd.sh? The read over
> stdin will still work, as in this example:
> 
> read_chunk() {
> 	while read line; do
> 		case "$line" in
> 		--) break ;;
> 		*) echo "$line" ;;
> 		esac
> 	done
> }

This script file is used as a credential helper that is invoked by Git.
We specify that Git should use this credential helper in the tests using
the -c option:

  CREDENTIAL_HELPER="$GIT_BUILD_DIR/t/helper/test-credential-helper-replay.sh" \
	  && export CREDENTIAL_HELPER
..
   git -c credential.helper="$CREDENTIAL_HELPER" ls-remote $ORIGIN_URL &&


Would extracting a read_chunk() function to one of the lib-* test scripts
be worth it given we already need another entry script anyway?

What other scripts would be calling read_chunk()?


>> diff --git a/t/helper/test-http-server.c b/t/helper/test-http-server.c
> 
>> @@ -0,0 +1,1134 @@
>> +#include "config.h"
>> +#include "run-command.h"
>> +#include "strbuf.h"
>> +#include "string-list.h"
>> +#include "trace2.h"
>> +#include "version.h"
>> +#include "dir.h"
>> +#include "date.h"
>> +
>> +#define TR2_CAT "test-http-server"
>> +
>> +static const char *pid_file;
>> +static int verbose;
>> +static int reuseaddr;
>> +
>> +static const char test_http_auth_usage[] =
>> +"http-server [--verbose]\n"
>> +"           [--timeout=<n>] [--init-timeout=<n>] [--max-connections=<n>]\n"
>> +"           [--reuseaddr] [--pid-file=<file>]\n"
>> +"           [--listen=<host_or_ipaddr>]* [--port=<n>]\n"
>> +"           [--anonymous-allowed]\n"
>> +"           [--auth=<scheme>[:<params>] [--auth-token=<scheme>:<token>]]*\n"
>> +;
> 
> These are a lot of options to implement all at once. They are probably
> simple enough, but depending on the implementation and tests, it might
> be helpful to split this patch into smaller ones that introduce these
> options along with the tests that exercise each. That will help
> verify that they are being tested properly instead of needing to track
> back and forth across the patch for each one.

I plan to split this patch in to several in a v3.

>> +
>> +/* Timeout, and initial timeout */
>> +static unsigned int timeout;
>> +static unsigned int init_timeout;
>> +
>> +static void logreport(const char *label, const char *err, va_list params)
>> +{
>> +	struct strbuf msg = STRBUF_INIT;
>> +
>> +	strbuf_addf(&msg, "[%"PRIuMAX"] %s: ", (uintmax_t)getpid(), label);
>> +	strbuf_vaddf(&msg, err, params);
>> +	strbuf_addch(&msg, '\n');
>> +
>> +	fwrite(msg.buf, sizeof(char), msg.len, stderr);
>> +	fflush(stderr);
>> +
>> +	strbuf_release(&msg);
>> +}
>> +
>> +__attribute__((format (printf, 1, 2)))
>> +static void logerror(const char *err, ...)
>> +{
>> +	va_list params;
>> +	va_start(params, err);
>> +	logreport("error", err, params);
>> +	va_end(params);
>> +}
>> +
>> +__attribute__((format (printf, 1, 2)))
>> +static void loginfo(const char *err, ...)
>> +{
>> +	va_list params;
>> +	if (!verbose)
>> +		return;
>> +	va_start(params, err);
>> +	logreport("info", err, params);
>> +	va_end(params);
>> +}
> 
> I wonder how much of this we need or is just a nice thing. I would
> err on the side of making things as simple as possible, but being
> able to debug this test server may be important based on your
> experience.

These are useful to debug failures. Plus they also come from my copy
from daemon.c, so didn't want to touch/delete too much from that
starting point.

>> +static void set_keep_alive(int sockfd)
>> +{
>> +	int ka = 1;
>> +
>> +	if (setsockopt(sockfd, SOL_SOCKET, SO_KEEPALIVE, &ka, sizeof(ka)) < 0) {
>> +		if (errno != ENOTSOCK)
>> +			logerror("unable to set SO_KEEPALIVE on socket: %s",
>> +				strerror(errno));
>> +	}
>> +}
>> +
>> +//////////////////////////////////////////////////////////////////
>> +// The code in this section is used by "worker" instances to service
>> +// a single connection from a client.  The worker talks to the client
>> +// on 0 and 1.
>> +//////////////////////////////////////////////////////////////////
> 
> Use /* */ style comments. You can repeat the asterisks to get a
> similar visual block.

Yep!

>> +
>> +enum worker_result {
>> +	/*
>> +	 * Operation successful.
>> +	 * Caller *might* keep the socket open and allow keep-alive.
>> +	 */
>> +	WR_OK       = 0,
>> +	/*
>> +	 * Various errors while processing the request and/or the response.
>> +	 * Close the socket and clean up.
>> +	 * Exit child-process with non-zero status.
>> +	 */
>> +	WR_IO_ERROR = 1<<0,
>> +	/*
>> +	 * Close the socket and clean up.  Does not imply an error.
>> +	 */
>> +	WR_HANGUP   = 1<<1,
> 
> nit: add a whitespace line between an item and the next
> item's comment.

Sure

>> +
>> +	WR_STOP_THE_MUSIC = (WR_IO_ERROR | WR_HANGUP),
>> +};
> 
> (I read, but have no comments on the http-server boilerplate.)
> 
>> +
>> +enum auth_result {
>> +	AUTH_UNKNOWN = 0,
>> +	AUTH_DENY = 1,
>> +	AUTH_ALLOW = 2,
>> +};
>> +
>> +struct auth_module {
>> +	const char *scheme;
>> +	const char *challenge_params;
> 
> Later, I notice that you set challenge_params using an
> xstrdup() so this shouldn't be const and you should
> free it in any freeing code.

One question on this suggestion.. where would be appropriate to
free said char*? We need them for the lifetime of the process,
and they never grown in number beyond initial allocation from
parsing command line args.

I could move to stack alloc these in `cmd_main` and instead pass
a pointer to the `auth_modules` and count down through every
serve/handle etc function, rather than rely on them being global?

Thoughts or preferences?

>> +	struct string_list *tokens;
>> +};
>> +
>> +static int allow_anonymous;
>> +static struct auth_module **auth_modules = NULL;
>> +static size_t auth_modules_nr = 0;
>> +static size_t auth_modules_alloc = 0;
> 
> So, we are setting up a number of potential auth modules,
> each of which has a scheme to match a request to the module,
> and a list of tokens that would be considered worthy of the
> AUTH_ALLOW result. Otherwise, if the scheme matches but no
> token matches, we get AUTH_DENY. Finally, if no scheme matches
> we get AUTH_UNKNOWN.
> 
> This concept might be worth a comment here around the data
> structures before we get into how that is implemented.
> 
>> +static struct auth_module *get_auth_module(struct strbuf *scheme)
>> +{
>> +	int i;
>> +	struct auth_module *mod;
>> +	for (i = 0; i < auth_modules_nr; i++) {
>> +		mod = auth_modules[i];
>> +		if (!strcasecmp(mod->scheme, scheme->buf))
>> +			return mod;
>> +	}
>> +
>> +	return NULL;
>> +}
> 
> Matching the input scheme against the list of modules.
> 
> Only complaint: there is no reason that 'scheme' needs t
> be a strbuf, but could be a 'const char *' here.

True.

>> +static void add_auth_module(struct auth_module *mod)
>> +{
>> +	ALLOC_GROW(auth_modules, auth_modules_nr + 1, auth_modules_alloc);
>> +	auth_modules[auth_modules_nr++] = mod;
>> +}
> 
> nit: this could be located earlier, next to the list
> definition, or delayed until it is needed. That would
> allow get_auth_module() to be closer to its first use.

Not sure I follow.. are you saying I should move `add_auth_module`
to earlier in the file?

>> +static int is_authed(struct req *req, const char **user, enum worker_result *wr)
>> +{
>> +	enum auth_result result = AUTH_UNKNOWN;
>> +	struct string_list hdrs = STRING_LIST_INIT_NODUP;
>> +	struct auth_module *mod;
>> +
>> +	struct string_list_item *hdr;
>> +	struct string_list_item *token;
>> +	const char *v;
>> +	struct strbuf **split = NULL;
>> +	int i;
>> +	char *challenge;
>> +
>> +	/* ask all auth modules to validate the request */
>> +	for_each_string_list_item(hdr, &req->header_list) {
>> +		if (skip_iprefix(hdr->string, "Authorization: ", &v)) {
>> +			split = strbuf_split_str(v, ' ', 2);
>> +			if (!split[0] || !split[1]) continue;
> 
> For each valid request header...
> 
>> +			// trim trailing space ' '
>> +			strbuf_setlen(split[0], split[0]->len - 1);
>> +
>> +			mod = get_auth_module(split[0]);
>> +			if (mod) {
> 
> ...get an appropriate module, if it exists...
> 
>> +
>> +				for_each_string_list_item(token, mod->tokens) {
>> +					if (!strcmp(split[1]->buf, token->string)) {
>> +						result = AUTH_ALLOW;
>> +						goto done;
>> +					}
>> +				}
>> +
>> +				if (result != AUTH_UNKNOWN)
>> +					goto done;
> 
> ...and report if we find a valid token.
> 
> Here, it seems I was wrong in my expectation of AUTH_DENY:
> if a matching module exists but no token exists in that
> module, then we keep searching other modules. 

AUTH_DENY denies a request immediately and stops searching other modules.
AUTH_ALLOW approves the request and stops looking at other modules.
AUTH_UNKNOWN means this module didn't match or 'decide' to reject, so keep
looking/asking other modules.

After reading you review, I think it may be better to change this to
more closely match your expectations (and how typical servers behave):

Return AUTH_ALLOW if we find a matching valid token for the module.
If we match a module and do NOT find a token, then return AUTH_DENY.
Otherwise return AUTH_UNKNOWN - this means the user provided some auth
mechanism we don't understand, or no auth at all.

>> +			}
>> +		}
>> +	}
>> +
>> +done:
>> +	switch (result) {
>> +	case AUTH_ALLOW:
>> +		trace2_printf("%s: auth '%s' ALLOW", TR2_CAT, mod->scheme);
>> +		*user = "VALID_TEST_USER";
>> +		*wr = WR_OK;
>> +		break;
>> +
>> +	case AUTH_DENY:
>> +		trace2_printf("%s: auth '%s' DENY", TR2_CAT, mod->scheme);
>> +		/* fall-through */
> 
> I'm not sure that I see a case where this is possible. Maybe
> we should have a 'result = AUTH_DENY' at the start of the
> "if (mod)" block, followed by a 'goto done' in all cases
> instead of "if (result != AUTH_UNKNOWN)"?

In this version, you're correct.. AUTH_DENY is never returned.
This tri-state response from an auth module is an oversight from an earlier
local version - sorry for the confusion here, and thanks for catching!
I will update in a v3 to match sane expectations.

>> +	case AUTH_UNKNOWN:
>> +		if (allow_anonymous)
>> +			break;
> 
> If we do not require auth, then we want to continue if there
> is no matching authentication.
> 
>> +		for (i = 0; i < auth_modules_nr; i++) {
>> +			mod = auth_modules[i];
>> +			if (mod->challenge_params)
>> +				challenge = xstrfmt("WWW-Authenticate: %s %s",
>> +						    mod->scheme,
>> +						    mod->challenge_params);
>> +			else
>> +				challenge = xstrfmt("WWW-Authenticate: %s",
>> +						    mod->scheme);
>> +			string_list_append(&hdrs, challenge);
>> +		}
>> +		*wr = send_http_error(1, 401, "Unauthorized", -1, &hdrs, *wr);
> 
> However, here is the critical piece about how servers will
> start to act with the new WWW-Authenticate header usage in
> the Git credential helper interface. This will be critical
> in the testing for Git to retry the credential helper while
> passing these authentications schemes from the installed
> modules.
> 
>> +	}
>> +
>> +	strbuf_list_free(split);
>> +	string_list_clear(&hdrs, 0);
>> +
>> +	return result == AUTH_ALLOW ||
>> +	      (result == AUTH_UNKNOWN && allow_anonymous);
> 
> Did it work? Or did it not need to work? I'm interested to
> investigate the case that the client sent an authentication
> header that matches a module but doesn't match any tokens,
> but we allow anonymous access, anyway. Is that a 400? Or
> is that a 401?

It should probably be a 401 as the credentials are understood, but
are just 'bad'.

>> +static enum worker_result dispatch(struct req *req)
>> +{
>> +	enum worker_result wr = WR_OK;
>> +	const char *user = NULL;
>> +
>> +	if (!is_authed(req, &user, &wr))
>> +		return wr;
> 
> If we are not authed, send the 401 response.
> 
>> +	if (is_git_request(req))
>> +		return do__git(req, user);
> 
> If we are authed, then pass through to the Git response.
> 
>> +	return send_http_error(1, 501, "Not Implemented", -1, NULL,
>> +			       WR_OK | WR_HANGUP);
> 
> If the Git request fails, we don't care. This is a test.
> Just pass a 500-level error and the client will barf,
> letting us know that something went wrong.

Correct assessment!

>> +static void kill_some_child(void)
> 
>> +static void check_dead_children(void)
> 
> These technically sound methods have unfortunate names.
> Using something like "connection" over "child" might
> alleviate some of the horror. (I initially wanted to
> suggest "subprocess" but you compare live_children to
> max_connections in the next method, so connection seemed
> appropriate.)

These are copied exactly from git-daemon, so I'd rather
avoid the churn in renaming things.

>> +static struct strvec cld_argv = STRVEC_INIT;
>> +static void handle(int incoming, struct sockaddr *addr, socklen_t addrlen)
>> +{
>> +	struct child_process cld = CHILD_PROCESS_INIT;
>> +
>> +	if (max_connections && live_children >= max_connections) {
>> +		kill_some_child();
>> +		sleep(1);  /* give it some time to die */
>> +		check_dead_children();
>> +		if (live_children >= max_connections) {
>> +			close(incoming);
>> +			logerror("Too many children, dropping connection");
>> +			return;
>> +		}
>> +	}
> 
> Do we anticipate exercising concurrent requests in our
> tests? Perhaps it's not worth putting a cap on the
> connection count so we can keep the test helpers simple.

Probably not, but again.. 100% of the boilerplate here came from
the prior art in daemon.c, so didn't want to touch any of it!
I'm happy to start deleting things however if needed?

>> +	if (addr->sa_family == AF_INET) {
>> +		char buf[128] = "";
>> +		struct sockaddr_in *sin_addr = (void *) addr;
>> +		inet_ntop(addr->sa_family, &sin_addr->sin_addr, buf, sizeof(buf));
>> +		strvec_pushf(&cld.env, "REMOTE_ADDR=%s", buf);
>> +		strvec_pushf(&cld.env, "REMOTE_PORT=%d",
>> +				 ntohs(sin_addr->sin_port));
>> +#ifndef NO_IPV6
>> +	} else if (addr->sa_family == AF_INET6) {
>> +		char buf[128] = "";
>> +		struct sockaddr_in6 *sin6_addr = (void *) addr;
>> +		inet_ntop(AF_INET6, &sin6_addr->sin6_addr, buf, sizeof(buf));
>> +		strvec_pushf(&cld.env, "REMOTE_ADDR=[%s]", buf);
>> +		strvec_pushf(&cld.env, "REMOTE_PORT=%d",
>> +				 ntohs(sin6_addr->sin6_port));
>> +#endif
>> +	}
>> +
>> +	strvec_pushv(&cld.args, cld_argv.v);
>> +	cld.in = incoming;
>> +	cld.out = dup(incoming);
>> +
>> +	if (cld.out < 0)
>> +		logerror("could not dup() `incoming`");
>> +	else if (start_command(&cld))
>> +		logerror("unable to fork");
>> +	else
>> +		add_child(&cld, addr, addrlen);
>> +}
>> +
> 
> I scanned the socket creation code, but my eyes were
> glazing over. I'm definitely in the camp of "if it works,
> that's enough for our tests." If we start to rely on this
> test harness in more places, we can improve any shortcomings
> as they arise.
> 
>> +//////////////////////////////////////////////////////////////////
>> +// This section is executed by both the primary instance and all
>> +// worker instances.  So, yes, each child-process re-parses the
>> +// command line argument and re-discovers how it should behave.
>> +//////////////////////////////////////////////////////////////////
>> +
>> +int cmd_main(int argc, const char **argv)
>> +{
>> +	int listen_port = 0;
>> +	struct string_list listen_addr = STRING_LIST_INIT_NODUP;
>> +	int worker_mode = 0;
>> +	int i;
>> +	struct auth_module *mod = NULL;
>> +
>> +	trace2_cmd_name("test-http-server");
>> +	setup_git_directory_gently(NULL);
>> +
>> +	for (i = 1; i < argc; i++) {
>> +		const char *arg = argv[i];
>> +		const char *v;
>> +
>> +		if (skip_prefix(arg, "--listen=", &v)) {
>> +			string_list_append(&listen_addr, xstrdup_tolower(v));
>> +			continue;
>> +		}
>> +		if (skip_prefix(arg, "--port=", &v)) {
>> +			char *end;
>> +			unsigned long n;
>> +			n = strtoul(v, &end, 0);
>> +			if (*v && !*end) {
>> +				listen_port = n;
>> +				continue;
>> +			}
>> +		}
>> +		if (!strcmp(arg, "--worker")) {
>> +			worker_mode = 1;
>> +			trace2_cmd_mode("worker");
>> +			continue;
>> +		}
>> +		if (!strcmp(arg, "--verbose")) {
>> +			verbose = 1;
>> +			continue;
>> +		}
>> +		if (skip_prefix(arg, "--timeout=", &v)) {
>> +			timeout = atoi(v);
>> +			continue;
>> +		}
>> +		if (skip_prefix(arg, "--init-timeout=", &v)) {
>> +			init_timeout = atoi(v);
>> +			continue;
>> +		}
>> +		if (skip_prefix(arg, "--max-connections=", &v)) {
>> +			max_connections = atoi(v);
>> +			if (max_connections < 0)
>> +				max_connections = 0; /* unlimited */
>> +			continue;
>> +		}
>> +		if (!strcmp(arg, "--reuseaddr")) {
>> +			reuseaddr = 1;
>> +			continue;
>> +		}
>> +		if (skip_prefix(arg, "--pid-file=", &v)) {
>> +			pid_file = v;
>> +			continue;
>> +		}
> 
> ok, most of these arguments are actually about the per-connection
> subprocesses.
> 
>> +		if (skip_prefix(arg, "--allow-anonymous", &v)) {
>> +			allow_anonymous = 1;
>> +			continue;
>> +		}
> 
> Here is how we choose to allo anonymous access.
> 
>> +		if (skip_prefix(arg, "--auth=", &v)) {
>> +			struct strbuf **p = strbuf_split_str(v, ':', 2);
>> +
>> +			if (!p[0]) {
>> +				error("invalid argument '%s'", v);
>> +				usage(test_http_auth_usage);
>> +			}
>> +
>> +			// trim trailing ':'
>> +			if (p[1])
>> +				strbuf_setlen(p[0], p[0]->len - 1);
>> +
>> +			if (get_auth_module(p[0])) {
>> +				error("duplicate auth scheme '%s'\n", p[0]->buf);
>> +				usage(test_http_auth_usage);
>> +			}
>> +
>> +			mod = xmalloc(sizeof(struct auth_module));
>> +			mod->scheme = xstrdup(p[0]->buf);
>> +			mod->challenge_params = p[1] ? xstrdup(p[1]->buf) : NULL;
> 
> Here, you xstrdup() into a 'const char *', but you are really
> passing ownership so it shouldn't be conts.
Ok

> 
>> +			mod->tokens = xmalloc(sizeof(struct string_list));
> 
> nit: this could also be "CALLOC_ARRAY(mod->tokens, 1);"
Sure!
>> +			string_list_init_dup(mod->tokens);
>> +
>> +			add_auth_module(mod);
>> +
>> +			strbuf_list_free(p);
>> +			continue;
> 
> Ok, we gain the auth schemes from the command line.
> 
>> +		}
>> +		if (skip_prefix(arg, "--auth-token=", &v)) {
>> +			struct strbuf **p = strbuf_split_str(v, ':', 2);
>> +			if (!p[0]) {
>> +				error("invalid argument '%s'", v);
>> +				usage(test_http_auth_usage);
>> +			}
>> +
>> +			if (!p[1]) {
>> +				error("missing token value '%s'\n", v);
>> +				usage(test_http_auth_usage);
>> +			}
>> +
>> +			// trim trailing ':'
> 
> Use /* */ (Aside: I'm surprised we don't have a build option in
> DEVELOPER=1 that catches the use of these comments.)
Me too! Appologies here.
>> +			strbuf_setlen(p[0], p[0]->len - 1);
>> +
>> +			mod = get_auth_module(p[0]);
>> +			if (!mod) {
>> +				error("auth scheme not defined '%s'\n", p[0]->buf);
>> +				usage(test_http_auth_usage);
>> +			}
>> +
>> +			string_list_append(mod->tokens, p[1]->buf);
>> +			strbuf_list_free(p);
>> +			continue;
>> +		}
> 
> And the token lists. It is important that the scheme is added
> before any token is added.
> 
>> +		fprintf(stderr, "error: unknown argument '%s'\n", arg);
>> +		usage(test_http_auth_usage);
>> +	}
>> +
>> +	/* avoid splitting a message in the middle */
>> +	setvbuf(stderr, NULL, _IOFBF, 4096);
>> +
>> +	if (listen_port == 0)
>> +		listen_port = DEFAULT_GIT_PORT;
>> +
>> +	/*
>> +	 * If no --listen=<addr> args are given, the setup_named_sock()
>> +	 * code will use receive a NULL address and set INADDR_ANY.
>> +	 * This exposes both internal and external interfaces on the
>> +	 * port.
>> +	 *
>> +	 * Disallow that and default to the internal-use-only loopback
>> +	 * address.
>> +	 */
>> +	if (!listen_addr.nr)
>> +		string_list_append(&listen_addr, "127.0.0.1");
>> +
>> +	/*
>> +	 * worker_mode is set in our own child process instances
>> +	 * (that are bound to a connected socket from a client).
>> +	 */
>> +	if (worker_mode)
>> +		return worker();
>> +
>> +	/*
>> +	 * `cld_argv` is a bit of a clever hack. The top-level instance
>> +	 * of test-http-server does the normal bind/listen/accept stuff.
>> +	 * For each incoming socket, the top-level process spawns
>> +	 * a child instance of test-http-server *WITH* the additional
>> +	 * `--worker` argument. This causes the child to set `worker_mode`
>> +	 * and immediately call `worker()` using the connected socket (and
>> +	 * without the usual need for fork() or threads).
>> +	 *
>> +	 * The magic here is made possible because `cld_argv` is static
>> +	 * and handle() (called by service_loop()) knows about it.
>> +	 */
>> +	strvec_push(&cld_argv, argv[0]);
>> +	strvec_push(&cld_argv, "--worker");
>> +	for (i = 1; i < argc; ++i)
>> +		strvec_push(&cld_argv, argv[i]);
>> +
>> +	/*
>> +	 * Setup primary instance to listen for connections.
>> +	 */
>> +	return serve(&listen_addr, listen_port);
>> +}
> 
> And complete the thing with some boilerplate.
> 
> This was a lot to read, and the interesting bits are all mixed in
> with the http server code, which is less interesting to what we
> are trying to accomplish. It would be beneficial to split this
> into one or two patches before we actually introduce the tests.
> 
> The most important thing that I think would be helpful is to
> isolate all the authentication behavior into its own patch so
> we can see how those connections from the command-line arguments
> affect the behavior of the server responses.
> 
> I think ideally we would have the following split:
> 
>  1. All server boilerblate. All requests 500 not-implemented.
> 
>  2. Add Git fall-through with no authentication. Add the tests
>     that are intended to allow anonymous auth.
> 
>  3. Add authentication data structures read from command-line,
>     but not processed at all in the logic.
> 
>  4. Act on the authentication data structures to alter the
>     requests. Add the tests that use these authentication
>     schemes.
> 
> I could easily see a case for combining 1&2 as well as 3&4,
> for slightly larger but more completely-testable changes at
> every step.
I agree, and my appologies for not splitting these out.
I'll follow up with a split that should make more sense.
> From what I read, I don't think there is much to change in
> the end result of the code, but it definitely was hard to read
> the important things when surrounded by many lines of
> boilerplate.
> 
>> diff --git a/t/t5556-http-auth.sh b/t/t5556-http-auth.sh
> 
> I'm going to pause here and come back to the test script in
> a separate reply.
> 
> Thanks,
> -Stolee
Thanks,
Matthew
Derrick Stolee Nov. 2, 2022, 2:38 p.m. UTC | #5
On 11/1/22 7:14 PM, Matthew John Cheetham wrote:
> On 2022-10-28 12:14, Jeff Hostetler wrote:
>> On 10/28/22 11:08 AM, Derrick Stolee wrote:

>>>> +static void kill_some_child(void)
>>>
>>>> +static void check_dead_children(void)
>>>
>>> These technically sound methods have unfortunate names.
>>> Using something like "connection" over "child" might
>>> alleviate some of the horror. (I initially wanted to
>>> suggest "subprocess" but you compare live_children to
>>> max_connections in the next method, so connection seemed
>>> appropriate.)
>>
>> These names were inherited from `daemon.c` IIRC. I wouldn't change
>> them since it'll just introduce noise when diffing.  Especially,
>> if we do the copy commit first.
> 
> Indeed. These functions are untouched from daemon.c. I do plan to split
> this mega-patch up however in to a single 'add the boilerplate' based on
> git-daemon patch, then add the extra pieces like HTTP request parsing and
> the auth pieces in a v3.

If these are copied from daemon.c, it may be worth trying
to lib-ify these data structures and code so they can be
shared across the two places. That can also come up as a
cleanup later, too.

For now, don't bother changing the names since they exist
somewhere else.
 
>> [...]
>>>> +static struct strvec cld_argv = STRVEC_INIT;
>>>> +static void handle(int incoming, struct sockaddr *addr, socklen_t addrlen)
>>>> +{
>>>> +    struct child_process cld = CHILD_PROCESS_INIT;
>>>> +
>>>> +    if (max_connections && live_children >= max_connections) {
>>>> +        kill_some_child();
>>>> +        sleep(1);  /* give it some time to die */
>>>> +        check_dead_children();
>>>> +        if (live_children >= max_connections) {
>>>> +            close(incoming);
>>>> +            logerror("Too many children, dropping connection");
>>>> +            return;
>>>> +        }
>>>> +    }
>>>
>>> Do we anticipate exercising concurrent requests in our
>>> tests? Perhaps it's not worth putting a cap on the
>>> connection count so we can keep the test helpers simple.
>>
>> again, this code was inherited from `daemon.c`, so we could leave it.

I wonder how much could be extracted from daemon.c using a
copy into a 'daemon-lib.c' with methods defined in 'daemon-lib.h'
then consumed from this file instead. Not sure it's worth the
churn to daemon.c, though.

Thanks,
-Stolee
diff mbox series

Patch

diff --git a/Makefile b/Makefile
index d93ad956e58..39b130f711d 100644
--- a/Makefile
+++ b/Makefile
@@ -1500,6 +1500,8 @@  else
 	endif
 	BASIC_CFLAGS += $(CURL_CFLAGS)
 
+	TEST_PROGRAMS_NEED_X += test-http-server
+
 	REMOTE_CURL_PRIMARY = git-remote-http$X
 	REMOTE_CURL_ALIASES = git-remote-https$X git-remote-ftp$X git-remote-ftps$X
 	REMOTE_CURL_NAMES = $(REMOTE_CURL_PRIMARY) $(REMOTE_CURL_ALIASES)
diff --git a/contrib/buildsystems/CMakeLists.txt b/contrib/buildsystems/CMakeLists.txt
index 787738e6fa3..45251695ce0 100644
--- a/contrib/buildsystems/CMakeLists.txt
+++ b/contrib/buildsystems/CMakeLists.txt
@@ -989,6 +989,19 @@  set(wrapper_scripts
 set(wrapper_test_scripts
 	test-fake-ssh test-tool)
 
+if(CURL_FOUND)
+       list(APPEND wrapper_test_scripts test-http-server)
+
+       add_executable(test-http-server ${CMAKE_SOURCE_DIR}/t/helper/test-http-server.c)
+       target_link_libraries(test-http-server common-main)
+
+       if(MSVC)
+               set_target_properties(test-http-server
+                                       PROPERTIES RUNTIME_OUTPUT_DIRECTORY_DEBUG ${CMAKE_BINARY_DIR}/t/helper)
+               set_target_properties(test-http-server
+                                       PROPERTIES RUNTIME_OUTPUT_DIRECTORY_RELEASE ${CMAKE_BINARY_DIR}/t/helper)
+       endif()
+endif()
 
 foreach(script ${wrapper_scripts})
 	file(STRINGS ${CMAKE_SOURCE_DIR}/wrap-for-bin.sh content NEWLINE_CONSUME)
diff --git a/t/helper/.gitignore b/t/helper/.gitignore
index 8c2ddcce95f..1a94ab6eed5 100644
--- a/t/helper/.gitignore
+++ b/t/helper/.gitignore
@@ -1,2 +1,3 @@ 
 /test-tool
 /test-fake-ssh
+test-http-server
diff --git a/t/helper/test-credential-helper-replay.sh b/t/helper/test-credential-helper-replay.sh
new file mode 100755
index 00000000000..03e5e63dad6
--- /dev/null
+++ b/t/helper/test-credential-helper-replay.sh
@@ -0,0 +1,14 @@ 
+cmd=$1
+teefile=$cmd-actual.cred
+catfile=$cmd-response.cred
+rm -f $teefile
+while read line;
+do
+	if test -z "$line"; then
+		break;
+	fi
+	echo "$line" >> $teefile
+done
+if test "$cmd" = "get"; then
+	cat $catfile
+fi
diff --git a/t/helper/test-http-server.c b/t/helper/test-http-server.c
new file mode 100644
index 00000000000..92139c04c90
--- /dev/null
+++ b/t/helper/test-http-server.c
@@ -0,0 +1,1134 @@ 
+#include "config.h"
+#include "run-command.h"
+#include "strbuf.h"
+#include "string-list.h"
+#include "trace2.h"
+#include "version.h"
+#include "dir.h"
+#include "date.h"
+
+#define TR2_CAT "test-http-server"
+
+static const char *pid_file;
+static int verbose;
+static int reuseaddr;
+
+static const char test_http_auth_usage[] =
+"http-server [--verbose]\n"
+"           [--timeout=<n>] [--init-timeout=<n>] [--max-connections=<n>]\n"
+"           [--reuseaddr] [--pid-file=<file>]\n"
+"           [--listen=<host_or_ipaddr>]* [--port=<n>]\n"
+"           [--anonymous-allowed]\n"
+"           [--auth=<scheme>[:<params>] [--auth-token=<scheme>:<token>]]*\n"
+;
+
+/* Timeout, and initial timeout */
+static unsigned int timeout;
+static unsigned int init_timeout;
+
+static void logreport(const char *label, const char *err, va_list params)
+{
+	struct strbuf msg = STRBUF_INIT;
+
+	strbuf_addf(&msg, "[%"PRIuMAX"] %s: ", (uintmax_t)getpid(), label);
+	strbuf_vaddf(&msg, err, params);
+	strbuf_addch(&msg, '\n');
+
+	fwrite(msg.buf, sizeof(char), msg.len, stderr);
+	fflush(stderr);
+
+	strbuf_release(&msg);
+}
+
+__attribute__((format (printf, 1, 2)))
+static void logerror(const char *err, ...)
+{
+	va_list params;
+	va_start(params, err);
+	logreport("error", err, params);
+	va_end(params);
+}
+
+__attribute__((format (printf, 1, 2)))
+static void loginfo(const char *err, ...)
+{
+	va_list params;
+	if (!verbose)
+		return;
+	va_start(params, err);
+	logreport("info", err, params);
+	va_end(params);
+}
+
+static void set_keep_alive(int sockfd)
+{
+	int ka = 1;
+
+	if (setsockopt(sockfd, SOL_SOCKET, SO_KEEPALIVE, &ka, sizeof(ka)) < 0) {
+		if (errno != ENOTSOCK)
+			logerror("unable to set SO_KEEPALIVE on socket: %s",
+				strerror(errno));
+	}
+}
+
+//////////////////////////////////////////////////////////////////
+// The code in this section is used by "worker" instances to service
+// a single connection from a client.  The worker talks to the client
+// on 0 and 1.
+//////////////////////////////////////////////////////////////////
+
+enum worker_result {
+	/*
+	 * Operation successful.
+	 * Caller *might* keep the socket open and allow keep-alive.
+	 */
+	WR_OK       = 0,
+	/*
+	 * Various errors while processing the request and/or the response.
+	 * Close the socket and clean up.
+	 * Exit child-process with non-zero status.
+	 */
+	WR_IO_ERROR = 1<<0,
+	/*
+	 * Close the socket and clean up.  Does not imply an error.
+	 */
+	WR_HANGUP   = 1<<1,
+
+	WR_STOP_THE_MUSIC = (WR_IO_ERROR | WR_HANGUP),
+};
+
+/*
+ * Fields from a parsed HTTP request.
+ */
+struct req {
+	struct strbuf start_line;
+
+	const char *method;
+	const char *http_version;
+
+	struct strbuf uri_path;
+	struct strbuf query_args;
+
+	struct string_list header_list;
+	const char *content_type;
+	ssize_t content_length;
+};
+
+#define REQ__INIT { \
+	.start_line = STRBUF_INIT, \
+	.uri_path = STRBUF_INIT, \
+	.query_args = STRBUF_INIT, \
+	.header_list = STRING_LIST_INIT_NODUP, \
+	.content_type = NULL, \
+	.content_length = -1 \
+	}
+
+static void req__release(struct req *req)
+{
+	strbuf_release(&req->start_line);
+
+	strbuf_release(&req->uri_path);
+	strbuf_release(&req->query_args);
+
+	string_list_clear(&req->header_list, 0);
+}
+
+static enum worker_result send_http_error(
+	int fd,
+	int http_code, const char *http_code_name,
+	int retry_after_seconds, struct string_list *response_headers,
+	enum worker_result wr_in)
+{
+	struct strbuf response_header = STRBUF_INIT;
+	struct strbuf response_content = STRBUF_INIT;
+	struct string_list_item *h;
+	enum worker_result wr;
+
+	strbuf_addf(&response_content, "Error: %d %s\r\n",
+		    http_code, http_code_name);
+	if (retry_after_seconds > 0)
+		strbuf_addf(&response_content, "Retry-After: %d\r\n",
+			    retry_after_seconds);
+
+	strbuf_addf  (&response_header, "HTTP/1.1 %d %s\r\n", http_code, http_code_name);
+	strbuf_addstr(&response_header, "Cache-Control: private\r\n");
+	strbuf_addstr(&response_header,	"Content-Type: text/plain\r\n");
+	strbuf_addf  (&response_header,	"Content-Length: %d\r\n", (int)response_content.len);
+	if (retry_after_seconds > 0)
+		strbuf_addf  (&response_header, "Retry-After: %d\r\n", retry_after_seconds);
+	strbuf_addf(  &response_header,	"Server: test-http-server/%s\r\n", git_version_string);
+	strbuf_addf(  &response_header, "Date: %s\r\n", show_date(time(NULL), 0, DATE_MODE(RFC2822)));
+	if (response_headers)
+		for_each_string_list_item(h, response_headers)
+			strbuf_addf(&response_header, "%s\r\n", h->string);
+	strbuf_addstr(&response_header, "\r\n");
+
+	if (write_in_full(fd, response_header.buf, response_header.len) < 0) {
+		logerror("unable to write response header");
+		wr = WR_IO_ERROR;
+		goto done;
+	}
+
+	if (write_in_full(fd, response_content.buf, response_content.len) < 0) {
+		logerror("unable to write response content body");
+		wr = WR_IO_ERROR;
+		goto done;
+	}
+
+	wr = wr_in;
+
+done:
+	strbuf_release(&response_header);
+	strbuf_release(&response_content);
+
+	return wr;
+}
+
+/*
+ * Read the HTTP request up to the start of the optional message-body.
+ * We do this byte-by-byte because we have keep-alive turned on and
+ * cannot rely on an EOF.
+ *
+ * https://tools.ietf.org/html/rfc7230
+ *
+ * We cannot call die() here because our caller needs to properly
+ * respond to the client and/or close the socket before this
+ * child exits so that the client doesn't get a connection reset
+ * by peer error.
+ */
+static enum worker_result req__read(struct req *req, int fd)
+{
+	struct strbuf h = STRBUF_INIT;
+	struct string_list start_line_fields = STRING_LIST_INIT_DUP;
+	int nr_start_line_fields;
+	const char *uri_target;
+	const char *query;
+	char *hp;
+	const char *hv;
+
+	enum worker_result result = WR_OK;
+
+	/*
+	 * Read line 0 of the request and split it into component parts:
+	 *
+	 *    <method> SP <uri-target> SP <HTTP-version> CRLF
+	 *
+	 */
+	if (strbuf_getwholeline_fd(&req->start_line, fd, '\n') == EOF) {
+		result = WR_OK | WR_HANGUP;
+		goto done;
+	}
+
+	strbuf_trim_trailing_newline(&req->start_line);
+
+	nr_start_line_fields = string_list_split(&start_line_fields,
+						 req->start_line.buf,
+						 ' ', -1);
+	if (nr_start_line_fields != 3) {
+		logerror("could not parse request start-line '%s'",
+			 req->start_line.buf);
+		result = WR_IO_ERROR;
+		goto done;
+	}
+
+	req->method = xstrdup(start_line_fields.items[0].string);
+	req->http_version = xstrdup(start_line_fields.items[2].string);
+
+	uri_target = start_line_fields.items[1].string;
+
+	if (strcmp(req->http_version, "HTTP/1.1")) {
+		logerror("unsupported version '%s' (expecting HTTP/1.1)",
+			 req->http_version);
+		result = WR_IO_ERROR;
+		goto done;
+	}
+
+	query = strchr(uri_target, '?');
+
+	if (query) {
+		strbuf_add(&req->uri_path, uri_target, (query - uri_target));
+		strbuf_trim_trailing_dir_sep(&req->uri_path);
+		strbuf_addstr(&req->query_args, query + 1);
+	} else {
+		strbuf_addstr(&req->uri_path, uri_target);
+		strbuf_trim_trailing_dir_sep(&req->uri_path);
+	}
+
+	/*
+	 * Read the set of HTTP headers into a string-list.
+	 */
+	while (1) {
+		if (strbuf_getwholeline_fd(&h, fd, '\n') == EOF)
+			goto done;
+		strbuf_trim_trailing_newline(&h);
+
+		if (!h.len)
+			goto done; /* a blank line ends the header */
+
+		hp = strbuf_detach(&h, NULL);
+		string_list_append(&req->header_list, hp);
+
+		/* store common request headers separately */
+		if (skip_prefix(hp, "Content-Type: ", &hv)) {
+			req->content_type = hv;
+		} else if (skip_prefix(hp, "Content-Length: ", &hv)) {
+			req->content_length = strtol(hv, &hp, 10);
+		}
+	}
+
+	/*
+	 * We do not attempt to read the <message-body>, if it exists.
+	 * We let our caller read/chunk it in as appropriate.
+	 */
+
+done:
+	string_list_clear(&start_line_fields, 0);
+
+	/*
+	 * This is useful for debugging the request, but very noisy.
+	 */
+	if (trace2_is_enabled()) {
+		struct string_list_item *item;
+		trace2_printf("%s: %s", TR2_CAT, req->start_line.buf);
+		trace2_printf("%s: hver: %s", TR2_CAT, req->http_version);
+		trace2_printf("%s: hmth: %s", TR2_CAT, req->method);
+		trace2_printf("%s: path: %s", TR2_CAT, req->uri_path.buf);
+		trace2_printf("%s: qury: %s", TR2_CAT, req->query_args.buf);
+		if (req->content_length >= 0)
+			trace2_printf("%s: clen: %d", TR2_CAT, req->content_length);
+		if (req->content_type)
+			trace2_printf("%s: ctyp: %s", TR2_CAT, req->content_type);
+		for_each_string_list_item(item, &req->header_list)
+			trace2_printf("%s: hdrs: %s", TR2_CAT, item->string);
+	}
+
+	return result;
+}
+
+static int is_git_request(struct req *req)
+{
+	static regex_t *smart_http_regex;
+	static int initialized;
+
+	if (!initialized) {
+		smart_http_regex = xmalloc(sizeof(*smart_http_regex));
+		if (regcomp(smart_http_regex, "^/(HEAD|info/refs|"
+			    "objects/info/[^/]+|git-(upload|receive)-pack)$",
+			    REG_EXTENDED)) {
+			warning("could not compile smart HTTP regex");
+			smart_http_regex = NULL;
+		}
+		initialized = 1;
+	}
+
+	return smart_http_regex &&
+		!regexec(smart_http_regex, req->uri_path.buf, 0, NULL, 0);
+}
+
+static enum worker_result do__git(struct req *req, const char *user)
+{
+	const char *ok = "HTTP/1.1 200 OK\r\n";
+	struct child_process cp = CHILD_PROCESS_INIT;
+	int res;
+
+	if (write(1, ok, strlen(ok)) < 0)
+		return error(_("could not send '%s'"), ok);
+
+	if (user)
+		strvec_pushf(&cp.env, "REMOTE_USER=%s", user);
+
+	strvec_pushf(&cp.env, "REQUEST_METHOD=%s", req->method);
+	strvec_pushf(&cp.env, "PATH_TRANSLATED=%s",
+			req->uri_path.buf);
+	strvec_push(&cp.env, "SERVER_PROTOCOL=HTTP/1.1");
+	if (req->query_args.len)
+		strvec_pushf(&cp.env, "QUERY_STRING=%s",
+				req->query_args.buf);
+	if (req->content_type)
+		strvec_pushf(&cp.env, "CONTENT_TYPE=%s",
+				req->content_type);
+	if (req->content_length >= 0)
+		strvec_pushf(&cp.env, "CONTENT_LENGTH=%" PRIdMAX,
+				(intmax_t)req->content_length);
+	cp.git_cmd = 1;
+	strvec_push(&cp.args, "http-backend");
+	res = run_command(&cp);
+	close(1);
+	close(0);
+	return !!res;
+}
+
+enum auth_result {
+	AUTH_UNKNOWN = 0,
+	AUTH_DENY = 1,
+	AUTH_ALLOW = 2,
+};
+
+struct auth_module {
+	const char *scheme;
+	const char *challenge_params;
+	struct string_list *tokens;
+};
+
+static int allow_anonymous;
+static struct auth_module **auth_modules = NULL;
+static size_t auth_modules_nr = 0;
+static size_t auth_modules_alloc = 0;
+
+static struct auth_module *get_auth_module(struct strbuf *scheme)
+{
+	int i;
+	struct auth_module *mod;
+	for (i = 0; i < auth_modules_nr; i++) {
+		mod = auth_modules[i];
+		if (!strcasecmp(mod->scheme, scheme->buf))
+			return mod;
+	}
+
+	return NULL;
+}
+
+static void add_auth_module(struct auth_module *mod)
+{
+	ALLOC_GROW(auth_modules, auth_modules_nr + 1, auth_modules_alloc);
+	auth_modules[auth_modules_nr++] = mod;
+}
+
+static int is_authed(struct req *req, const char **user, enum worker_result *wr)
+{
+	enum auth_result result = AUTH_UNKNOWN;
+	struct string_list hdrs = STRING_LIST_INIT_NODUP;
+	struct auth_module *mod;
+
+	struct string_list_item *hdr;
+	struct string_list_item *token;
+	const char *v;
+	struct strbuf **split = NULL;
+	int i;
+	char *challenge;
+
+	/* ask all auth modules to validate the request */
+	for_each_string_list_item(hdr, &req->header_list) {
+		if (skip_iprefix(hdr->string, "Authorization: ", &v)) {
+			split = strbuf_split_str(v, ' ', 2);
+			if (!split[0] || !split[1]) continue;
+
+			// trim trailing space ' '
+			strbuf_setlen(split[0], split[0]->len - 1);
+
+			mod = get_auth_module(split[0]);
+			if (mod) {
+
+				for_each_string_list_item(token, mod->tokens) {
+					if (!strcmp(split[1]->buf, token->string)) {
+						result = AUTH_ALLOW;
+						goto done;
+					}
+				}
+
+				if (result != AUTH_UNKNOWN)
+					goto done;
+			}
+		}
+	}
+
+done:
+	switch (result) {
+	case AUTH_ALLOW:
+		trace2_printf("%s: auth '%s' ALLOW", TR2_CAT, mod->scheme);
+		*user = "VALID_TEST_USER";
+		*wr = WR_OK;
+		break;
+
+	case AUTH_DENY:
+		trace2_printf("%s: auth '%s' DENY", TR2_CAT, mod->scheme);
+		/* fall-through */
+
+	case AUTH_UNKNOWN:
+		if (allow_anonymous)
+			break;
+		for (i = 0; i < auth_modules_nr; i++) {
+			mod = auth_modules[i];
+			if (mod->challenge_params)
+				challenge = xstrfmt("WWW-Authenticate: %s %s",
+						    mod->scheme,
+						    mod->challenge_params);
+			else
+				challenge = xstrfmt("WWW-Authenticate: %s",
+						    mod->scheme);
+			string_list_append(&hdrs, challenge);
+		}
+		*wr = send_http_error(1, 401, "Unauthorized", -1, &hdrs, *wr);
+	}
+
+	strbuf_list_free(split);
+	string_list_clear(&hdrs, 0);
+
+	return result == AUTH_ALLOW ||
+	      (result == AUTH_UNKNOWN && allow_anonymous);
+}
+
+static enum worker_result dispatch(struct req *req)
+{
+	enum worker_result wr = WR_OK;
+	const char *user = NULL;
+
+	if (!is_authed(req, &user, &wr))
+		return wr;
+
+	if (is_git_request(req))
+		return do__git(req, user);
+
+	return send_http_error(1, 501, "Not Implemented", -1, NULL,
+			       WR_OK | WR_HANGUP);
+}
+
+static enum worker_result worker(void)
+{
+	struct req req = REQ__INIT;
+	char *client_addr = getenv("REMOTE_ADDR");
+	char *client_port = getenv("REMOTE_PORT");
+	enum worker_result wr = WR_OK;
+
+	if (client_addr)
+		loginfo("Connection from %s:%s", client_addr, client_port);
+
+	set_keep_alive(0);
+
+	while (1) {
+		req__release(&req);
+
+		alarm(init_timeout ? init_timeout : timeout);
+		wr = req__read(&req, 0);
+		alarm(0);
+
+		if (wr & WR_STOP_THE_MUSIC)
+			break;
+
+		wr = dispatch(&req);
+		if (wr & WR_STOP_THE_MUSIC)
+			break;
+	}
+
+	close(0);
+	close(1);
+
+	return !!(wr & WR_IO_ERROR);
+}
+
+//////////////////////////////////////////////////////////////////
+// This section contains the listener and child-process management
+// code used by the primary instance to accept incoming connections
+// and dispatch them to async child process "worker" instances.
+//////////////////////////////////////////////////////////////////
+
+static int addrcmp(const struct sockaddr_storage *s1,
+		   const struct sockaddr_storage *s2)
+{
+	const struct sockaddr *sa1 = (const struct sockaddr*) s1;
+	const struct sockaddr *sa2 = (const struct sockaddr*) s2;
+
+	if (sa1->sa_family != sa2->sa_family)
+		return sa1->sa_family - sa2->sa_family;
+	if (sa1->sa_family == AF_INET)
+		return memcmp(&((struct sockaddr_in *)s1)->sin_addr,
+		    &((struct sockaddr_in *)s2)->sin_addr,
+		    sizeof(struct in_addr));
+#ifndef NO_IPV6
+	if (sa1->sa_family == AF_INET6)
+		return memcmp(&((struct sockaddr_in6 *)s1)->sin6_addr,
+		    &((struct sockaddr_in6 *)s2)->sin6_addr,
+		    sizeof(struct in6_addr));
+#endif
+	return 0;
+}
+
+static int max_connections = 32;
+
+static unsigned int live_children;
+
+static struct child {
+	struct child *next;
+	struct child_process cld;
+	struct sockaddr_storage address;
+} *firstborn;
+
+static void add_child(struct child_process *cld, struct sockaddr *addr, socklen_t addrlen)
+{
+	struct child *newborn, **cradle;
+
+	newborn = xcalloc(1, sizeof(*newborn));
+	live_children++;
+	memcpy(&newborn->cld, cld, sizeof(*cld));
+	memcpy(&newborn->address, addr, addrlen);
+	for (cradle = &firstborn; *cradle; cradle = &(*cradle)->next)
+		if (!addrcmp(&(*cradle)->address, &newborn->address))
+			break;
+	newborn->next = *cradle;
+	*cradle = newborn;
+}
+
+/*
+ * This gets called if the number of connections grows
+ * past "max_connections".
+ *
+ * We kill the newest connection from a duplicate IP.
+ */
+static void kill_some_child(void)
+{
+	const struct child *blanket, *next;
+
+	if (!(blanket = firstborn))
+		return;
+
+	for (; (next = blanket->next); blanket = next)
+		if (!addrcmp(&blanket->address, &next->address)) {
+			kill(blanket->cld.pid, SIGTERM);
+			break;
+		}
+}
+
+static void check_dead_children(void)
+{
+	int status;
+	pid_t pid;
+
+	struct child **cradle, *blanket;
+	for (cradle = &firstborn; (blanket = *cradle);)
+		if ((pid = waitpid(blanket->cld.pid, &status, WNOHANG)) > 1) {
+			const char *dead = "";
+			if (status)
+				dead = " (with error)";
+			loginfo("[%"PRIuMAX"] Disconnected%s", (uintmax_t)pid, dead);
+
+			/* remove the child */
+			*cradle = blanket->next;
+			live_children--;
+			child_process_clear(&blanket->cld);
+			free(blanket);
+		} else
+			cradle = &blanket->next;
+}
+
+static struct strvec cld_argv = STRVEC_INIT;
+static void handle(int incoming, struct sockaddr *addr, socklen_t addrlen)
+{
+	struct child_process cld = CHILD_PROCESS_INIT;
+
+	if (max_connections && live_children >= max_connections) {
+		kill_some_child();
+		sleep(1);  /* give it some time to die */
+		check_dead_children();
+		if (live_children >= max_connections) {
+			close(incoming);
+			logerror("Too many children, dropping connection");
+			return;
+		}
+	}
+
+	if (addr->sa_family == AF_INET) {
+		char buf[128] = "";
+		struct sockaddr_in *sin_addr = (void *) addr;
+		inet_ntop(addr->sa_family, &sin_addr->sin_addr, buf, sizeof(buf));
+		strvec_pushf(&cld.env, "REMOTE_ADDR=%s", buf);
+		strvec_pushf(&cld.env, "REMOTE_PORT=%d",
+				 ntohs(sin_addr->sin_port));
+#ifndef NO_IPV6
+	} else if (addr->sa_family == AF_INET6) {
+		char buf[128] = "";
+		struct sockaddr_in6 *sin6_addr = (void *) addr;
+		inet_ntop(AF_INET6, &sin6_addr->sin6_addr, buf, sizeof(buf));
+		strvec_pushf(&cld.env, "REMOTE_ADDR=[%s]", buf);
+		strvec_pushf(&cld.env, "REMOTE_PORT=%d",
+				 ntohs(sin6_addr->sin6_port));
+#endif
+	}
+
+	strvec_pushv(&cld.args, cld_argv.v);
+	cld.in = incoming;
+	cld.out = dup(incoming);
+
+	if (cld.out < 0)
+		logerror("could not dup() `incoming`");
+	else if (start_command(&cld))
+		logerror("unable to fork");
+	else
+		add_child(&cld, addr, addrlen);
+}
+
+static void child_handler(int signo)
+{
+	/*
+	 * Otherwise empty handler because systemcalls will get interrupted
+	 * upon signal receipt
+	 * SysV needs the handler to be rearmed
+	 */
+	signal(SIGCHLD, child_handler);
+}
+
+static int set_reuse_addr(int sockfd)
+{
+	int on = 1;
+
+	if (!reuseaddr)
+		return 0;
+	return setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR,
+			  &on, sizeof(on));
+}
+
+struct socketlist {
+	int *list;
+	size_t nr;
+	size_t alloc;
+};
+
+static const char *ip2str(int family, struct sockaddr *sin, socklen_t len)
+{
+#ifdef NO_IPV6
+	static char ip[INET_ADDRSTRLEN];
+#else
+	static char ip[INET6_ADDRSTRLEN];
+#endif
+
+	switch (family) {
+#ifndef NO_IPV6
+	case AF_INET6:
+		inet_ntop(family, &((struct sockaddr_in6*)sin)->sin6_addr, ip, len);
+		break;
+#endif
+	case AF_INET:
+		inet_ntop(family, &((struct sockaddr_in*)sin)->sin_addr, ip, len);
+		break;
+	default:
+		xsnprintf(ip, sizeof(ip), "<unknown>");
+	}
+	return ip;
+}
+
+#ifndef NO_IPV6
+
+static int setup_named_sock(char *listen_addr, int listen_port, struct socketlist *socklist)
+{
+	int socknum = 0;
+	char pbuf[NI_MAXSERV];
+	struct addrinfo hints, *ai0, *ai;
+	int gai;
+	long flags;
+
+	xsnprintf(pbuf, sizeof(pbuf), "%d", listen_port);
+	memset(&hints, 0, sizeof(hints));
+	hints.ai_family = AF_UNSPEC;
+	hints.ai_socktype = SOCK_STREAM;
+	hints.ai_protocol = IPPROTO_TCP;
+	hints.ai_flags = AI_PASSIVE;
+
+	gai = getaddrinfo(listen_addr, pbuf, &hints, &ai0);
+	if (gai) {
+		logerror("getaddrinfo() for %s failed: %s", listen_addr, gai_strerror(gai));
+		return 0;
+	}
+
+	for (ai = ai0; ai; ai = ai->ai_next) {
+		int sockfd;
+
+		sockfd = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
+		if (sockfd < 0)
+			continue;
+		if (sockfd >= FD_SETSIZE) {
+			logerror("Socket descriptor too large");
+			close(sockfd);
+			continue;
+		}
+
+#ifdef IPV6_V6ONLY
+		if (ai->ai_family == AF_INET6) {
+			int on = 1;
+			setsockopt(sockfd, IPPROTO_IPV6, IPV6_V6ONLY,
+				   &on, sizeof(on));
+			/* Note: error is not fatal */
+		}
+#endif
+
+		if (set_reuse_addr(sockfd)) {
+			logerror("Could not set SO_REUSEADDR: %s", strerror(errno));
+			close(sockfd);
+			continue;
+		}
+
+		set_keep_alive(sockfd);
+
+		if (bind(sockfd, ai->ai_addr, ai->ai_addrlen) < 0) {
+			logerror("Could not bind to %s: %s",
+				 ip2str(ai->ai_family, ai->ai_addr, ai->ai_addrlen),
+				 strerror(errno));
+			close(sockfd);
+			continue;	/* not fatal */
+		}
+		if (listen(sockfd, 5) < 0) {
+			logerror("Could not listen to %s: %s",
+				 ip2str(ai->ai_family, ai->ai_addr, ai->ai_addrlen),
+				 strerror(errno));
+			close(sockfd);
+			continue;	/* not fatal */
+		}
+
+		flags = fcntl(sockfd, F_GETFD, 0);
+		if (flags >= 0)
+			fcntl(sockfd, F_SETFD, flags | FD_CLOEXEC);
+
+		ALLOC_GROW(socklist->list, socklist->nr + 1, socklist->alloc);
+		socklist->list[socklist->nr++] = sockfd;
+		socknum++;
+	}
+
+	freeaddrinfo(ai0);
+
+	return socknum;
+}
+
+#else /* NO_IPV6 */
+
+static int setup_named_sock(char *listen_addr, int listen_port, struct socketlist *socklist)
+{
+	struct sockaddr_in sin;
+	int sockfd;
+	long flags;
+
+	memset(&sin, 0, sizeof sin);
+	sin.sin_family = AF_INET;
+	sin.sin_port = htons(listen_port);
+
+	if (listen_addr) {
+		/* Well, host better be an IP address here. */
+		if (inet_pton(AF_INET, listen_addr, &sin.sin_addr.s_addr) <= 0)
+			return 0;
+	} else {
+		sin.sin_addr.s_addr = htonl(INADDR_ANY);
+	}
+
+	sockfd = socket(AF_INET, SOCK_STREAM, 0);
+	if (sockfd < 0)
+		return 0;
+
+	if (set_reuse_addr(sockfd)) {
+		logerror("Could not set SO_REUSEADDR: %s", strerror(errno));
+		close(sockfd);
+		return 0;
+	}
+
+	set_keep_alive(sockfd);
+
+	if ( bind(sockfd, (struct sockaddr *)&sin, sizeof sin) < 0 ) {
+		logerror("Could not bind to %s: %s",
+			 ip2str(AF_INET, (struct sockaddr *)&sin, sizeof(sin)),
+			 strerror(errno));
+		close(sockfd);
+		return 0;
+	}
+
+	if (listen(sockfd, 5) < 0) {
+		logerror("Could not listen to %s: %s",
+			 ip2str(AF_INET, (struct sockaddr *)&sin, sizeof(sin)),
+			 strerror(errno));
+		close(sockfd);
+		return 0;
+	}
+
+	flags = fcntl(sockfd, F_GETFD, 0);
+	if (flags >= 0)
+		fcntl(sockfd, F_SETFD, flags | FD_CLOEXEC);
+
+	ALLOC_GROW(socklist->list, socklist->nr + 1, socklist->alloc);
+	socklist->list[socklist->nr++] = sockfd;
+	return 1;
+}
+
+#endif
+
+static void socksetup(struct string_list *listen_addr, int listen_port, struct socketlist *socklist)
+{
+	if (!listen_addr->nr)
+		setup_named_sock("127.0.0.1", listen_port, socklist);
+	else {
+		int i, socknum;
+		for (i = 0; i < listen_addr->nr; i++) {
+			socknum = setup_named_sock(listen_addr->items[i].string,
+						   listen_port, socklist);
+
+			if (socknum == 0)
+				logerror("unable to allocate any listen sockets for host %s on port %u",
+					 listen_addr->items[i].string, listen_port);
+		}
+	}
+}
+
+static int service_loop(struct socketlist *socklist)
+{
+	struct pollfd *pfd;
+	int i;
+
+	CALLOC_ARRAY(pfd, socklist->nr);
+
+	for (i = 0; i < socklist->nr; i++) {
+		pfd[i].fd = socklist->list[i];
+		pfd[i].events = POLLIN;
+	}
+
+	signal(SIGCHLD, child_handler);
+
+	for (;;) {
+		int i;
+		int nr_ready;
+		int timeout = (pid_file ? 100 : -1);
+
+		check_dead_children();
+
+		nr_ready = poll(pfd, socklist->nr, timeout);
+		if (nr_ready < 0) {
+			if (errno != EINTR) {
+				logerror("Poll failed, resuming: %s",
+				      strerror(errno));
+				sleep(1);
+			}
+			continue;
+		}
+		else if (nr_ready == 0) {
+			/*
+			 * If we have a pid_file, then we watch it.
+			 * If someone deletes it, we shutdown the service.
+			 * The shell scripts in the test suite will use this.
+			 */
+			if (!pid_file || file_exists(pid_file))
+				continue;
+			goto shutdown;
+		}
+
+		for (i = 0; i < socklist->nr; i++) {
+			if (pfd[i].revents & POLLIN) {
+				union {
+					struct sockaddr sa;
+					struct sockaddr_in sai;
+#ifndef NO_IPV6
+					struct sockaddr_in6 sai6;
+#endif
+				} ss;
+				socklen_t sslen = sizeof(ss);
+				int incoming = accept(pfd[i].fd, &ss.sa, &sslen);
+				if (incoming < 0) {
+					switch (errno) {
+					case EAGAIN:
+					case EINTR:
+					case ECONNABORTED:
+						continue;
+					default:
+						die_errno("accept returned");
+					}
+				}
+				handle(incoming, &ss.sa, sslen);
+			}
+		}
+	}
+
+shutdown:
+	loginfo("Starting graceful shutdown (pid-file gone)");
+	for (i = 0; i < socklist->nr; i++)
+		close(socklist->list[i]);
+
+	return 0;
+}
+
+static int serve(struct string_list *listen_addr, int listen_port)
+{
+	struct socketlist socklist = { NULL, 0, 0 };
+
+	socksetup(listen_addr, listen_port, &socklist);
+	if (socklist.nr == 0)
+		die("unable to allocate any listen sockets on port %u",
+		    listen_port);
+
+	loginfo("Ready to rumble");
+
+	/*
+	 * Wait to create the pid-file until we've setup the sockets
+	 * and are open for business.
+	 */
+	if (pid_file)
+		write_file(pid_file, "%"PRIuMAX, (uintmax_t) getpid());
+
+	return service_loop(&socklist);
+}
+
+//////////////////////////////////////////////////////////////////
+// This section is executed by both the primary instance and all
+// worker instances.  So, yes, each child-process re-parses the
+// command line argument and re-discovers how it should behave.
+//////////////////////////////////////////////////////////////////
+
+int cmd_main(int argc, const char **argv)
+{
+	int listen_port = 0;
+	struct string_list listen_addr = STRING_LIST_INIT_NODUP;
+	int worker_mode = 0;
+	int i;
+	struct auth_module *mod = NULL;
+
+	trace2_cmd_name("test-http-server");
+	setup_git_directory_gently(NULL);
+
+	for (i = 1; i < argc; i++) {
+		const char *arg = argv[i];
+		const char *v;
+
+		if (skip_prefix(arg, "--listen=", &v)) {
+			string_list_append(&listen_addr, xstrdup_tolower(v));
+			continue;
+		}
+		if (skip_prefix(arg, "--port=", &v)) {
+			char *end;
+			unsigned long n;
+			n = strtoul(v, &end, 0);
+			if (*v && !*end) {
+				listen_port = n;
+				continue;
+			}
+		}
+		if (!strcmp(arg, "--worker")) {
+			worker_mode = 1;
+			trace2_cmd_mode("worker");
+			continue;
+		}
+		if (!strcmp(arg, "--verbose")) {
+			verbose = 1;
+			continue;
+		}
+		if (skip_prefix(arg, "--timeout=", &v)) {
+			timeout = atoi(v);
+			continue;
+		}
+		if (skip_prefix(arg, "--init-timeout=", &v)) {
+			init_timeout = atoi(v);
+			continue;
+		}
+		if (skip_prefix(arg, "--max-connections=", &v)) {
+			max_connections = atoi(v);
+			if (max_connections < 0)
+				max_connections = 0; /* unlimited */
+			continue;
+		}
+		if (!strcmp(arg, "--reuseaddr")) {
+			reuseaddr = 1;
+			continue;
+		}
+		if (skip_prefix(arg, "--pid-file=", &v)) {
+			pid_file = v;
+			continue;
+		}
+		if (skip_prefix(arg, "--allow-anonymous", &v)) {
+			allow_anonymous = 1;
+			continue;
+		}
+		if (skip_prefix(arg, "--auth=", &v)) {
+			struct strbuf **p = strbuf_split_str(v, ':', 2);
+
+			if (!p[0]) {
+				error("invalid argument '%s'", v);
+				usage(test_http_auth_usage);
+			}
+
+			// trim trailing ':'
+			if (p[1])
+				strbuf_setlen(p[0], p[0]->len - 1);
+
+			if (get_auth_module(p[0])) {
+				error("duplicate auth scheme '%s'\n", p[0]->buf);
+				usage(test_http_auth_usage);
+			}
+
+			mod = xmalloc(sizeof(struct auth_module));
+			mod->scheme = xstrdup(p[0]->buf);
+			mod->challenge_params = p[1] ? xstrdup(p[1]->buf) : NULL;
+			mod->tokens = xmalloc(sizeof(struct string_list));
+			string_list_init_dup(mod->tokens);
+
+			add_auth_module(mod);
+
+			strbuf_list_free(p);
+			continue;
+		}
+		if (skip_prefix(arg, "--auth-token=", &v)) {
+			struct strbuf **p = strbuf_split_str(v, ':', 2);
+			if (!p[0]) {
+				error("invalid argument '%s'", v);
+				usage(test_http_auth_usage);
+			}
+
+			if (!p[1]) {
+				error("missing token value '%s'\n", v);
+				usage(test_http_auth_usage);
+			}
+
+			// trim trailing ':'
+			strbuf_setlen(p[0], p[0]->len - 1);
+
+			mod = get_auth_module(p[0]);
+			if (!mod) {
+				error("auth scheme not defined '%s'\n", p[0]->buf);
+				usage(test_http_auth_usage);
+			}
+
+			string_list_append(mod->tokens, p[1]->buf);
+			strbuf_list_free(p);
+			continue;
+		}
+
+		fprintf(stderr, "error: unknown argument '%s'\n", arg);
+		usage(test_http_auth_usage);
+	}
+
+	/* avoid splitting a message in the middle */
+	setvbuf(stderr, NULL, _IOFBF, 4096);
+
+	if (listen_port == 0)
+		listen_port = DEFAULT_GIT_PORT;
+
+	/*
+	 * If no --listen=<addr> args are given, the setup_named_sock()
+	 * code will use receive a NULL address and set INADDR_ANY.
+	 * This exposes both internal and external interfaces on the
+	 * port.
+	 *
+	 * Disallow that and default to the internal-use-only loopback
+	 * address.
+	 */
+	if (!listen_addr.nr)
+		string_list_append(&listen_addr, "127.0.0.1");
+
+	/*
+	 * worker_mode is set in our own child process instances
+	 * (that are bound to a connected socket from a client).
+	 */
+	if (worker_mode)
+		return worker();
+
+	/*
+	 * `cld_argv` is a bit of a clever hack. The top-level instance
+	 * of test-http-server does the normal bind/listen/accept stuff.
+	 * For each incoming socket, the top-level process spawns
+	 * a child instance of test-http-server *WITH* the additional
+	 * `--worker` argument. This causes the child to set `worker_mode`
+	 * and immediately call `worker()` using the connected socket (and
+	 * without the usual need for fork() or threads).
+	 *
+	 * The magic here is made possible because `cld_argv` is static
+	 * and handle() (called by service_loop()) knows about it.
+	 */
+	strvec_push(&cld_argv, argv[0]);
+	strvec_push(&cld_argv, "--worker");
+	for (i = 1; i < argc; ++i)
+		strvec_push(&cld_argv, argv[i]);
+
+	/*
+	 * Setup primary instance to listen for connections.
+	 */
+	return serve(&listen_addr, listen_port);
+}
diff --git a/t/t5556-http-auth.sh b/t/t5556-http-auth.sh
new file mode 100755
index 00000000000..43f1791a0fe
--- /dev/null
+++ b/t/t5556-http-auth.sh
@@ -0,0 +1,260 @@ 
+#!/bin/sh
+
+test_description='test http auth header and credential helper interop'
+
+. ./test-lib.sh
+
+test_set_port GIT_TEST_HTTP_PROTOCOL_PORT
+
+# Setup a repository
+#
+REPO_DIR="$(pwd)"/repo
+
+# Setup some lookback URLs where test-http-server will be listening.
+# We will spawn it directly inside the repo directory, so we avoid
+# any need to configure directory mappings etc - we only serve this
+# repository from the root '/' of the server.
+#
+HOST_PORT=127.0.0.1:$GIT_TEST_HTTP_PROTOCOL_PORT
+ORIGIN_URL=http://$HOST_PORT/
+
+# The pid-file is created by test-http-server when it starts.
+# The server will shutdown if/when we delete it (this is easier than
+# killing it by PID).
+#
+PID_FILE="$(pwd)"/pid-file.pid
+SERVER_LOG="$(pwd)"/OUT.server.log
+
+PATH="$GIT_BUILD_DIR/t/helper/:$PATH" && export PATH
+CREDENTIAL_HELPER="$GIT_BUILD_DIR/t/helper/test-credential-helper-replay.sh" \
+	&& export CREDENTIAL_HELPER
+
+test_expect_success 'setup repos' '
+	test_create_repo "$REPO_DIR" &&
+	git -C "$REPO_DIR" branch -M main
+'
+
+stop_http_server () {
+	if ! test -f "$PID_FILE"
+	then
+		return 0
+	fi
+	#
+	# The server will shutdown automatically when we delete the pid-file.
+	#
+	rm -f "$PID_FILE"
+	#
+	# Give it a few seconds to shutdown (mainly to completely release the
+	# port before the next test start another instance and it attempts to
+	# bind to it).
+	#
+	for k in 0 1 2 3 4
+	do
+		if grep -q "Starting graceful shutdown" "$SERVER_LOG"
+		then
+			return 0
+		fi
+		sleep 1
+	done
+
+	echo "stop_http_server: timeout waiting for server shutdown"
+	return 1
+}
+
+start_http_server () {
+	#
+	# Launch our server into the background in repo_dir.
+	#
+	(
+		cd "$REPO_DIR"
+		test-http-server --verbose \
+			--listen=127.0.0.1 \
+			--port=$GIT_TEST_HTTP_PROTOCOL_PORT \
+			--reuseaddr \
+			--pid-file="$PID_FILE" \
+			"$@" \
+			2>"$SERVER_LOG" &
+	)
+	#
+	# Give it a few seconds to get started.
+	#
+	for k in 0 1 2 3 4
+	do
+		if test -f "$PID_FILE"
+		then
+			return 0
+		fi
+		sleep 1
+	done
+
+	echo "start_http_server: timeout waiting for server startup"
+	return 1
+}
+
+per_test_cleanup () {
+	stop_http_server &&
+	rm -f OUT.* &&
+	rm -f *.cred
+}
+
+test_expect_success 'http auth anonymous no challenge' '
+	test_when_finished "per_test_cleanup" &&
+	start_http_server --allow-anonymous &&
+
+	# Attempt to read from a protected repository
+	git ls-remote $ORIGIN_URL
+'
+
+test_expect_success 'http auth www-auth headers to credential helper bearer valid' '
+	test_when_finished "per_test_cleanup" &&
+	start_http_server \
+		--auth=bearer:authority=\"id.example.com\"\ q=1\ p=0 \
+		--auth=basic:realm=\"example.com\" \
+		--auth-token=bearer:secret-token &&
+
+	cat >get-expected.cred <<-EOF &&
+	protocol=http
+	host=$HOST_PORT
+	wwwauth[]=bearer authority="id.example.com" q=1 p=0
+	wwwauth[]=basic realm="example.com"
+	EOF
+
+	cat >store-expected.cred <<-EOF &&
+	protocol=http
+	host=$HOST_PORT
+	username=alice
+	password=secret-token
+	authtype=bearer
+	EOF
+
+	cat >get-response.cred <<-EOF &&
+	protocol=http
+	host=$HOST_PORT
+	username=alice
+	password=secret-token
+	authtype=bearer
+	EOF
+
+	git -c credential.helper="$CREDENTIAL_HELPER" ls-remote $ORIGIN_URL &&
+
+	test_cmp get-expected.cred get-actual.cred &&
+	test_cmp store-expected.cred store-actual.cred
+'
+
+test_expect_success 'http auth www-auth headers to credential helper basic valid' '
+	test_when_finished "per_test_cleanup" &&
+	# base64("alice:secret-passwd")
+	USERPASS64=YWxpY2U6c2VjcmV0LXBhc3N3ZA== &&
+	export USERPASS64 &&
+
+	start_http_server \
+		--auth=bearer:authority=\"id.example.com\"\ q=1\ p=0 \
+		--auth=basic:realm=\"example.com\" \
+		--auth-token=basic:$USERPASS64 &&
+
+	cat >get-expected.cred <<-EOF &&
+	protocol=http
+	host=$HOST_PORT
+	wwwauth[]=bearer authority="id.example.com" q=1 p=0
+	wwwauth[]=basic realm="example.com"
+	EOF
+
+	cat >store-expected.cred <<-EOF &&
+	protocol=http
+	host=$HOST_PORT
+	username=alice
+	password=secret-passwd
+	authtype=basic
+	EOF
+
+	cat >get-response.cred <<-EOF &&
+	protocol=http
+	host=$HOST_PORT
+	username=alice
+	password=secret-passwd
+	authtype=basic
+	EOF
+
+	git -c credential.helper="$CREDENTIAL_HELPER" ls-remote $ORIGIN_URL &&
+
+	test_cmp get-expected.cred get-actual.cred &&
+	test_cmp store-expected.cred store-actual.cred
+'
+
+test_expect_success 'http auth www-auth headers to credential helper custom scheme' '
+	test_when_finished "per_test_cleanup" &&
+	start_http_server \
+		--auth=foobar:alg=test\ widget=1 \
+		--auth=bearer:authority=\"id.example.com\"\ q=1\ p=0 \
+		--auth=basic:realm=\"example.com\" \
+		--auth-token=foobar:SECRET-FOOBAR-VALUE &&
+
+	cat >get-expected.cred <<-EOF &&
+	protocol=http
+	host=$HOST_PORT
+	wwwauth[]=foobar alg=test widget=1
+	wwwauth[]=bearer authority="id.example.com" q=1 p=0
+	wwwauth[]=basic realm="example.com"
+	EOF
+
+	cat >store-expected.cred <<-EOF &&
+	protocol=http
+	host=$HOST_PORT
+	username=alice
+	password=SECRET-FOOBAR-VALUE
+	authtype=foobar
+	EOF
+
+	cat >get-response.cred <<-EOF &&
+	protocol=http
+	host=$HOST_PORT
+	username=alice
+	password=SECRET-FOOBAR-VALUE
+	authtype=foobar
+	EOF
+
+	git -c credential.helper="$CREDENTIAL_HELPER" ls-remote $ORIGIN_URL &&
+
+	test_cmp get-expected.cred get-actual.cred &&
+	test_cmp store-expected.cred store-actual.cred
+'
+
+test_expect_success 'http auth www-auth headers to credential helper invalid' '
+	test_when_finished "per_test_cleanup" &&
+	start_http_server \
+		--auth=bearer:authority=\"id.example.com\"\ q=1\ p=0 \
+		--auth=basic:realm=\"example.com\" \
+		--auth-token=bearer:secret-token &&
+
+	cat >get-expected.cred <<-EOF &&
+	protocol=http
+	host=$HOST_PORT
+	wwwauth[]=bearer authority="id.example.com" q=1 p=0
+	wwwauth[]=basic realm="example.com"
+	EOF
+
+	cat >erase-expected.cred <<-EOF &&
+	protocol=http
+	host=$HOST_PORT
+	username=alice
+	password=invalid-token
+	authtype=bearer
+	wwwauth[]=bearer authority="id.example.com" q=1 p=0
+	wwwauth[]=basic realm="example.com"
+	EOF
+
+	cat >get-response.cred <<-EOF &&
+	protocol=http
+	host=$HOST_PORT
+	username=alice
+	password=invalid-token
+	authtype=bearer
+	EOF
+
+	test_must_fail git -c credential.helper="$CREDENTIAL_HELPER" ls-remote $ORIGIN_URL &&
+
+	test_cmp get-expected.cred get-actual.cred &&
+	test_cmp erase-expected.cred erase-actual.cred
+'
+
+test_done