Message ID | 5c4e36e23eecbb7841078939a982b7150e2f4ab8.1670880984.git.gitgitgadget@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Enhance credential helper protocol to include auth headers | expand |
Matthew John Cheetham via GitGitGadget wrote: > +/* > + * Read the HTTP request up to the start of the optional message-body. > + * We do this byte-by-byte because we have keep-alive turned on and > + * cannot rely on an EOF. > + * > + * https://tools.ietf.org/html/rfc7230 > + * > + * We cannot call die() here because our caller needs to properly > + * respond to the client and/or close the socket before this > + * child exits so that the client doesn't get a connection reset > + * by peer error. > + */ > +static enum worker_result req__read(struct req *req, int fd) > +{ > + struct strbuf h = STRBUF_INIT; > + struct string_list start_line_fields = STRING_LIST_INIT_DUP; > + int nr_start_line_fields; > + const char *uri_target; > + const char *query; > + char *hp; > + const char *hv; > + > + enum worker_result result = WR_OK; > + > + /* > + * Read line 0 of the request and split it into component parts: > + * > + * <method> SP <uri-target> SP <HTTP-version> CRLF > + * > + */ > + if (strbuf_getwholeline_fd(&req->start_line, fd, '\n') == EOF) { > + result = WR_OK | WR_HANGUP; > + goto done; > + } > + > + strbuf_trim_trailing_newline(&req->start_line); > + > + nr_start_line_fields = string_list_split(&start_line_fields, > + req->start_line.buf, > + ' ', -1); > + if (nr_start_line_fields != 3) { > + logerror("could not parse request start-line '%s'", > + req->start_line.buf); > + result = WR_IO_ERROR; > + goto done; > + } > + > + req->method = xstrdup(start_line_fields.items[0].string); > + req->http_version = xstrdup(start_line_fields.items[2].string); > + > + uri_target = start_line_fields.items[1].string; > + > + if (strcmp(req->http_version, "HTTP/1.1")) { > + logerror("unsupported version '%s' (expecting HTTP/1.1)", > + req->http_version); > + result = WR_IO_ERROR; > + goto done; > + } > + > + query = strchr(uri_target, '?'); > + > + if (query) { > + strbuf_add(&req->uri_path, uri_target, (query - uri_target)); > + strbuf_trim_trailing_dir_sep(&req->uri_path); > + 
strbuf_addstr(&req->query_args, query + 1); > + } else { > + strbuf_addstr(&req->uri_path, uri_target); > + strbuf_trim_trailing_dir_sep(&req->uri_path); > + } This "line 0" parsing looks good, and aligns with the RFC you linked (specifically section 3.1.1 [1]). [1] https://www.rfc-editor.org/rfc/rfc7230#section-3.1.1 > + > + /* > + * Read the set of HTTP headers into a string-list. > + */ > + while (1) { > + if (strbuf_getwholeline_fd(&h, fd, '\n') == EOF) > + goto done; > + strbuf_trim_trailing_newline(&h); > + > + if (!h.len) > + goto done; /* a blank line ends the header */ > + > + hp = strbuf_detach(&h, NULL); > + string_list_append(&req->header_list, hp); > + > + /* store common request headers separately */ > + if (skip_prefix(hp, "Content-Type: ", &hv)) { > + req->content_type = hv; > + } else if (skip_prefix(hp, "Content-Length: ", &hv)) { > + req->content_length = strtol(hv, &hp, 10); > + } The "separately" is somewhat confusing - you unconditionally add 'hp' to 'req->header_list', so the "Content-Type" and "Content-Length" headers are included there as well. If that's the desired behavior, a comment like "Also store common headers as 'req' fields" might be clearer. > + } > + > + /* > + * We do not attempt to read the <message-body>, if it exists. > + * We let our caller read/chunk it in as appropriate. > + */ > + > +done: > + string_list_clear(&start_line_fields, 0); > + > + /* > + * This is useful for debugging the request, but very noisy. > + */ > + if (trace2_is_enabled()) { 'trace2_printf()' is gated internally by 'trace2_enabled' anyway, so I don't think this 'if()' is necessary. You could add a 'DEBUG_HTTP_SERVER' preprocessor directive (like 'DEBUG_CACHE_TREE' in 'cache-tree.c') if you wanted to prevent these printouts unless a developer sets it to '1'. 
> + struct string_list_item *item; > + trace2_printf("%s: %s", TR2_CAT, req->start_line.buf); > + trace2_printf("%s: hver: %s", TR2_CAT, req->http_version); > + trace2_printf("%s: hmth: %s", TR2_CAT, req->method); > + trace2_printf("%s: path: %s", TR2_CAT, req->uri_path.buf); > + trace2_printf("%s: qury: %s", TR2_CAT, req->query_args.buf); > + if (req->content_length >= 0) > + trace2_printf("%s: clen: %d", TR2_CAT, req->content_length); > + if (req->content_type) > + trace2_printf("%s: ctyp: %s", TR2_CAT, req->content_type); > + for_each_string_list_item(item, &req->header_list) > + trace2_printf("%s: hdrs: %s", TR2_CAT, item->string); > + } > + > + return result; > +} > + > +static enum worker_result dispatch(struct req *req) > +{ > + return send_http_error(1, 501, "Not Implemented", -1, NULL, > + WR_OK | WR_HANGUP); Although the request is now being read & parsed, the response creation code is still a hardcoded "Not Implemented". This means that the now-parsed 'req' is temporarily unused, but I think that's reasonable (since it allows for breaking up the implementation of 'test-http-server' into multiple, less overwhelming patches). > +} > + > static enum worker_result worker(void) > { > + struct req req = REQ__INIT; > char *client_addr = getenv("REMOTE_ADDR"); > char *client_port = getenv("REMOTE_PORT"); > enum worker_result wr = WR_OK; > @@ -160,8 +324,16 @@ static enum worker_result worker(void) > set_keep_alive(0); > > while (1) { > - wr = send_http_error(1, 501, "Not Implemented", -1, NULL, > - WR_OK | WR_HANGUP); > + req__release(&req); > + > + alarm(init_timeout ? init_timeout : timeout); > + wr = req__read(&req, 0); > + alarm(0); I know 'init_timeout' and 'timeout' were pulled from 'daemon.c', but what's the difference between them/why do they both exist? It looks like 'init_timeout' just acts as a permanent override to the value of 'timeout'.
> + > + if (wr & WR_STOP_THE_MUSIC) > + break; > + > + wr = dispatch(&req); > if (wr & WR_STOP_THE_MUSIC) > break; > }
On 2022-12-14 15:18, Victoria Dye wrote: > Matthew John Cheetham via GitGitGadget wrote: >> +/* >> + * Read the HTTP request up to the start of the optional message-body. >> + * We do this byte-by-byte because we have keep-alive turned on and >> + * cannot rely on an EOF. >> + * >> + * https://tools.ietf.org/html/rfc7230 >> + * >> + * We cannot call die() here because our caller needs to properly >> + * respond to the client and/or close the socket before this >> + * child exits so that the client doesn't get a connection reset >> + * by peer error. >> + */ >> +static enum worker_result req__read(struct req *req, int fd) >> +{ >> + struct strbuf h = STRBUF_INIT; >> + struct string_list start_line_fields = STRING_LIST_INIT_DUP; >> + int nr_start_line_fields; >> + const char *uri_target; >> + const char *query; >> + char *hp; >> + const char *hv; >> + >> + enum worker_result result = WR_OK; >> + >> + /* >> + * Read line 0 of the request and split it into component parts: >> + * >> + * <method> SP <uri-target> SP <HTTP-version> CRLF >> + * >> + */ >> + if (strbuf_getwholeline_fd(&req->start_line, fd, '\n') == EOF) { >> + result = WR_OK | WR_HANGUP; >> + goto done; >> + } >> + >> + strbuf_trim_trailing_newline(&req->start_line); >> + >> + nr_start_line_fields = string_list_split(&start_line_fields, >> + req->start_line.buf, >> + ' ', -1); >> + if (nr_start_line_fields != 3) { >> + logerror("could not parse request start-line '%s'", >> + req->start_line.buf); >> + result = WR_IO_ERROR; >> + goto done; >> + } >> + >> + req->method = xstrdup(start_line_fields.items[0].string); >> + req->http_version = xstrdup(start_line_fields.items[2].string); >> + >> + uri_target = start_line_fields.items[1].string; >> + >> + if (strcmp(req->http_version, "HTTP/1.1")) { >> + logerror("unsupported version '%s' (expecting HTTP/1.1)", >> + req->http_version); >> + result = WR_IO_ERROR; >> + goto done; >> + } >> + >> + query = strchr(uri_target, '?'); >> + >> + if (query) { >> + 
strbuf_add(&req->uri_path, uri_target, (query - uri_target)); >> + strbuf_trim_trailing_dir_sep(&req->uri_path); >> + strbuf_addstr(&req->query_args, query + 1); >> + } else { >> + strbuf_addstr(&req->uri_path, uri_target); >> + strbuf_trim_trailing_dir_sep(&req->uri_path); >> + } > > This "line 0" parsing looks good, and aligns with the RFC you linked > (specifically section 3.1.1 [1]). > > [1] https://www.rfc-editor.org/rfc/rfc7230#section-3.1.1 > >> + >> + /* >> + * Read the set of HTTP headers into a string-list. >> + */ >> + while (1) { >> + if (strbuf_getwholeline_fd(&h, fd, '\n') == EOF) >> + goto done; >> + strbuf_trim_trailing_newline(&h); >> + >> + if (!h.len) >> + goto done; /* a blank line ends the header */ >> + >> + hp = strbuf_detach(&h, NULL); >> + string_list_append(&req->header_list, hp); >> + >> + /* store common request headers separately */ >> + if (skip_prefix(hp, "Content-Type: ", &hv)) { >> + req->content_type = hv; >> + } else if (skip_prefix(hp, "Content-Length: ", &hv)) { >> + req->content_length = strtol(hv, &hp, 10); >> + } > > The "separately" is somewhat confusing - you unconditionally add 'hp' to > 'req->header_list', so the "Content-Type" and "Content-Length" headers are > included there as well. If that's the desired behavior, a comment like "Also > store common headers as 'req' fields" might be clearer. Will clarify this comment in next roll. You are correct, we *also* store these common headers on `struct req`. >> + } >> + >> + /* >> + * We do not attempt to read the <message-body>, if it exists. >> + * We let our caller read/chunk it in as appropriate. >> + */ >> + >> +done: >> + string_list_clear(&start_line_fields, 0); >> + >> + /* >> + * This is useful for debugging the request, but very noisy. >> + */ >> + if (trace2_is_enabled()) { > > 'trace2_printf()' is gated internally by 'trace2_enabled' anyway, so I don't > think this 'if()' is necessary. 
You could add a 'DEBUG_HTTP_SERVER' > preprocessor directive (like 'DEBUG_CACHE_TREE' in 'cache-tree.c') if you > wanted to prevent these printouts unless a developer sets it to '1'. The overarching `trace2_is_enabled()` call is to avoid any possible repeated evaluation within `trace2_printf` for potentially multiple request headers. >> + struct string_list_item *item; >> + trace2_printf("%s: %s", TR2_CAT, req->start_line.buf); >> + trace2_printf("%s: hver: %s", TR2_CAT, req->http_version); >> + trace2_printf("%s: hmth: %s", TR2_CAT, req->method); >> + trace2_printf("%s: path: %s", TR2_CAT, req->uri_path.buf); >> + trace2_printf("%s: qury: %s", TR2_CAT, req->query_args.buf); >> + if (req->content_length >= 0) >> + trace2_printf("%s: clen: %d", TR2_CAT, req->content_length); >> + if (req->content_type) >> + trace2_printf("%s: ctyp: %s", TR2_CAT, req->content_type); >> + for_each_string_list_item(item, &req->header_list) >> + trace2_printf("%s: hdrs: %s", TR2_CAT, item->string); >> + } >> + >> + return result; >> +} >> + >> +static enum worker_result dispatch(struct req *req) >> +{ >> + return send_http_error(1, 501, "Not Implemented", -1, NULL, >> + WR_OK | WR_HANGUP); > > Although the request is now being read & parsed, the response creation code > is still a hardcoded "Not Implemented". This means that the now-parsed 'req' > is temporarily unused, but I think that's reasonable (since it allows for > breaking up the implementation of 'test-http-server' into multiple, less > overwhelming patches). > >> +} >> + >> static enum worker_result worker(void) >> { >> + struct req req = REQ__INIT; >> char *client_addr = getenv("REMOTE_ADDR"); >> char *client_port = getenv("REMOTE_PORT"); >> enum worker_result wr = WR_OK; >> @@ -160,8 +324,16 @@ static enum worker_result worker(void) >> set_keep_alive(0); >> >> while (1) { >> - wr = send_http_error(1, 501, "Not Implemented", -1, NULL, >> - WR_OK | WR_HANGUP); >> + req__release(&req); >> + >> + alarm(init_timeout ?
init_timeout : timeout); >> + wr = req__read(&req, 0); >> + alarm(0); > > I know 'init_timeout' and 'timeout' were pulled from 'daemon.c', but what's > the difference between them/why do they both exist? It looks like > 'init_timeout' just acts as a permanent override to the value of 'timeout'. Good catch. This split made sense in daemon.c whereby the `--timeout` arg would be passed to the `git-upload-pack` command, and `--init-timeout` is used as the timeout value for the daemon server itself. In the test HTTP server we don't need the differentiation so I'll just use the simpler `--timeout` arg. >> + >> + if (wr & WR_STOP_THE_MUSIC) >> + break; >> + >> + wr = dispatch(&req); >> if (wr & WR_STOP_THE_MUSIC) >> break; >> } > Thanks, Matthew
diff --git a/t/helper/test-http-server.c b/t/helper/test-http-server.c index 53508639714..7bde678e264 100644 --- a/t/helper/test-http-server.c +++ b/t/helper/test-http-server.c @@ -97,6 +97,42 @@ enum worker_result { WR_STOP_THE_MUSIC = (WR_IO_ERROR | WR_HANGUP), }; +/* + * Fields from a parsed HTTP request. + */ +struct req { + struct strbuf start_line; + + const char *method; + const char *http_version; + + struct strbuf uri_path; + struct strbuf query_args; + + struct string_list header_list; + const char *content_type; + ssize_t content_length; +}; + +#define REQ__INIT { \ + .start_line = STRBUF_INIT, \ + .uri_path = STRBUF_INIT, \ + .query_args = STRBUF_INIT, \ + .header_list = STRING_LIST_INIT_NODUP, \ + .content_type = NULL, \ + .content_length = -1 \ + } + +static void req__release(struct req *req) +{ + strbuf_release(&req->start_line); + + strbuf_release(&req->uri_path); + strbuf_release(&req->query_args); + + string_list_clear(&req->header_list, 0); +} + static enum worker_result send_http_error( int fd, int http_code, const char *http_code_name, @@ -148,8 +184,136 @@ done: return wr; } +/* + * Read the HTTP request up to the start of the optional message-body. + * We do this byte-by-byte because we have keep-alive turned on and + * cannot rely on an EOF. + * + * https://tools.ietf.org/html/rfc7230 + * + * We cannot call die() here because our caller needs to properly + * respond to the client and/or close the socket before this + * child exits so that the client doesn't get a connection reset + * by peer error. 
+ */ +static enum worker_result req__read(struct req *req, int fd) +{ + struct strbuf h = STRBUF_INIT; + struct string_list start_line_fields = STRING_LIST_INIT_DUP; + int nr_start_line_fields; + const char *uri_target; + const char *query; + char *hp; + const char *hv; + + enum worker_result result = WR_OK; + + /* + * Read line 0 of the request and split it into component parts: + * + * <method> SP <uri-target> SP <HTTP-version> CRLF + * + */ + if (strbuf_getwholeline_fd(&req->start_line, fd, '\n') == EOF) { + result = WR_OK | WR_HANGUP; + goto done; + } + + strbuf_trim_trailing_newline(&req->start_line); + + nr_start_line_fields = string_list_split(&start_line_fields, + req->start_line.buf, + ' ', -1); + if (nr_start_line_fields != 3) { + logerror("could not parse request start-line '%s'", + req->start_line.buf); + result = WR_IO_ERROR; + goto done; + } + + req->method = xstrdup(start_line_fields.items[0].string); + req->http_version = xstrdup(start_line_fields.items[2].string); + + uri_target = start_line_fields.items[1].string; + + if (strcmp(req->http_version, "HTTP/1.1")) { + logerror("unsupported version '%s' (expecting HTTP/1.1)", + req->http_version); + result = WR_IO_ERROR; + goto done; + } + + query = strchr(uri_target, '?'); + + if (query) { + strbuf_add(&req->uri_path, uri_target, (query - uri_target)); + strbuf_trim_trailing_dir_sep(&req->uri_path); + strbuf_addstr(&req->query_args, query + 1); + } else { + strbuf_addstr(&req->uri_path, uri_target); + strbuf_trim_trailing_dir_sep(&req->uri_path); + } + + /* + * Read the set of HTTP headers into a string-list. 
+ */ + while (1) { + if (strbuf_getwholeline_fd(&h, fd, '\n') == EOF) + goto done; + strbuf_trim_trailing_newline(&h); + + if (!h.len) + goto done; /* a blank line ends the header */ + + hp = strbuf_detach(&h, NULL); + string_list_append(&req->header_list, hp); + + /* store common request headers separately */ + if (skip_prefix(hp, "Content-Type: ", &hv)) { + req->content_type = hv; + } else if (skip_prefix(hp, "Content-Length: ", &hv)) { + req->content_length = strtol(hv, &hp, 10); + } + } + + /* + * We do not attempt to read the <message-body>, if it exists. + * We let our caller read/chunk it in as appropriate. + */ + +done: + string_list_clear(&start_line_fields, 0); + + /* + * This is useful for debugging the request, but very noisy. + */ + if (trace2_is_enabled()) { + struct string_list_item *item; + trace2_printf("%s: %s", TR2_CAT, req->start_line.buf); + trace2_printf("%s: hver: %s", TR2_CAT, req->http_version); + trace2_printf("%s: hmth: %s", TR2_CAT, req->method); + trace2_printf("%s: path: %s", TR2_CAT, req->uri_path.buf); + trace2_printf("%s: qury: %s", TR2_CAT, req->query_args.buf); + if (req->content_length >= 0) + trace2_printf("%s: clen: %d", TR2_CAT, req->content_length); + if (req->content_type) + trace2_printf("%s: ctyp: %s", TR2_CAT, req->content_type); + for_each_string_list_item(item, &req->header_list) + trace2_printf("%s: hdrs: %s", TR2_CAT, item->string); + } + + return result; +} + +static enum worker_result dispatch(struct req *req) +{ + return send_http_error(1, 501, "Not Implemented", -1, NULL, + WR_OK | WR_HANGUP); +} + static enum worker_result worker(void) { + struct req req = REQ__INIT; char *client_addr = getenv("REMOTE_ADDR"); char *client_port = getenv("REMOTE_PORT"); enum worker_result wr = WR_OK; @@ -160,8 +324,16 @@ static enum worker_result worker(void) set_keep_alive(0); while (1) { - wr = send_http_error(1, 501, "Not Implemented", -1, NULL, - WR_OK | WR_HANGUP); + req__release(&req); + + alarm(init_timeout ? 
init_timeout : timeout); + wr = req__read(&req, 0); + alarm(0); + + if (wr & WR_STOP_THE_MUSIC) + break; + + wr = dispatch(&req); if (wr & WR_STOP_THE_MUSIC) break; }