diff mbox series

[v2,1/2] Change how HTTP response body is returned

Message ID 20181229194447.157763-2-masayasuzuki@google.com (mailing list archive)
State Superseded, archived
Headers show
Series Show HTTP headers of failed requests with GIT_CURL_VERBOSE | expand

Commit Message

Masaya Suzuki Dec. 29, 2018, 7:44 p.m. UTC
This changes the way HTTP response body is returned in
http_request_reauth and post_rpc.

1. http_request_reauth makes up to two requests; one without a
   credential and one with a credential. The first request can fail if
   it needs a credential. When the keep_error option is specified, the
   response to the first request can be written to the HTTP response
   body destination. If the response body destination is a string
   buffer, it erases the buffer before making the second request. By
   introducing http_response_dest, it can handle the case that the
   destination is a file handle.
2. post_rpc makes an HTTP request and the response body is directly
   written to a file descriptor. This makes it check the HTTP status
   code before writing it, and do not write the response body if it's an
   error response. It's ok without this check now because post_rpc makes
   a request with CURLOPT_FAILONERROR, and libcurl won't call the
   callback if the response has an error status code.

Signed-off-by: Masaya Suzuki <masayasuzuki@google.com>
---
 http.c        | 99 +++++++++++++++++++++++++++++----------------------
 remote-curl.c | 29 ++++++++++++---
 2 files changed, 81 insertions(+), 47 deletions(-)

Comments

Junio C Hamano Jan. 3, 2019, 7:09 p.m. UTC | #1
Masaya Suzuki <masayasuzuki@google.com> writes:

> Subject: Re: [PATCH v2 1/2] Change how HTTP response body is returned

Perhaps:

    Subject: http: change the way response body is returned

but if we can state why we want to do so concisely, that would be
even better.

> This changes the way HTTP response body is returned in
> http_request_reauth and post_rpc.
>
> 1. http_request_reauth makes up to two requests; one without a
>    credential and one with a credential. The first request can fail if
>    it needs a credential. When the keep_error option is specified, the
>    response to the first request can be written to the HTTP response
>    body destination. If the response body destination is a string
>    buffer, it erases the buffer before making the second request. By
>    introducing http_response_dest, it can handle the case that the
>    destination is a file handle.
> 2. post_rpc makes an HTTP request and the response body is directly
>    written to a file descriptor. This makes it check the HTTP status
>    code before writing it, and do not write the response body if it's an
>    error response. It's ok without this check now because post_rpc makes
>    a request with CURLOPT_FAILONERROR, and libcurl won't call the
>    callback if the response has an error status code.

The above may be an accurate description of what the code will do
with this patch, but I cannot quite read the reason why we would
want the code to behave so in the first place.

>
> Signed-off-by: Masaya Suzuki <masayasuzuki@google.com>
> ---
>  http.c        | 99 +++++++++++++++++++++++++++++----------------------
>  remote-curl.c | 29 ++++++++++++---
>  2 files changed, 81 insertions(+), 47 deletions(-)
>
> diff --git a/http.c b/http.c
> index eacc2a75e..d23417670 100644
> --- a/http.c
> +++ b/http.c
> @@ -165,6 +165,19 @@ static int http_schannel_check_revoke = 1;
>   */
>  static int http_schannel_use_ssl_cainfo;
>  
> +/*
> + * Where to store the result of http_request.
> + *
> + * At most one of buffer or file can be non-NULL. The buffer and file are not
> + * allocated by http_request, and the caller is responsible for releasing them.
> + */
> +struct http_response_dest {
> +	struct strbuf *buffer;
> +
> +	FILE *file;
> +	const char *filename;
> +};
> +
>  size_t fread_buffer(char *ptr, size_t eltsize, size_t nmemb, void *buffer_)
>  {
>  	size_t size = eltsize * nmemb;
> @@ -1794,12 +1807,8 @@ static void http_opt_request_remainder(CURL *curl, off_t pos)
>  	curl_easy_setopt(curl, CURLOPT_RANGE, buf);
>  }
>  
> -/* http_request() targets */
> -#define HTTP_REQUEST_STRBUF	0
> -#define HTTP_REQUEST_FILE	1
> -
>  static int http_request(const char *url,
> -			void *result, int target,
> +			struct http_response_dest *dest,
>  			const struct http_get_options *options)
>  {
>  	struct active_request_slot *slot;
> @@ -1812,21 +1821,23 @@ static int http_request(const char *url,
>  	slot = get_active_slot();
>  	curl_easy_setopt(slot->curl, CURLOPT_HTTPGET, 1);
>  
> -	if (result == NULL) {
> -		curl_easy_setopt(slot->curl, CURLOPT_NOBODY, 1);
> -	} else {
> +	if (dest->file) {
> +		off_t posn = ftello(dest->file);
>  		curl_easy_setopt(slot->curl, CURLOPT_NOBODY, 0);
> -		curl_easy_setopt(slot->curl, CURLOPT_FILE, result);

OK, so it used to be that we can either discard the result
(i.e. NOBODY) or send the result to CURLOPT_FILE, and the latter has
two options (sent to a file, or sent to in-core buffer).  The way
these three choices are given were with the result pointer and the
target enum.

That is replaced by a struct that allows the same three choices.

Makes sense so far.

> @@ -1930,10 +1941,10 @@ static int update_url_from_redirect(struct strbuf *base,
>  }
>  
>  static int http_request_reauth(const char *url,
> -			       void *result, int target,
> +			       struct http_response_dest *dest,
>  			       struct http_get_options *options)
>  {
> -	int ret = http_request(url, result, target, options);
> +	int ret = http_request(url, dest, options);

Just adjusting the calling convention to the change we saw above.

> @@ -1949,32 +1960,34 @@ static int http_request_reauth(const char *url,
>  	if (ret != HTTP_REAUTH)
>  		return ret;
>  
> -	/*
> -	 * If we are using KEEP_ERROR, the previous request may have
> -	 * put cruft into our output stream; we should clear it out before
> -	 * making our next request. We only know how to do this for
> -	 * the strbuf case, but that is enough to satisfy current callers.
> -	 */
> -	if (options && options->keep_error) {
> -		switch (target) {
> -		case HTTP_REQUEST_STRBUF:
> -			strbuf_reset(result);
> -			break;
> -		default:
> -			BUG("HTTP_KEEP_ERROR is only supported with strbufs");

Now it gets interesting.  We used to allow keep-error only when
receiving to in-core buffer.

> +	if (dest->file) {
> +		/*
> +		 * At this point, the file contains the response body of the
> +		 * previous request. We need to truncate the file.
> +		 */
> +		FILE *new_file = freopen(dest->filename, "w", dest->file);

Now freopen() lets us restart the file anew with a new "FILE *".

> +		if (new_file == NULL) {
> +			error("Unable to open local file %s", dest->filename);

error_errno(), perhaps?

At this point, I presume that dest->file is closed by the failed
freopen(), but dest->file is still non-NULL and causes further calls
to http_request() with this dest would be a disaster?  As long as
the caller of this function reacts to HTTP_ERROR and kill the dest,
it would be fine.

> +			return HTTP_ERROR;
>  		}
> +		dest->file = new_file;
> +	} else if (dest->buffer) {
> +		strbuf_reset(dest->buffer);
>  	}

OK.

>  	credential_fill(&http_auth);
>  
> -	return http_request(url, result, target, options);
> +	return http_request(url, dest, options);
>  }

So far, I spotted one reason why this patch wants to exist: it used
to be that keep_error was impossible when sending to a file.  It
somehow wants to allow us to do so (even though it still is unclear
who that new caller that wants to use keep_error is, and for what
purpose it wants to use that facility).

Perhaps this step can be split into at least two steps?  The first
one is to turn <result, target> to <dest> without changing any other
behaviour, and then the second one implements keep_error handling
for the "dest->file != NULL" case.

There may be other things this patch does, in which case it may
deserve to become three or more steps, but we haven't seen enough to
decide if that is the case.  Let's read on.

>  int http_get_strbuf(const char *url,
> -		    struct strbuf *result,
> +		    struct strbuf *dest_buffer,
>  		    struct http_get_options *options)
>  {
> -	return http_request_reauth(url, result, HTTP_REQUEST_STRBUF, options);
> +	struct http_response_dest dest;
> +	dest.file = NULL;
> +	dest.buffer = dest_buffer;
> +	return http_request_reauth(url, &dest, options);

This is merely adjusting for the updated calling convention, which
makes sense.

>  }
>  
>  /*
> @@ -1988,18 +2001,20 @@ static int http_get_file(const char *url, const char *filename,
>  {
>  	int ret;
>  	struct strbuf tmpfile = STRBUF_INIT;
> -	FILE *result;
> +	struct http_response_dest dest;
>  
>  	strbuf_addf(&tmpfile, "%s.temp", filename);
> -	result = fopen(tmpfile.buf, "a");
> -	if (!result) {
> +	dest.buffer = NULL;
> +	dest.file = fopen(tmpfile.buf, "a");
> +	if (!dest.file) {
>  		error("Unable to open local file %s", tmpfile.buf);
>  		ret = HTTP_ERROR;
>  		goto cleanup;
>  	}
> +	dest.filename = tmpfile.buf;

Hmph.  I somehow expected that the presence of dest.filename field
would allow callers to just set it and let the fopen() call handled
in http_request(), e.g. at the beginning of the function, it would
do something like

	if (!dest->file && dest->filename)
		dest->file = fopen(...);

But obviously that is not within the scope of this change.  Leaving
the caller responsible for opening and reporting errors as before
like the above is preferrable.

> diff --git a/remote-curl.c b/remote-curl.c
> index 1220dffcd..48656bf18 100644
> --- a/remote-curl.c
> +++ b/remote-curl.c
> @@ -546,14 +546,31 @@ static curlioerr rpc_ioctl(CURL *handle, int cmd, void *clientp)
>  }
>  #endif
>  
> +struct rpc_in_data {
> +	struct rpc_state *rpc;
> +	struct active_request_slot *slot;
> +};
> +
> +/*
> + * A callback for CURLOPT_WRITEFUNCTION. The return value is the bytes consumed
> + * from ptr.
> + */
>  static size_t rpc_in(char *ptr, size_t eltsize,
>  		size_t nmemb, void *buffer_)
>  {
>  	size_t size = eltsize * nmemb;
> -	struct rpc_state *rpc = buffer_;
> +	struct rpc_in_data *data = buffer_;
> +	long response_code;
> +
> +	if (curl_easy_getinfo(data->slot->curl, CURLINFO_RESPONSE_CODE,
> +			      &response_code) != CURLE_OK)
> +		return size;
> +	if (response_code != 200)
> +		return size;
> +
>  	if (size)
> -		rpc->any_written = 1;
> -	write_or_die(rpc->in, ptr, size);
> +		data->rpc->any_written = 1;
> +	write_or_die(data->rpc->in, ptr, size);
>  	return size;
>  }

This is not necessarily related to the change we saw in http.c but
making it more careful in general?  That is, we avoid writing the
payload to the destination (by the way, rpc->IN being the target of
write(2) is somewhat a brain-twister).  It may deserve to become its
own step in a patch series, with separate justification (i.e. "when
rpc channel receives an error from the cURL library, we used to
write the data to the file anyway, and that caused such and such
problems, as demonstrated by the new test added by this patch.  we
fix it by checking for the error before writing to the file").

> @@ -633,6 +650,7 @@ static int post_rpc(struct rpc_state *rpc)
>  	size_t gzip_size = 0;
>  	int err, large_request = 0;
>  	int needs_100_continue = 0;
> +	struct rpc_in_data rpc_in_data;
>  
>  	/* Try to load the entire request, if we can fit it into the
>  	 * allocated buffer space we can use HTTP/1.0 and avoid the
> @@ -765,8 +783,9 @@ static int post_rpc(struct rpc_state *rpc)
>  
>  	curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, headers);
>  	curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, rpc_in);
> -	curl_easy_setopt(slot->curl, CURLOPT_FILE, rpc);
> -
> +	rpc_in_data.rpc = rpc;
> +	rpc_in_data.slot = slot;
> +	curl_easy_setopt(slot->curl, CURLOPT_FILE, &rpc_in_data);
>  
>  	rpc->any_written = 0;
>  	err = run_slot(slot, NULL);

These two hunks look like mere adjustments for the new callback
type, which makes sense.

Thanks.
Jeff King Jan. 4, 2019, 10:11 a.m. UTC | #2
On Thu, Jan 03, 2019 at 11:09:02AM -0800, Junio C Hamano wrote:

> > +	if (dest->file) {
> > +		/*
> > +		 * At this point, the file contains the response body of the
> > +		 * previous request. We need to truncate the file.
> > +		 */
> > +		FILE *new_file = freopen(dest->filename, "w", dest->file);
> 
> Now freopen() lets us restart the file anew with a new "FILE *".
> 
> > +		if (new_file == NULL) {
> > +			error("Unable to open local file %s", dest->filename);
> 
> error_errno(), perhaps?
> 
> At this point, I presume that dest->file is closed by the failed
> freopen(), but dest->file is still non-NULL and causes further calls
> to http_request() with this dest would be a disaster?  As long as
> the caller of this function reacts to HTTP_ERROR and kill the dest,
> it would be fine.

I also wondered what timing guarantees freopen() gives us (i.e., is it
possible for it to open and truncate the file, and then close the old
handle, flushing some in-memory buffer). C99 says:

  The freopen function first attempts to close any file that is
  associated with the specified stream. Failure to close the file is
  ignored. The error and end-of-file indicators for the stream are
  cleared.

So I think the order is OK for my concern, but not for yours. I.e., on
an error, dest->file is now undefined.

It might be nice to set "dest->file == NULL" in that case. There's no
guarantee that the caller did not hold onto its own copy of the handle,
but since this struct is only exposed internally within http.c, that's
probably OK.

The most robust thing would perhaps be:

  fflush(dest->file);
  ftruncate(fileno(dest->file), 0);

which leaves the handle intact.

(I agree with the rest of your review, especially that it would be
easier to read if this were split into separate refactor and
change-behavior steps).

-Peff
Jeff King Jan. 4, 2019, 10:30 a.m. UTC | #3
On Sat, Dec 29, 2018 at 11:44:46AM -0800, Masaya Suzuki wrote:

> +/*
> + * A callback for CURLOPT_WRITEFUNCTION. The return value is the bytes consumed
> + * from ptr.
> + */
>  static size_t rpc_in(char *ptr, size_t eltsize,
>  		size_t nmemb, void *buffer_)
>  {
>  	size_t size = eltsize * nmemb;
> -	struct rpc_state *rpc = buffer_;
> +	struct rpc_in_data *data = buffer_;
> +	long response_code;
> +
> +	if (curl_easy_getinfo(data->slot->curl, CURLINFO_RESPONSE_CODE,
> +			      &response_code) != CURLE_OK)
> +		return size;

This hunk was unexpected to me. The function here is just writing out
the data, and I expected we'd handle the error after the whole transfer
is done. But we do that anyway eventually via run_slot() (which uses
handle_curl_result). I guess the goal here is to start throwing away
data when we see an error, rather than storing it?

That makes some sense, though I do wonder if there's any case where curl
would call our WRITEFUNCTION before it knows the HTTP status. That
implies a body before our header, which seems impossible, though.

> +	if (response_code != 200)
> +		return size;

The current behavior with CURLOPT_FAILONERROR treats codes >= 400 as an
error. And in handle_curl_result(), we treat >= 300 as an error (since
we only see 3xx for a disabled redirect). I suppose it's unlikely for us
to see any success code besides 200, but we probably ought to be
following the same rules here.

-Peff
Junio C Hamano Jan. 4, 2019, 8:13 p.m. UTC | #4
Jeff King <peff@peff.net> writes:

> The most robust thing would perhaps be:
>
>   fflush(dest->file);
>   ftruncate(fileno(dest->file), 0);
>
> which leaves the handle intact.

An added benefit of that approach is that there is no need for
the filename field in the dest structure.

Having a separate filename field could be a positive flexibility (it
could be used to open a file, store its FILE* in dest->file, while
storing the name of another file in dest->filename), but also a
negative flexibility (dest->file possibly pointing to a file
different from dest->filename is a source of confusion).  As I do
not think any current caller that wants such a flexibility, or
callers in any immediate future, it probably is an overall plus if
we do not have to add the dest->filename field.

> (I agree with the rest of your review, especially that it would be
> easier to read if this were split into separate refactor and
> change-behavior steps).
>
> -Peff
diff mbox series

Patch

diff --git a/http.c b/http.c
index eacc2a75e..d23417670 100644
--- a/http.c
+++ b/http.c
@@ -165,6 +165,19 @@  static int http_schannel_check_revoke = 1;
  */
 static int http_schannel_use_ssl_cainfo;
 
+/*
+ * Where to store the result of http_request.
+ *
+ * At most one of buffer or file can be non-NULL. The buffer and file are not
+ * allocated by http_request, and the caller is responsible for releasing them.
+ */
+struct http_response_dest {
+	struct strbuf *buffer;
+
+	FILE *file;
+	const char *filename;
+};
+
 size_t fread_buffer(char *ptr, size_t eltsize, size_t nmemb, void *buffer_)
 {
 	size_t size = eltsize * nmemb;
@@ -1794,12 +1807,8 @@  static void http_opt_request_remainder(CURL *curl, off_t pos)
 	curl_easy_setopt(curl, CURLOPT_RANGE, buf);
 }
 
-/* http_request() targets */
-#define HTTP_REQUEST_STRBUF	0
-#define HTTP_REQUEST_FILE	1
-
 static int http_request(const char *url,
-			void *result, int target,
+			struct http_response_dest *dest,
 			const struct http_get_options *options)
 {
 	struct active_request_slot *slot;
@@ -1812,21 +1821,23 @@  static int http_request(const char *url,
 	slot = get_active_slot();
 	curl_easy_setopt(slot->curl, CURLOPT_HTTPGET, 1);
 
-	if (result == NULL) {
-		curl_easy_setopt(slot->curl, CURLOPT_NOBODY, 1);
-	} else {
+	if (dest->file) {
+		off_t posn = ftello(dest->file);
 		curl_easy_setopt(slot->curl, CURLOPT_NOBODY, 0);
-		curl_easy_setopt(slot->curl, CURLOPT_FILE, result);
-
-		if (target == HTTP_REQUEST_FILE) {
-			off_t posn = ftello(result);
-			curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION,
-					 fwrite);
-			if (posn > 0)
-				http_opt_request_remainder(slot->curl, posn);
-		} else
-			curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION,
-					 fwrite_buffer);
+		curl_easy_setopt(slot->curl, CURLOPT_FILE,
+				 dest->file);
+		curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION,
+				 fwrite);
+		if (posn > 0)
+			http_opt_request_remainder(slot->curl, posn);
+	} else if (dest->buffer) {
+		curl_easy_setopt(slot->curl, CURLOPT_NOBODY, 0);
+		curl_easy_setopt(slot->curl, CURLOPT_FILE,
+				 dest->buffer);
+		curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION,
+				 fwrite_buffer);
+	} else {
+		curl_easy_setopt(slot->curl, CURLOPT_NOBODY, 1);
 	}
 
 	accept_language = get_accept_language();
@@ -1930,10 +1941,10 @@  static int update_url_from_redirect(struct strbuf *base,
 }
 
 static int http_request_reauth(const char *url,
-			       void *result, int target,
+			       struct http_response_dest *dest,
 			       struct http_get_options *options)
 {
-	int ret = http_request(url, result, target, options);
+	int ret = http_request(url, dest, options);
 
 	if (ret != HTTP_OK && ret != HTTP_REAUTH)
 		return ret;
@@ -1949,32 +1960,34 @@  static int http_request_reauth(const char *url,
 	if (ret != HTTP_REAUTH)
 		return ret;
 
-	/*
-	 * If we are using KEEP_ERROR, the previous request may have
-	 * put cruft into our output stream; we should clear it out before
-	 * making our next request. We only know how to do this for
-	 * the strbuf case, but that is enough to satisfy current callers.
-	 */
-	if (options && options->keep_error) {
-		switch (target) {
-		case HTTP_REQUEST_STRBUF:
-			strbuf_reset(result);
-			break;
-		default:
-			BUG("HTTP_KEEP_ERROR is only supported with strbufs");
+	if (dest->file) {
+		/*
+		 * At this point, the file contains the response body of the
+		 * previous request. We need to truncate the file.
+		 */
+		FILE *new_file = freopen(dest->filename, "w", dest->file);
+		if (new_file == NULL) {
+			error("Unable to open local file %s", dest->filename);
+			return HTTP_ERROR;
 		}
+		dest->file = new_file;
+	} else if (dest->buffer) {
+		strbuf_reset(dest->buffer);
 	}
 
 	credential_fill(&http_auth);
 
-	return http_request(url, result, target, options);
+	return http_request(url, dest, options);
 }
 
 int http_get_strbuf(const char *url,
-		    struct strbuf *result,
+		    struct strbuf *dest_buffer,
 		    struct http_get_options *options)
 {
-	return http_request_reauth(url, result, HTTP_REQUEST_STRBUF, options);
+	struct http_response_dest dest;
+	dest.file = NULL;
+	dest.buffer = dest_buffer;
+	return http_request_reauth(url, &dest, options);
 }
 
 /*
@@ -1988,18 +2001,20 @@  static int http_get_file(const char *url, const char *filename,
 {
 	int ret;
 	struct strbuf tmpfile = STRBUF_INIT;
-	FILE *result;
+	struct http_response_dest dest;
 
 	strbuf_addf(&tmpfile, "%s.temp", filename);
-	result = fopen(tmpfile.buf, "a");
-	if (!result) {
+	dest.buffer = NULL;
+	dest.file = fopen(tmpfile.buf, "a");
+	if (!dest.file) {
 		error("Unable to open local file %s", tmpfile.buf);
 		ret = HTTP_ERROR;
 		goto cleanup;
 	}
+	dest.filename = tmpfile.buf;
 
-	ret = http_request_reauth(url, result, HTTP_REQUEST_FILE, options);
-	fclose(result);
+	ret = http_request_reauth(url, &dest, options);
+	fclose(dest.file);
 
 	if (ret == HTTP_OK && finalize_object_file(tmpfile.buf, filename))
 		ret = HTTP_ERROR;
diff --git a/remote-curl.c b/remote-curl.c
index 1220dffcd..48656bf18 100644
--- a/remote-curl.c
+++ b/remote-curl.c
@@ -546,14 +546,31 @@  static curlioerr rpc_ioctl(CURL *handle, int cmd, void *clientp)
 }
 #endif
 
+struct rpc_in_data {
+	struct rpc_state *rpc;
+	struct active_request_slot *slot;
+};
+
+/*
+ * A callback for CURLOPT_WRITEFUNCTION. The return value is the bytes consumed
+ * from ptr.
+ */
 static size_t rpc_in(char *ptr, size_t eltsize,
 		size_t nmemb, void *buffer_)
 {
 	size_t size = eltsize * nmemb;
-	struct rpc_state *rpc = buffer_;
+	struct rpc_in_data *data = buffer_;
+	long response_code;
+
+	if (curl_easy_getinfo(data->slot->curl, CURLINFO_RESPONSE_CODE,
+			      &response_code) != CURLE_OK)
+		return size;
+	if (response_code != 200)
+		return size;
+
 	if (size)
-		rpc->any_written = 1;
-	write_or_die(rpc->in, ptr, size);
+		data->rpc->any_written = 1;
+	write_or_die(data->rpc->in, ptr, size);
 	return size;
 }
 
@@ -633,6 +650,7 @@  static int post_rpc(struct rpc_state *rpc)
 	size_t gzip_size = 0;
 	int err, large_request = 0;
 	int needs_100_continue = 0;
+	struct rpc_in_data rpc_in_data;
 
 	/* Try to load the entire request, if we can fit it into the
 	 * allocated buffer space we can use HTTP/1.0 and avoid the
@@ -765,8 +783,9 @@  static int post_rpc(struct rpc_state *rpc)
 
 	curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, headers);
 	curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, rpc_in);
-	curl_easy_setopt(slot->curl, CURLOPT_FILE, rpc);
-
+	rpc_in_data.rpc = rpc;
+	rpc_in_data.slot = slot;
+	curl_easy_setopt(slot->curl, CURLOPT_FILE, &rpc_in_data);
 
 	rpc->any_written = 0;
 	err = run_slot(slot, NULL);