diff mbox series

[v2,08/10] cat-file: batch-command uses content_limit

Message ID 20240823224630.1180772-9-e@80x24.org (mailing list archive)
State New
Headers show
Series cat-file speedups | expand

Commit Message

Eric Wong Aug. 23, 2024, 10:46 p.m. UTC
As with the normal `--batch' mode, we can use the content_limit
round trip optimization to avoid a redundant lookup.  The only
tricky thing here is we need to enable/disable setting the
object_info.contentp field depending on whether we hit an `info'
or `contents' command.

t1006 is updated to ensure we can switch back and forth between
`info' and `contents' commands without problems.

Signed-off-by: Eric Wong <e@80x24.org>
---
 builtin/cat-file.c | 32 ++++++--------------------------
 1 file changed, 6 insertions(+), 26 deletions(-)

Comments

Junio C Hamano Aug. 26, 2024, 10:13 p.m. UTC | #1
Eric Wong <e@80x24.org> writes:

> diff --git a/builtin/cat-file.c b/builtin/cat-file.c
> index 2aedd62324..067cdbdbf9 100644
> --- a/builtin/cat-file.c
> +++ b/builtin/cat-file.c
> @@ -417,7 +417,8 @@ static void print_object_or_die(struct batch_options *opt, struct expand_data *d
>  		case OI_DBCACHED:
>  			unlock_delta_base_cache();
>  		}
> -	} else if (data->type == OBJ_BLOB) {
> +	} else {
> +		assert(data->type == OBJ_BLOB);
>  		if (opt->buffer_output)
>  			fflush(stdout);
>  		if (opt->transform_mode) {

Hmph, because until this step, the control could reach this function
with data->content NULL for a non-BLOB type, even though we
eliminated that with the previous step for "--batch" command user.
This step covers "--batch-command" user to also force the
data->content to be populated, so we no longer will see anything but
BLOB when data->content is NULL.

That sounds correct with the current code, but it somehow feels a
bit too subtle to my taste.  In addition to an assert(), future
readers of this code would deserve a comment describing why we can
safely assume that blobs are the only types we will see here.  That
way, they can tell when they add another command that ends up calling
this function that they too will need to do the contentp thing.

> @@ -452,30 +453,6 @@ static void print_object_or_die(struct batch_options *opt, struct expand_data *d
>  			stream_blob(oid);
>  		}
>  	}
> -	else {
> -		enum object_type type;
> -		unsigned long size;
> -		void *contents;
> -
> -		contents = repo_read_object_file(the_repository, oid, &type,
> -						 &size);
> -		if (!contents)
> -			die("object %s disappeared", oid_to_hex(oid));
> -
> -		if (use_mailmap) {
> -			size_t s = size;
> -			contents = replace_idents_using_mailmap(contents, &s);
> -			size = cast_size_t_to_ulong(s);
> -		}
> -
> -		if (type != data->type)
> -			die("object %s changed type!?", oid_to_hex(oid));
> -		if (data->info.sizep && size != data->size && !use_mailmap)
> -			die("object %s changed size!?", oid_to_hex(oid));
> -
> -		batch_write(opt, contents, size);
> -		free(contents);
> -	}

And it certainly is nice that we can get rid of this fallback code.

By the way, the previous step sshould rename the .content_limit
member to make it clear that (1) it is about the size limit, and (2)
it only applies to blobs.  Ideally (1) should be done much earlier
in the series, and (2) must be done when the code starts to ignore
the member for non-blob types.

Thanks.
diff mbox series

Patch

diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index 2aedd62324..067cdbdbf9 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -417,7 +417,8 @@  static void print_object_or_die(struct batch_options *opt, struct expand_data *d
 		case OI_DBCACHED:
 			unlock_delta_base_cache();
 		}
-	} else if (data->type == OBJ_BLOB) {
+	} else {
+		assert(data->type == OBJ_BLOB);
 		if (opt->buffer_output)
 			fflush(stdout);
 		if (opt->transform_mode) {
@@ -452,30 +453,6 @@  static void print_object_or_die(struct batch_options *opt, struct expand_data *d
 			stream_blob(oid);
 		}
 	}
-	else {
-		enum object_type type;
-		unsigned long size;
-		void *contents;
-
-		contents = repo_read_object_file(the_repository, oid, &type,
-						 &size);
-		if (!contents)
-			die("object %s disappeared", oid_to_hex(oid));
-
-		if (use_mailmap) {
-			size_t s = size;
-			contents = replace_idents_using_mailmap(contents, &s);
-			size = cast_size_t_to_ulong(s);
-		}
-
-		if (type != data->type)
-			die("object %s changed type!?", oid_to_hex(oid));
-		if (data->info.sizep && size != data->size && !use_mailmap)
-			die("object %s changed size!?", oid_to_hex(oid));
-
-		batch_write(opt, contents, size);
-		free(contents);
-	}
 }
 
 static void print_default_format(struct strbuf *scratch, struct expand_data *data,
@@ -689,6 +666,7 @@  static void parse_cmd_contents(struct batch_options *opt,
 			     struct expand_data *data)
 {
 	opt->batch_mode = BATCH_MODE_CONTENTS;
+	data->info.contentp = &data->content;
 	batch_one_object(line, output, opt, data);
 }
 
@@ -698,6 +676,7 @@  static void parse_cmd_info(struct batch_options *opt,
 			   struct expand_data *data)
 {
 	opt->batch_mode = BATCH_MODE_INFO;
+	data->info.contentp = NULL;
 	batch_one_object(line, output, opt, data);
 }
 
@@ -839,7 +818,8 @@  static int batch_objects(struct batch_options *opt)
 	 * Likewise, grab the content in the initial request if it's small
 	 * and we're not planning to filter it.
 	 */
-	if (opt->batch_mode == BATCH_MODE_CONTENTS) {
+	if ((opt->batch_mode == BATCH_MODE_CONTENTS) ||
+			(opt->batch_mode == BATCH_MODE_QUEUE_AND_DISPATCH)) {
 		data.info.typep = &data.type;
 		if (!opt->transform_mode) {
 			data.info.sizep = &data.size;