cat-file: reduce write calls for unfiltered blobs

Message ID 20240621020457.1081233-1-e@80x24.org (mailing list archive)
State New, archived
Series cat-file: reduce write calls for unfiltered blobs

Commit Message

Eric Wong June 21, 2024, 2:04 a.m. UTC
While the --buffer switch is useful for non-interactive batch use,
buffering doesn't work with processes using request-response loops since
idle times are unpredictable between requests.

For unfiltered blobs, our streaming interface now appends the initial
blob data directly into the scratch buffer used for object info.
Furthermore, the final blob chunk can hold the output delimiter before
making the final write(2).

While the same syscall reduction can be done with stdio buffering by
adding fflush(3) after writing the output delimiter, stdio use requires
additional memory copies for the blob contents since it's not possible
for our streaming interface to write directly to stdio internal buffers.

For the reader process, this reduces read(2) syscalls by up to 67% in
the best case for small blobs.  Unfortunately my real-world tests on
normal data showed only a ~20% reduction in read(2) syscalls on the
reader side due to larger blobs and scheduler unpredictability.  Time
improvements only came out to roughly 0.5% on my laptop, but this may be
more noticeable on systems where syscalls are more expensive.

writev(2) was also considered, but it is not portable and detrimental
given the way our streaming API works.  writev(2) might make more sense
for filtered outputs or reading non-blob data.
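
A hypothetical sketch of such a writev(2) path (helper name invented
here, not part of this patch); it would still need short-write
handling, and our streaming API only hands back one chunk at a time,
hence the single coalesced buffer instead:

  #include <sys/uio.h>

  /* batch the info line, blob chunk and delimiter into one syscall */
  static ssize_t write_response(int fd, const char *hdr, size_t hdr_len,
                                const char *blob, size_t blob_len,
                                char delim)
  {
          struct iovec iov[3];

          iov[0].iov_base = (void *)hdr;  iov[0].iov_len = hdr_len;
          iov[1].iov_base = (void *)blob; iov[1].iov_len = blob_len;
          iov[2].iov_base = &delim;       iov[2].iov_len = 1;
          return writev(fd, iov, 3);
  }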

Signed-off-by: Eric Wong <e@80x24.org>
---
 builtin/cat-file.c | 36 +++++++++++++++++++++------------
 streaming.c        | 50 ++++++++++++++++++++++++++++++++++++++++++++++
 streaming.h        |  1 +
 3 files changed, 74 insertions(+), 13 deletions(-)

Comments

Jeff King June 21, 2024, 6:29 a.m. UTC | #1
On Fri, Jun 21, 2024 at 02:04:57AM +0000, Eric Wong wrote:

> While the --buffer switch is useful for non-interactive batch use,
> buffering doesn't work with processes using request-response loops since
> idle times are unpredictable between requests.
> 
> For unfiltered blobs, our streaming interface now appends the initial
> blob data directly into the scratch buffer used for object info.
> Furthermore, the final blob chunk can hold the output delimiter before
> making the final write(2).

So we're basically saving one write() per object. I'm not that surprised
you didn't see a huge time improvement. I'd think most of the effort is
spent zlib decompressing the object contents.

> +
> +/*
> + * stdio buffering requires extra data copies; using strbuf
> + * allows us to read_istream directly into a scratch buffer
> + */
> +int stream_blob_to_strbuf_fd(int fd, struct strbuf *sb,
> +				const struct object_id *oid)
> +{

This is a pretty convoluted interface. Did you measure that avoiding
stdio actually provides a noticeable improvement?

This function seems to mostly duplicate stream_blob_to_fd(). If we do
want to go this route, it feels like we should be able to implement the
existing function in terms of this one, just by passing in an empty
strbuf?

All that said, I think there's another approach that will yield much
bigger rewards. The call to _get_ the object-info line is separate from
the streaming code. So we end up finding and accessing each object
twice, which is wasteful, especially since most objects aren't big
enough that streaming is useful.

If we could instead tell oid_object_info_extended() to just pass back
the content when it's not huge, we could output it directly. I have a
patch that does this. You can fetch it from https://github.com/peff/git,
on the branch jk/object-info-round-trip. It drops the time to run
"cat-file --batch-all-objects --unordered --batch" on git.git from ~7.1s
to ~6.1s on my machine.

I don't remember all the details of why I didn't polish up the patch. I
think there was some refactoring needed in packed_object_info(), and I
never got around to cleaning it up.

But anyway, that's a much bigger improvement than what you've got here.
It does still require two write() calls, since you'll get the object
contents as a separate buffer. But it might be possible to teach
oid_object_info_extended() to write into a buffer of your choice (so you
could reserve some space at the front to format the metadata into, and
likewise you could reuse the buffer to avoid malloc/free for each).
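
Roughly, the combined lookup could look like this (sketch only; the
typep/sizep/contentp members already exist in struct object_info, and
capping how large a content to return is the part my branch adds):

  struct object_info oi = OBJECT_INFO_INIT;
  void *content = NULL;
  unsigned long size;
  enum object_type type;

  oi.typep = &type;
  oi.sizep = &size;
  oi.contentp = &content; /* metadata and content in one lookup */
  if (oid_object_info_extended(the_repository, oid, &oi, 0) < 0)
          die("unable to read %s", oid_to_hex(oid));
  /* ... format metadata, write content, then free(content) ... */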

I don't know that I'll have time to revisit it in the near future, but
if you like the direction feel free to take a look at the patch and see
if you can clean it up. (It was written years ago, but I rebase my
topics forward regularly and merge them into a daily driver, so it
should be in good working order).

-Peff
Phillip Wood June 21, 2024, 1:24 p.m. UTC | #2
Hi Eric and Peff

On 21/06/2024 07:29, Jeff King wrote:
> On Fri, Jun 21, 2024 at 02:04:57AM +0000, Eric Wong wrote:
> 
>> While the --buffer switch is useful for non-interactive batch use,
>> buffering doesn't work with processes using request-response loops since
>> idle times are unpredictable between requests.
>>
>> For unfiltered blobs, our streaming interface now appends the initial
>> blob data directly into the scratch buffer used for object info.
>> Furthermore, the final blob chunk can hold the output delimiter before
>> making the final write(2).
> 
> So we're basically saving one write() per object. I'm not that surprised
> you didn't see a huge time improvement. I'd think most of the effort is
> spent zlib decompressing the object contents.

If I'm reading the changes correctly then I think we may be saving more
than one write for large objects: we now seem to allocate a buffer large
enough to hold the whole object rather than using a fixed 16KB buffer.
The streaming read functions seem to try to fill the whole buffer before
returning, so I think we'll try to write the whole object at once. I'm
not sure that approach is sensible for large blobs due to the extra
memory consumption, and it does not seem to fit the behavior of the other
streaming functions.

If the reason for this change is to reduce the number of read() calls
the consumer has to make, isn't that going to be limited by the capacity
of the pipe? Does git writing more than PIPE_BUF bytes at a time
really reduce the number of reads on the other side of the pipe?

>> +
>> +/*
>> + * stdio buffering requires extra data copies; using strbuf
>> + * allows us to read_istream directly into a scratch buffer
>> + */
>> +int stream_blob_to_strbuf_fd(int fd, struct strbuf *sb,
>> +				const struct object_id *oid)
>> +{
> 
> This is a pretty convoluted interface. Did you measure that avoiding
> stdio actually provides a noticeable improvement?

Yes, this looks nasty, especially as the gotcha of the caller being
responsible for writing any data left in the buffer when the function 
returns is undocumented.
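
Something like the following above the function (wording is just a
suggestion) would at least make the contract explicit:

  /*
   * Stream the blob at `oid` to `fd`, using `sb` as the I/O buffer.
   * Data already in `sb` (e.g. a formatted object-info line) is
   * written out ahead of the blob content.  On success the final
   * blob chunk is left in `sb` unwritten; the caller is responsible
   * for appending any delimiter and flushing `sb` itself.
   */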

Your suggestion below to avoid looking up the object twice sounds like a 
nicer and hopefully more effective way of trying to improve the 
performance of "git cat-file".

Best Wishes

Phillip


> This function seems to mostly duplicate stream_blob_to_fd(). If we do
> want to go this route, it feels like we should be able to implement the
> existing function in terms of this one, just by passing in an empty
> strbuf?
> 
> All that said, I think there's another approach that will yield much
> bigger rewards. The call to _get_ the object-info line is separate from
> the streaming code. So we end up finding and accessing each object
> twice, which is wasteful, especially since most objects aren't big
> enough that streaming is useful.
> 
> If we could instead tell oid_object_info_extended() to just pass back
> the content when it's not huge, we could output it directly. I have a
> patch that does this. You can fetch it from https://github.com/peff/git,
> on the branch jk/object-info-round-trip. It drops the time to run
> "cat-file --batch-all-objects --unordered --batch" on git.git from ~7.1s
> to ~6.1s on my machine.
> 
> I don't remember all the details of why I didn't polish up the patch. I
> think there was some refactoring needed in packed_object_info(), and I
> never got around to cleaning it up.
> 
> But anyway, that's a much bigger improvement than what you've got here.
> It does still require two write() calls, since you'll get the object
> contents as a separate buffer. But it might be possible to teach
> oid_object_info_extended() to write into a buffer of your choice (so you
> could reserve some space at the front to format the metadata into, and
> likewise you could reuse the buffer to avoid malloc/free for each).
> 
> I don't know that I'll have time to revisit it in the near future, but
> if you like the direction feel free to take a look at the patch and see
> if you can clean it up. (It was written years ago, but I rebase my
> topics forward regularly and merge them into a daily driver, so it
> should be in good working order).
> 
> -Peff
>
Phillip Wood June 21, 2024, 3:25 p.m. UTC | #3
On 21/06/2024 14:24, Phillip Wood wrote:
> Hi Eric and Peff
> 
> On 21/06/2024 07:29, Jeff King wrote:
>> On Fri, Jun 21, 2024 at 02:04:57AM +0000, Eric Wong wrote:
>>
>>> While the --buffer switch is useful for non-interactive batch use,
>>> buffering doesn't work with processes using request-response loops since
>>> idle times are unpredictable between requests.
>>>
>>> For unfiltered blobs, our streaming interface now appends the initial
>>> blob data directly into the scratch buffer used for object info.
>>> Furthermore, the final blob chunk can hold the output delimiter before
>>> making the final write(2).
>>
>> So we're basically saving one write() per object. I'm not that surprised
>> you didn't see a huge time improvement. I'd think most of the effort is
>> spent zlib decompressing the object contents.
> 
> If I'm reading the changes correctly

Looking at the patch again, I had misread it: the buffer is the same
size, so the rest of this paragraph is nonsense.

Sorry for the noise

Phillip

> then I think we may be saving more
> than one write for large objects: we now seem to allocate a buffer large
> enough to hold the whole object rather than using a fixed 16KB buffer.
> The streaming read functions seem to try to fill the whole buffer before
> returning, so I think we'll try to write the whole object at once. I'm
> not sure that approach is sensible for large blobs due to the extra
> memory consumption, and it does not seem to fit the behavior of the other
> streaming functions.
> 
> If the reason for this change is to reduce the number of read() calls
> the consumer has to make, isn't that going to be limited by the capacity
> of the pipe? Does git writing more than PIPE_BUF bytes at a time
> really reduce the number of reads on the other side of the pipe?
> 
>>> +
>>> +/*
>>> + * stdio buffering requires extra data copies; using strbuf
>>> + * allows us to read_istream directly into a scratch buffer
>>> + */
>>> +int stream_blob_to_strbuf_fd(int fd, struct strbuf *sb,
>>> +                const struct object_id *oid)
>>> +{
>>
>> This is a pretty convoluted interface. Did you measure that avoiding
>> stdio actually provides a noticeable improvement?
> 
> Yes, this looks nasty, especially as the gotcha of the caller being
> responsible for writing any data left in the buffer when the function 
> returns is undocumented.
> 
> Your suggestion below to avoid looking up the object twice sounds like a 
> nicer and hopefully more effective way of trying to improve the 
> performance of "git cat-file".
> 
> Best Wishes
> 
> Phillip
> 
> 
>> This function seems to mostly duplicate stream_blob_to_fd(). If we do
>> want to go this route, it feels like we should be able to implement the
>> existing function in terms of this one, just by passing in an empty
>> strbuf?
>>
>> All that said, I think there's another approach that will yield much
>> bigger rewards. The call to _get_ the object-info line is separate from
>> the streaming code. So we end up finding and accessing each object
>> twice, which is wasteful, especially since most objects aren't big
>> enough that streaming is useful.
>>
>> If we could instead tell oid_object_info_extended() to just pass back
>> the content when it's not huge, we could output it directly. I have a
>> patch that does this. You can fetch it from https://github.com/peff/git,
>> on the branch jk/object-info-round-trip. It drops the time to run
>> "cat-file --batch-all-objects --unordered --batch" on git.git from ~7.1s
>> to ~6.1s on my machine.
>>
>> I don't remember all the details of why I didn't polish up the patch. I
>> think there was some refactoring needed in packed_object_info(), and I
>> never got around to cleaning it up.
>>
>> But anyway, that's a much bigger improvement than what you've got here.
>> It does still require two write() calls, since you'll get the object
>> contents as a separate buffer. But it might be possible to teach
>> oid_object_info_extended() to write into a buffer of your choice (so you
>> could reserve some space at the front to format the metadata into, and
>> likewise you could reuse the buffer to avoid malloc/free for each).
>>
>> I don't know that I'll have time to revisit it in the near future, but
>> if you like the direction feel free to take a look at the patch and see
>> if you can clean it up. (It was written years ago, but I rebase my
>> topics forward regularly and merge them into a daily driver, so it
>> should be in good working order).
>>
>> -Peff
>>
> 
>
Eric Wong June 21, 2024, 7:42 p.m. UTC | #4
Jeff King <peff@peff.net> wrote:
> On Fri, Jun 21, 2024 at 02:04:57AM +0000, Eric Wong wrote:
> 
> > While the --buffer switch is useful for non-interactive batch use,
> > buffering doesn't work with processes using request-response loops since
> > idle times are unpredictable between requests.
> > 
> > For unfiltered blobs, our streaming interface now appends the initial
> > blob data directly into the scratch buffer used for object info.
> > Furthermore, the final blob chunk can hold the output delimiter before
> > making the final write(2).
> 
> So we're basically saving one write() per object. I'm not that surprised
> you didn't see a huge time improvement. I'd think most of the effort is
> spent zlib decompressing the object contents.

3 writes down to 1 for small objects, actually: header + blob + delimiter

I was mainly annoyed to strace my reader process and find 3 reads,
or even more for non-blocking sockets; the worst case (after the
initial wakeup via epoll_wait) is:

  read, read (EAGAIN), poll, read, read (EAGAIN), poll, read
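
Roughly, the reader side is a loop like this (simplified sketch;
handle_response() is a hypothetical stand-in for the real handler,
and it needs <poll.h>, <errno.h> and <unistd.h>):

  char buf[65536];
  ssize_t n;

  for (;;) {
          struct pollfd pfd = { .fd = fd, .events = POLLIN };

          if (poll(&pfd, 1, -1) < 0)
                  break;
          n = read(fd, buf, sizeof(buf)); /* ideally one read per response */
          if (n > 0)
                  handle_response(buf, n);
          else if (n < 0 && (errno == EAGAIN || errno == EWOULDBLOCK))
                  continue; /* spurious wakeup; poll again */
          else
                  break; /* EOF or error */
  }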

But yeah, scheduler behavior is unpredictable on complex modern
systems.

> > +
> > +/*
> > + * stdio buffering requires extra data copies; using strbuf
> > + * allows us to read_istream directly into a scratch buffer
> > + */
> > +int stream_blob_to_strbuf_fd(int fd, struct strbuf *sb,
> > +				const struct object_id *oid)
> > +{
> 
> This is a pretty convoluted interface. Did you measure that avoiding
> stdio actually provides a noticeable improvement?

Yeah, I didn't get any improvements with stdio that I could measure,
but my measurements included AGPL Perl code on the reader side.

> This function seems to mostly duplicate stream_blob_to_fd(). If we do
> want to go this route, it feels like we should be able to implement the
> existing function in terms of this one, just by passing in an empty
> strbuf?

I didn't want to stuff too much into the loop, given the hole-seeking
optimization logic for regular files in stream_blob_to_fd.

> All that said, I think there's another approach that will yield much
> bigger rewards. The call to _get_ the object-info line is separate from
> the streaming code. So we end up finding and accessing each object
> twice, which is wasteful, especially since most objects aren't big
> enough that streaming is useful.

Yeah, I noticed that and got confused, actually.

> If we could instead tell oid_object_info_extended() to just pass back
> the content when it's not huge, we could output it directly. I have a
> patch that does this. You can fetch it from https://github.com/peff/git,
> on the branch jk/object-info-round-trip. It drops the time to run
> "cat-file --batch-all-objects --unordered --batch" on git.git from ~7.1s
> to ~6.1s on my machine.

Cool, I'll look into it and probably combine the approaches.
Optimizations often have a snowballing effect :)

> But anyway, that's a much bigger improvement than what you've got here.
> It does still require two write() calls, since you'll get the object
> contents as a separate buffer. But it might be possible to teach
> object_oid_info_extended() to write into a buffer of your choice (so you
> could reserve some space at the front to format the metadata into, and
> likewise you could reuse the buffer to avoid malloc/free for each).

Yeah, that sounds like a good idea.

> I don't know that I'll have time to revisit it in the near future, but
> if you like the direction feel free to take a look at the patch and see
> if you can clean it up. (It was written years ago, but I rebase my
> topics forward regularly and merge them into a daily driver, so it
> should be in good working order).

Thanks.  I'll try to take a look at it soon.
Junio C Hamano June 21, 2024, 7:45 p.m. UTC | #5
Eric Wong <e@80x24.org> writes:

> Cool, I'll look into it and probably combine the approaches.
> Optimizations often have a snowballing effect :)
>
>> But anyway, that's a much bigger improvement than what you've got here.
>> It does still require two write() calls, since you'll get the object
>> contents as a separate buffer. But it might be possible to teach
>> oid_object_info_extended() to write into a buffer of your choice (so you
>> could reserve some space at the front to format the metadata into, and
>> likewise you could reuse the buffer to avoid malloc/free for each).
>
> Yeah, that sounds like a good idea.
>
>> I don't know that I'll have time to revisit it in the near future, but
>> if you like the direction feel free to take a look at the patch and see
>> if you can clean it up. (It was written years ago, but I rebase my
>> topics forward regularly and merge them into a daily driver, so it
>> should be in good working order).
>
> Thanks.  I'll try to take a look at it soon.

Thanks, that's an exciting direction to go in.

Patch

diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index 43a1d7ac49..23db5c6a7a 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -87,9 +87,10 @@  static int filter_object(const char *path, unsigned mode,
 	return 0;
 }
 
-static int stream_blob(const struct object_id *oid)
+static int stream_blob(struct strbuf *scratch, const struct object_id *oid)
 {
-	if (stream_blob_to_fd(1, oid, NULL, 0))
+	if (scratch ? stream_blob_to_strbuf_fd(1, scratch, oid) :
+			stream_blob_to_fd(1, oid, NULL, 0))
 		die("unable to stream %s to stdout", oid_to_hex(oid));
 	return 0;
 }
@@ -195,7 +196,7 @@  static int cat_one_file(int opt, const char *exp_type, const char *obj_name,
 		}
 
 		if (type == OBJ_BLOB) {
-			ret = stream_blob(&oid);
+			ret = stream_blob(NULL, &oid);
 			goto cleanup;
 		}
 		buf = repo_read_object_file(the_repository, &oid, &type,
@@ -237,7 +238,7 @@  static int cat_one_file(int opt, const char *exp_type, const char *obj_name,
 				oidcpy(&blob_oid, &oid);
 
 			if (oid_object_info(the_repository, &blob_oid, NULL) == OBJ_BLOB) {
-				ret = stream_blob(&blob_oid);
+				ret = stream_blob(NULL, &blob_oid);
 				goto cleanup;
 			}
 			/*
@@ -376,7 +377,15 @@  static void batch_write(struct batch_options *opt, const void *data, int len)
 		write_or_die(1, data, len);
 }
 
-static void print_object_or_die(struct batch_options *opt, struct expand_data *data)
+static void flush_scratch(struct batch_options *opt, struct strbuf *scratch)
+{
+	batch_write(opt, scratch->buf, scratch->len);
+	strbuf_reset(scratch);
+}
+
+static void print_object_or_die(struct strbuf *scratch,
+				struct batch_options *opt,
+				struct expand_data *data)
 {
 	const struct object_id *oid = &data->oid;
 
@@ -389,6 +398,8 @@  static void print_object_or_die(struct batch_options *opt, struct expand_data *d
 			char *contents;
 			unsigned long size;
 
+			flush_scratch(opt, scratch);
+
 			if (!data->rest)
 				die("missing path for '%s'", oid_to_hex(oid));
 
@@ -414,7 +425,7 @@  static void print_object_or_die(struct batch_options *opt, struct expand_data *d
 			batch_write(opt, contents, size);
 			free(contents);
 		} else {
-			stream_blob(oid);
+			stream_blob(scratch, oid);
 		}
 	}
 	else {
@@ -422,6 +433,8 @@  static void print_object_or_die(struct batch_options *opt, struct expand_data *d
 		unsigned long size;
 		void *contents;
 
+		flush_scratch(opt, scratch);
+
 		contents = repo_read_object_file(the_repository, oid, &type,
 						 &size);
 		if (!contents)
@@ -498,8 +511,6 @@  static void batch_object_write(const char *obj_name,
 		}
 	}
 
-	strbuf_reset(scratch);
-
 	if (!opt->format) {
 		print_default_format(scratch, data, opt);
 	} else {
@@ -507,12 +518,11 @@  static void batch_object_write(const char *obj_name,
 		strbuf_addch(scratch, opt->output_delim);
 	}
 
-	batch_write(opt, scratch->buf, scratch->len);
-
 	if (opt->batch_mode == BATCH_MODE_CONTENTS) {
-		print_object_or_die(opt, data);
-		batch_write(opt, &opt->output_delim, 1);
+		print_object_or_die(scratch, opt, data);
+		strbuf_addch(scratch, opt->output_delim);
 	}
+	flush_scratch(opt, scratch);
 }
 
 static void batch_one_object(const char *obj_name,
@@ -787,7 +797,7 @@  static int batch_objects(struct batch_options *opt)
 		      opt->format ? opt->format : DEFAULT_FORMAT,
 		      &data);
 	data.mark_query = 0;
-	strbuf_release(&output);
+	strbuf_reset(&output);
 	if (opt->transform_mode)
 		data.split_on_whitespace = 1;
 
diff --git a/streaming.c b/streaming.c
index 10adf625b2..9787449c50 100644
--- a/streaming.c
+++ b/streaming.c
@@ -10,6 +10,8 @@ 
 #include "object-store-ll.h"
 #include "replace-object.h"
 #include "packfile.h"
+#include "strbuf.h"
+#include "hex.h"
 
 typedef int (*open_istream_fn)(struct git_istream *,
 			       struct repository *,
@@ -546,3 +548,51 @@  int stream_blob_to_fd(int fd, const struct object_id *oid, struct stream_filter
 	close_istream(st);
 	return result;
 }
+
+/*
+ * stdio buffering requires extra data copies; using strbuf
+ * allows us to read_istream directly into a scratch buffer
+ */
+int stream_blob_to_strbuf_fd(int fd, struct strbuf *sb,
+				const struct object_id *oid)
+{
+	size_t bufsz = 16 * 1024;
+	struct git_istream *st;
+	enum object_type type;
+	unsigned long sz;
+	int result = -1;
+
+	st = open_istream(the_repository, oid, &type, &sz, NULL);
+	if (!st)
+		return result;
+	if (type != OBJ_BLOB)
+		goto close_and_exit;
+	if (bufsz > sz)
+		bufsz = sz;
+	strbuf_grow(sb, bufsz + 1); /* extra byte for output_delim */
+	while (sz) {
+		ssize_t readlen = read_istream(st, sb->buf + sb->len, bufsz);
+
+		if (readlen < 0)
+			goto close_and_exit;
+		if (readlen == 0)
+			die("unexpected EOF from %s", oid_to_hex(oid));
+		sz -= readlen;
+		if (!sz) {
+			/*
+			 * done, keep the last bit buffered for caller to
+			 * append output_delim
+			 */
+			strbuf_setlen(sb, sb->len + readlen);
+			break;
+		}
+		if (write_in_full(fd, sb->buf, sb->len + readlen) < 0)
+			goto close_and_exit;
+		strbuf_reset(sb);
+	}
+	result = 0;
+
+ close_and_exit:
+	close_istream(st);
+	return result;
+}
diff --git a/streaming.h b/streaming.h
index bd27f59e57..3cba4fe016 100644
--- a/streaming.h
+++ b/streaming.h
@@ -17,5 +17,6 @@  int close_istream(struct git_istream *);
 ssize_t read_istream(struct git_istream *, void *, size_t);
 
 int stream_blob_to_fd(int fd, const struct object_id *, struct stream_filter *, int can_seek);
+int stream_blob_to_strbuf_fd(int fd, struct strbuf *, const struct object_id *);
 
 #endif /* STREAMING_H */