diff mbox series

[v4,4/5] unpack-objects.c: add dry_run mode for get_data()

Message ID 20211203093530.93589-5-chiyutianyi@gmail.com (mailing list archive)
State Superseded
Headers show
Series unpack large objects in stream | expand

Commit Message

Han Xin Dec. 3, 2021, 9:35 a.m. UTC
From: Han Xin <hanxin.hx@alibaba-inc.com>

In dry_run mode, "get_data()" is used to verify the inflation of data,
and the returned buffer will not be used at all and will be freed
immediately. Even in dry_run mode, it is dangerous to allocate a
full-size buffer for a large blob object. Therefore, only allocate a
low memory footprint when calling "get_data()" in dry_run mode.

Suggested-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
Signed-off-by: Han Xin <hanxin.hx@alibaba-inc.com>
---
 builtin/unpack-objects.c | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

Comments

Ævar Arnfjörð Bjarmason Dec. 3, 2021, 1:59 p.m. UTC | #1
On Fri, Dec 03 2021, Han Xin wrote:

> +	unsigned long bufsize = dry_run ? 4096 : size;
> +	void *buf = xmallocz(bufsize);

It's probably nothing, but in your CL you note that you changed another
hardcoding from 4k to 8k, should this one still be 4k?

It's probably fine, just wondering...
Han Xin Dec. 6, 2021, 3:20 a.m. UTC | #2
On Fri, Dec 3, 2021 at 10:00 PM Ævar Arnfjörð Bjarmason
<avarab@gmail.com> wrote:
>
>
> On Fri, Dec 03 2021, Han Xin wrote:
>
> > +     unsigned long bufsize = dry_run ? 4096 : size;
> > +     void *buf = xmallocz(bufsize);
>
> It's probably nothing, but in your CL you note that you changed another
> hardcoding from 4k to 8k, should this one still be 4k?
>
> It's probably fine, just wondering...

Yes, I think this is an omission from my work.
diff mbox series

Patch

diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c
index 4a9466295b..8d68acd662 100644
--- a/builtin/unpack-objects.c
+++ b/builtin/unpack-objects.c
@@ -96,15 +96,16 @@  static void use(int bytes)
 	display_throughput(progress, consumed_bytes);
 }
 
-static void *get_data(unsigned long size)
+static void *get_data(unsigned long size, int dry_run)
 {
 	git_zstream stream;
-	void *buf = xmallocz(size);
+	unsigned long bufsize = dry_run ? 4096 : size;
+	void *buf = xmallocz(bufsize);
 
 	memset(&stream, 0, sizeof(stream));
 
 	stream.next_out = buf;
-	stream.avail_out = size;
+	stream.avail_out = bufsize;
 	stream.next_in = fill(1);
 	stream.avail_in = len;
 	git_inflate_init(&stream);
@@ -124,6 +125,11 @@  static void *get_data(unsigned long size)
 		}
 		stream.next_in = fill(1);
 		stream.avail_in = len;
+		if (dry_run) {
+			/* reuse the buffer in dry_run mode */
+			stream.next_out = buf;
+			stream.avail_out = bufsize;
+		}
 	}
 	git_inflate_end(&stream);
 	return buf;
@@ -323,7 +329,7 @@  static void added_object(unsigned nr, enum object_type type,
 static void unpack_non_delta_entry(enum object_type type, unsigned long size,
 				   unsigned nr)
 {
-	void *buf = get_data(size);
+	void *buf = get_data(size, dry_run);
 
 	if (!dry_run && buf)
 		write_object(nr, type, buf, size);
@@ -357,7 +363,7 @@  static void unpack_delta_entry(enum object_type type, unsigned long delta_size,
 	if (type == OBJ_REF_DELTA) {
 		oidread(&base_oid, fill(the_hash_algo->rawsz));
 		use(the_hash_algo->rawsz);
-		delta_data = get_data(delta_size);
+		delta_data = get_data(delta_size, dry_run);
 		if (dry_run || !delta_data) {
 			free(delta_data);
 			return;
@@ -396,7 +402,7 @@  static void unpack_delta_entry(enum object_type type, unsigned long delta_size,
 		if (base_offset <= 0 || base_offset >= obj_list[nr].offset)
 			die("offset value out of bound for delta base object");
 
-		delta_data = get_data(delta_size);
+		delta_data = get_data(delta_size, dry_run);
 		if (dry_run || !delta_data) {
 			free(delta_data);
 			return;