@@ -468,8 +468,8 @@ usage, at the slight expense of increased disk usage.
* Will generally be streamed when written, which avoids excessive
memory usage, at the cost of some fixed overhead. Commands that make
use of this include linkgit:git-archive[1],
-linkgit:git-fast-import[1], linkgit:git-index-pack[1] and
-linkgit:git-fsck[1].
+linkgit:git-fast-import[1], linkgit:git-index-pack[1],
+linkgit:git-unpack-objects[1] and linkgit:git-fsck[1].
core.excludesFile::
Specifies the pathname to the file that contains patterns to
@@ -351,6 +351,68 @@ static void unpack_non_delta_entry(enum object_type type, unsigned long size,
write_object(nr, type, buf, size);
}
+struct input_zstream_data {
+ git_zstream *zstream;
+ unsigned char buf[8192];
+ int status;
+};
+
+static const void *feed_input_zstream(struct input_stream *in_stream,
+ unsigned long *readlen)
+{
+ struct input_zstream_data *data = in_stream->data;
+ git_zstream *zstream = data->zstream;
+ void *in = fill(1);
+
+ if (in_stream->is_finished) {
+ *readlen = 0;
+ return NULL;
+ }
+
+ zstream->next_out = data->buf;
+ zstream->avail_out = sizeof(data->buf);
+ zstream->next_in = in;
+ zstream->avail_in = len;
+
+ data->status = git_inflate(zstream, 0);
+
+ in_stream->is_finished = data->status != Z_OK;
+ use(len - zstream->avail_in);
+ *readlen = sizeof(data->buf) - zstream->avail_out;
+
+ return data->buf;
+}
+
+static void stream_blob(unsigned long size, unsigned nr)
+{
+ git_zstream zstream = { 0 };
+ struct input_zstream_data data = { 0 };
+ struct input_stream in_stream = {
+ .read = feed_input_zstream,
+ .data = &data,
+ };
+ struct obj_info *info = &obj_list[nr];
+
+ data.zstream = &zstream;
+ git_inflate_init(&zstream);
+
+ if (stream_loose_object(&in_stream, size, &info->oid))
+ die(_("failed to write object in stream"));
+
+ if (data.status != Z_STREAM_END)
+ die(_("inflate returned (%d)"), data.status);
+ git_inflate_end(&zstream);
+
+ if (strict) {
+ struct blob *blob = lookup_blob(the_repository, &info->oid);
+
+ if (!blob)
+ die(_("invalid blob object from stream"));
+ blob->object.flags |= FLAG_WRITTEN;
+ }
+ info->obj = NULL;
+}
+
static int resolve_against_held(unsigned nr, const struct object_id *base,
void *delta_data, unsigned long delta_size)
{
@@ -483,9 +545,14 @@ static void unpack_one(unsigned nr)
}
switch (type) {
+ case OBJ_BLOB:
+ if (!dry_run && size > big_file_threshold) {
+ stream_blob(size, nr);
+ return;
+ }
+ /* fallthrough */
case OBJ_COMMIT:
case OBJ_TREE:
- case OBJ_BLOB:
case OBJ_TAG:
unpack_non_delta_entry(type, size, nr);
return;
@@ -9,7 +9,8 @@ test_description='git unpack-objects with large objects'
prepare_dest () {
test_when_finished "rm -rf dest.git" &&
- git init --bare dest.git
+ git init --bare dest.git &&
+ git -C dest.git config core.bigFileThreshold "$1"
}
test_expect_success "create large objects (1.5 MB) and PACK" '
@@ -17,7 +18,10 @@ test_expect_success "create large objects (1.5 MB) and PACK" '
test_commit --append foo big-blob &&
test-tool genrandom bar 1500000 >big-blob &&
test_commit --append bar big-blob &&
- PACK=$(echo HEAD | git pack-objects --revs pack)
+ PACK=$(echo HEAD | git pack-objects --revs pack) &&
+ git verify-pack -v pack-$PACK.pack >out &&
+ sed -n -e "s/^\([0-9a-f][0-9a-f]*\).*\(commit\|tree\|blob\).*/\1/p" \
+ <out >obj-list
'
test_expect_success 'set memory limitation to 1MB' '
@@ -26,16 +30,47 @@ test_expect_success 'set memory limitation to 1MB' '
'
test_expect_success 'unpack-objects failed under memory limitation' '
- prepare_dest &&
+ prepare_dest 2m &&
test_must_fail git -C dest.git unpack-objects <pack-$PACK.pack 2>err &&
grep "fatal: attempting to allocate" err
'
test_expect_success 'unpack-objects works with memory limitation in dry-run mode' '
- prepare_dest &&
+ prepare_dest 2m &&
git -C dest.git unpack-objects -n <pack-$PACK.pack &&
test_stdout_line_count = 0 find dest.git/objects -type f &&
test_dir_is_empty dest.git/objects/pack
'
+test_expect_success 'unpack big object in stream' '
+ prepare_dest 1m &&
+ git -C dest.git unpack-objects <pack-$PACK.pack &&
+ test_dir_is_empty dest.git/objects/pack
+'
+
+BATCH_CONFIGURATION='-c core.fsync=loose-object -c core.fsyncmethod=batch'
+
+test_expect_success 'unpack big object in stream (core.fsyncmethod=batch)' '
+ prepare_dest 1m &&
+ GIT_TRACE2_EVENT="$(pwd)/trace2.txt" \
+ git -C dest.git $BATCH_CONFIGURATION unpack-objects <pack-$PACK.pack &&
+ grep fsync/hardware-flush trace2.txt &&
+ test_dir_is_empty dest.git/objects/pack &&
+ git -C dest.git cat-file --batch-check="%(objectname)" <obj-list >current &&
+ cmp obj-list current
+'
+
+test_expect_success 'do not unpack existing large objects' '
+ prepare_dest 1m &&
+ git -C dest.git index-pack --stdin <pack-$PACK.pack &&
+ git -C dest.git unpack-objects <pack-$PACK.pack &&
+
+ # The destination came up with the exact same pack...
+ DEST_PACK=$(echo dest.git/objects/pack/pack-*.pack) &&
+ test_cmp pack-$PACK.pack $DEST_PACK &&
+
+ # ...and wrote no loose objects
+ test_stdout_line_count = 0 find dest.git/objects -type f ! -name "pack-*"
+'
+
test_done