diff mbox series

[7/9] reftable/block: reuse uncompressed blocks

Message ID 1e4eba7e9b6b706b30eb5748156ce991ecb4ae37.1711519925.git.ps@pks.im (mailing list archive)
State Superseded
Headers show
Series reftable: optimize table and block iterators | expand

Commit Message

Patrick Steinhardt March 27, 2024, 6:37 a.m. UTC
The reftable backend stores reflog entries in a compressed format and
thus needs to uncompress blocks before one can read records from them.
For each reflog block we thus have to allocate an array that we can
decompress the block contents into. This array is discarded whenever
the table iterator moves to the next block. Consequently, we allocate
a new array for every block, which is quite wasteful.

Refactor the code to reuse the buffer that holds the uncompressed block
data when moving the block reader to a new block. This significantly
reduces the number of allocations when iterating through many compressed
blocks. The following measurements were taken with `git reflog list`
when listing 100k reflogs.
Before:

  HEAP SUMMARY:
      in use at exit: 13,473 bytes in 122 blocks
    total heap usage: 45,755 allocs, 45,633 frees, 254,779,456 bytes allocated

After:

  HEAP SUMMARY:
      in use at exit: 13,473 bytes in 122 blocks
    total heap usage: 23,028 allocs, 22,906 frees, 162,813,547 bytes allocated

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 reftable/block.c  | 14 ++++++--------
 reftable/block.h  |  4 ++++
 reftable/reader.c | 27 ++++++++++++++++-----------
 3 files changed, 26 insertions(+), 19 deletions(-)
diff mbox series

Patch

diff --git a/reftable/block.c b/reftable/block.c
index 471ebd8580..31af075c1d 100644
--- a/reftable/block.c
+++ b/reftable/block.c
@@ -186,7 +186,6 @@  int block_reader_init(struct block_reader *br, struct reftable_block *block,
 	uint16_t restart_count = 0;
 	uint32_t restart_start = 0;
 	uint8_t *restart_bytes = NULL;
-	uint8_t *uncompressed = NULL;
 
 	reftable_block_done(&br->block);
 
@@ -202,14 +201,15 @@  int block_reader_init(struct block_reader *br, struct reftable_block *block,
 		uLongf src_len = block->len - block_header_skip;
 
 		/* Log blocks specify the *uncompressed* size in their header. */
-		REFTABLE_ALLOC_ARRAY(uncompressed, sz);
+		REFTABLE_ALLOC_GROW(br->uncompressed_data, sz,
+				    br->uncompressed_cap);
 
 		/* Copy over the block header verbatim. It's not compressed. */
-		memcpy(uncompressed, block->data, block_header_skip);
+		memcpy(br->uncompressed_data, block->data, block_header_skip);
 
 		/* Uncompress */
 		if (Z_OK !=
-		    uncompress2(uncompressed + block_header_skip, &dst_len,
+		    uncompress2(br->uncompressed_data + block_header_skip, &dst_len,
 				block->data + block_header_skip, &src_len)) {
 			err = REFTABLE_ZLIB_ERROR;
 			goto done;
@@ -222,10 +222,8 @@  int block_reader_init(struct block_reader *br, struct reftable_block *block,
 
 		/* We're done with the input data. */
 		reftable_block_done(block);
-		block->data = uncompressed;
-		uncompressed = NULL;
+		block->data = br->uncompressed_data;
 		block->len = sz;
-		block->source = malloc_block_source();
 		full_block_size = src_len + block_header_skip;
 	} else if (full_block_size == 0) {
 		full_block_size = sz;
@@ -254,12 +252,12 @@  int block_reader_init(struct block_reader *br, struct reftable_block *block,
 	br->restart_bytes = restart_bytes;
 
 done:
-	reftable_free(uncompressed);
 	return err;
 }
 
 void block_reader_release(struct block_reader *br)
 {
+	reftable_free(br->uncompressed_data);
 	reftable_block_done(&br->block);
 }
 
diff --git a/reftable/block.h b/reftable/block.h
index b41efa5042..79275d67f1 100644
--- a/reftable/block.h
+++ b/reftable/block.h
@@ -66,6 +66,10 @@  struct block_reader {
 	struct reftable_block block;
 	int hash_size;
 
+	/* Uncompressed data for log entries. */
+	unsigned char *uncompressed_data;
+	size_t uncompressed_cap;
+
 	/* size of the data, excluding restart data. */
 	uint32_t block_len;
 	uint8_t *restart_bytes;
diff --git a/reftable/reader.c b/reftable/reader.c
index dd4de294a1..aacd5f1337 100644
--- a/reftable/reader.c
+++ b/reftable/reader.c
@@ -459,6 +459,8 @@  static int reader_seek_linear(struct table_iter *ti,
 		 * we would not do a linear search there anymore.
 		 */
 		memset(&next.br.block, 0, sizeof(next.br.block));
+		next.br.uncompressed_data = NULL;
+		next.br.uncompressed_cap = 0;
 
 		err = table_iter_next_block(&next);
 		if (err < 0)
@@ -599,25 +601,28 @@  static int reader_seek_internal(struct reftable_reader *r,
 	struct reftable_reader_offsets *offs =
 		reader_offsets_for(r, reftable_record_type(rec));
 	uint64_t idx = offs->index_offset;
-	struct table_iter ti = TABLE_ITER_INIT;
-	int err = 0;
+	struct table_iter ti = TABLE_ITER_INIT, *p;
+	int err;
+
 	if (idx > 0)
 		return reader_seek_indexed(r, it, rec);
 
 	err = reader_start(r, &ti, reftable_record_type(rec), 0);
 	if (err < 0)
-		return err;
+		goto out;
+
 	err = reader_seek_linear(&ti, rec);
 	if (err < 0)
-		return err;
-	else {
-		struct table_iter *p =
-			reftable_malloc(sizeof(struct table_iter));
-		*p = ti;
-		iterator_from_table_iter(it, p);
-	}
+		goto out;
 
-	return 0;
+	REFTABLE_ALLOC_ARRAY(p, 1);
+	*p = ti;
+	iterator_from_table_iter(it, p);
+
+out:
+	if (err)
+		table_iter_close(&ti);
+	return err;
 }
 
 static int reader_seek(struct reftable_reader *r, struct reftable_iterator *it,